diff --git a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt index e41b2d59ca7f..f591ab782dbc 100644 --- a/Documentation/devicetree/bindings/net/apm-xgene-enet.txt +++ b/Documentation/devicetree/bindings/net/apm-xgene-enet.txt @@ -47,6 +47,9 @@ Optional properties: Valid values are between 0 to 7, that maps to 273, 589, 899, 1222, 1480, 1806, 2147, 2464 ps Default value is 2, which corresponds to 899 ps +- rxlos-gpios: Input gpio from SFP+ module to indicate availability of + incoming signal. + Example: menetclk: menetclk { diff --git a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt index 30d487597ecb..fb40891ee606 100644 --- a/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt +++ b/Documentation/devicetree/bindings/net/brcm,bcm7445-switch-v4.0.txt @@ -6,9 +6,13 @@ Required properties: - reg: addresses and length of the register sets for the device, must be 6 pairs of register addresses and lengths - interrupts: interrupts for the devices, must be two interrupts +- #address-cells: must be 1, see dsa/dsa.txt +- #size-cells: must be 0, see dsa/dsa.txt + +Deprecated binding required properties: + - dsa,mii-bus: phandle to the MDIO bus controller, see dsa/dsa.txt - dsa,ethernet: phandle to the CPU network interface controller, see dsa/dsa.txt -- #size-cells: must be 0 - #address-cells: must be 2, see dsa/dsa.txt Subnodes: @@ -39,6 +43,45 @@ Optional properties: Example: +switch_top@f0b00000 { + compatible = "simple-bus"; + #size-cells = <1>; + #address-cells = <1>; + ranges = <0 0xf0b00000 0x40804>; + + ethernet_switch@0 { + compatible = "brcm,bcm7445-switch-v4.0"; + #size-cells = <0>; + #address-cells = <1>; + reg = <0x0 0x40000 + 0x40000 0x110 + 0x40340 0x30 + 0x40380 0x30 + 0x40400 0x34 + 0x40600 0x208>; + reg-names = "core", "reg", intrl2_0", "intrl2_1", + "fcb, "acb"; + interrupts = <0 0x18 0 + 0 0x19 0>; + brcm,num-gphy = <1>; + brcm,num-rgmii-ports = <2>; + brcm,fcb-pause-override; + brcm,acb-packets-inflight; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + label = "gphy"; + reg = <0>; + }; + }; + }; +}; + +Example using the old DSA DeviceTree binding: + switch_top@f0b00000 { compatible = "simple-bus"; #size-cells = <1>; diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt new file mode 100644 index 000000000000..9c67ee4890d7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -0,0 +1,89 @@ +* Qualcomm Atheros QCA8xxx switch family + +Required properties: + +- compatible: should be "qca,qca8337" +- #size-cells: must be 0 +- #address-cells: must be 1 + +Subnodes: + +The integrated switch subnode should be specified according to the binding +described in dsa/dsa.txt. As the QCA8K switches do not have a N:N mapping of +port and PHY id, each subnode describing a port needs to have a valid phandle +referencing the internal PHY connected to it. The CPU port of this switch is +always port 0. + +Example: + + + &mdio0 { + phy_port1: phy@0 { + reg = <0>; + }; + + phy_port2: phy@1 { + reg = <1>; + }; + + phy_port3: phy@2 { + reg = <2>; + }; + + phy_port4: phy@3 { + reg = <3>; + }; + + phy_port5: phy@4 { + reg = <4>; + }; + + switch0@0 { + compatible = "qca,qca8337"; + #address-cells = <1>; + #size-cells = <0>; + + reg = <0>; + + ports { + #address-cells = <1>; + #size-cells = <0>; + port@0 { + reg = <0>; + label = "cpu"; + ethernet = <&gmac1>; + phy-mode = "rgmii"; + }; + + port@1 { + reg = <1>; + label = "lan1"; + phy-handle = <&phy_port1>; + }; + + port@2 { + reg = <2>; + label = "lan2"; + phy-handle = <&phy_port2>; + }; + + port@3 { + reg = <3>; + label = "lan3"; + phy-handle = <&phy_port3>; + }; + + port@4 { + reg = <4>; + label = "lan4"; + phy-handle = <&phy_port4>; + }; + + port@5 { + reg = <5>; + label = "wan"; + phy-handle = <&phy_port5>; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/ethernet.txt b/Documentation/devicetree/bindings/net/ethernet.txt index 5d88f37480b6..e1d76812419c 100644 --- a/Documentation/devicetree/bindings/net/ethernet.txt +++ b/Documentation/devicetree/bindings/net/ethernet.txt @@ -11,8 +11,8 @@ The following properties are common to the Ethernet controllers: the maximum frame size (there's contradiction in ePAPR). - phy-mode: string, operation mode of the PHY interface; supported values are "mii", "gmii", "sgmii", "qsgmii", "tbi", "rev-mii", "rmii", "rgmii", "rgmii-id", - "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii"; this is now a de-facto - standard property; + "rgmii-rxid", "rgmii-txid", "rtbi", "smii", "xgmii", "trgmii"; this is now a + de-facto standard property; - phy-connection-type: the same as "phy-mode" property but described in ePAPR; - phy-handle: phandle, specifies a reference to a node representing a PHY device; this property is described in ePAPR and so preferred; diff --git a/Documentation/devicetree/bindings/net/macb.txt b/Documentation/devicetree/bindings/net/macb.txt index b5a42df4c928..1506e948610c 100644 --- a/Documentation/devicetree/bindings/net/macb.txt +++ b/Documentation/devicetree/bindings/net/macb.txt @@ -21,6 +21,7 @@ Required properties: - clock-names: Tuple listing input clock names. Required elements: 'pclk', 'hclk' Optional elements: 'tx_clk' + Optional elements: 'rx_clk' applies to cdns,zynqmp-gem - clocks: Phandles to input clocks. Optional properties for PHY child node: diff --git a/Documentation/devicetree/bindings/net/mediatek-net.txt b/Documentation/devicetree/bindings/net/mediatek-net.txt index 32eaaca04d9b..f09525772369 100644 --- a/Documentation/devicetree/bindings/net/mediatek-net.txt +++ b/Documentation/devicetree/bindings/net/mediatek-net.txt @@ -24,14 +24,17 @@ Required properties: Optional properties: - interrupt-parent: Should be the phandle for the interrupt controller that services interrupts for this device - +- mediatek,hwlro: the capability if the hardware supports LRO functions * Ethernet MAC node Required properties: - compatible: Should be "mediatek,eth-mac" - reg: The number of the MAC -- phy-handle: see ethernet.txt file in the same directory. +- phy-handle: see ethernet.txt file in the same directory and + the phy-mode "trgmii" required being provided when reg + is equal to 0 and the MAC uses fixed-link to connect + with internal switch such as MT7530. Example: @@ -51,6 +54,7 @@ eth: ethernet@1b100000 { reset-names = "eth"; mediatek,ethsys = <ðsys>; mediatek,pctl = <&syscfg_pctl_a>; + mediatek,hwlro; #address-cells = <1>; #size-cells = <0>; diff --git a/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt new file mode 100644 index 000000000000..99c7eb0a00c8 --- /dev/null +++ b/Documentation/devicetree/bindings/net/mscc-phy-vsc8531.txt @@ -0,0 +1,58 @@ +* Microsemi - vsc8531 Giga bit ethernet phy + +Required properties: +- compatible : Should contain phy id as "ethernet-phy-idAAAA.BBBB" + The PHY device uses the binding described in + Documentation/devicetree/bindings/net/phy.txt + +Optional properties: +- vsc8531,vddmac : The vddmac in mV. +- vsc8531,edge-slowdown : % the edge should be slowed down relative to + the fastest possible edge time. Native sign + need not enter. + Edge rate sets the drive strength of the MAC + interface output signals. Changing the drive + strength will affect the edge rate of the output + signal. The goal of this setting is to help + reduce electrical emission (EMI) by being able + to reprogram drive strength and in effect slow + down the edge rate if desired. Table 1 shows the + impact to the edge rate per VDDMAC supply for each + drive strength setting. + Ref: Table:1 - Edge rate change below. + +Note: see dt-bindings/net/mscc-phy-vsc8531.h for applicable values + +Table: 1 - Edge rate change +----------------------------------------------------------------| +| Edge Rate Change (VDDMAC) | +| | +| 3300 mV 2500 mV 1800 mV 1500 mV | +|---------------------------------------------------------------| +| Default Deafult Default Default | +| (Fastest) (recommended) (recommended) | +|---------------------------------------------------------------| +| -2% -3% -5% -6% | +|---------------------------------------------------------------| +| -4% -6% -9% -14% | +|---------------------------------------------------------------| +| -7% -10% -16% -21% | +|(recommended) (recommended) | +|---------------------------------------------------------------| +| -10% -14% -23% -29% | +|---------------------------------------------------------------| +| -17% -23% -35% -42% | +|---------------------------------------------------------------| +| -29% -37% -52% -58% | +|---------------------------------------------------------------| +| -53% -63% -76% -77% | +| (slowest) | +|---------------------------------------------------------------| + +Example: + + vsc8531_0: ethernet-phy@0 { + compatible = "ethernet-phy-id0007.0570"; + vsc8531,vddmac = <3300>; + vsc8531,edge-slowdown = <21>; + }; diff --git a/Documentation/devicetree/bindings/net/qcom-emac.txt b/Documentation/devicetree/bindings/net/qcom-emac.txt new file mode 100644 index 000000000000..346e6c7f47b7 --- /dev/null +++ b/Documentation/devicetree/bindings/net/qcom-emac.txt @@ -0,0 +1,111 @@ +Qualcomm Technologies EMAC Gigabit Ethernet Controller + +This network controller consists of two devices: a MAC and an SGMII +internal PHY. Each device is represented by a device tree node. A phandle +connects the MAC node to its corresponding internal phy node. Another +phandle points to the external PHY node. + +Required properties: + +MAC node: +- compatible : Should be "qcom,fsm9900-emac". +- reg : Offset and length of the register regions for the device +- interrupts : Interrupt number used by this controller +- mac-address : The 6-byte MAC address. If present, it is the default + MAC address. +- internal-phy : phandle to the internal PHY node +- phy-handle : phandle the the external PHY node + +Internal PHY node: +- compatible : Should be "qcom,fsm9900-emac-sgmii" or "qcom,qdf2432-emac-sgmii". +- reg : Offset and length of the register region(s) for the device +- interrupts : Interrupt number used by this controller + +The external phy child node: +- reg : The phy address + +Example: + +FSM9900: + +soc { + #address-cells = <1>; + #size-cells = <1>; + + emac0: ethernet@feb20000 { + compatible = "qcom,fsm9900-emac"; + reg = <0xfeb20000 0x10000>, + <0xfeb36000 0x1000>; + interrupts = <76>; + + clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>, + <&gcc 6>, <&gcc 7>; + clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk", + "mdio_clk", "tx_clk", "rx_clk", "sys_clk"; + + internal-phy = <&emac_sgmii>; + + phy-handle = <&phy0>; + + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <0>; + }; + + pinctrl-names = "default"; + pinctrl-0 = <&mdio_pins_a>; + }; + + emac_sgmii: ethernet@feb38000 { + compatible = "qcom,fsm9900-emac-sgmii"; + reg = <0xfeb38000 0x1000>; + interrupts = <80>; + }; + + tlmm: pinctrl@fd510000 { + compatible = "qcom,fsm9900-pinctrl"; + + mdio_pins_a: mdio { + state { + pins = "gpio123", "gpio124"; + function = "mdio"; + }; + }; + }; + + +QDF2432: + +soc { + #address-cells = <2>; + #size-cells = <2>; + + emac0: ethernet@38800000 { + compatible = "qcom,fsm9900-emac"; + reg = <0x0 0x38800000 0x0 0x10000>, + <0x0 0x38816000 0x0 0x1000>; + interrupts = <0 256 4>; + + clocks = <&gcc 0>, <&gcc 1>, <&gcc 3>, <&gcc 4>, <&gcc 5>, + <&gcc 6>, <&gcc 7>; + clock-names = "axi_clk", "cfg_ahb_clk", "high_speed_clk", + "mdio_clk", "tx_clk", "rx_clk", "sys_clk"; + + internal-phy = <&emac_sgmii>; + + phy-handle = <&phy0>; + + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@4 { + reg = <4>; + }; + }; + + emac_sgmii: ethernet@410400 { + compatible = "qcom,qdf2432-emac-sgmii"; + reg = <0x0 0x00410400 0x0 0xc00>, /* Base address */ + <0x0 0x00410000 0x0 0x400>; /* Per-lane digital */ + interrupts = <0 254 1>; + }; diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt index cccd945fc45b..95383c5131fc 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.txt +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.txt @@ -3,8 +3,12 @@ Rockchip SoC RK3288 10/100/1000 Ethernet driver(GMAC) The device node has following properties. Required properties: - - compatible: Can be one of "rockchip,rk3228-gmac", "rockchip,rk3288-gmac", - "rockchip,rk3368-gmac" + - compatible: should be "rockchip,-gamc" + "rockchip,rk3228-gmac": found on RK322x SoCs + "rockchip,rk3288-gmac": found on RK3288 SoCs + "rockchip,rk3366-gmac": found on RK3366 SoCs + "rockchip,rk3368-gmac": found on RK3368 SoCs + "rockchip,rk3399-gmac": found on RK3399 SoCs - reg: addresses and length of the register sets for the device. - interrupts: Should contain the GMAC interrupts. - interrupt-names: Should contain the interrupt names "macirq". diff --git a/Documentation/devicetree/bindings/net/sh_eth.txt b/Documentation/devicetree/bindings/net/sh_eth.txt index 2f6ec85fda8e..0115c85a2425 100644 --- a/Documentation/devicetree/bindings/net/sh_eth.txt +++ b/Documentation/devicetree/bindings/net/sh_eth.txt @@ -5,6 +5,8 @@ interface contains. Required properties: - compatible: "renesas,gether-r8a7740" if the device is a part of R8A7740 SoC. + "renesas,ether-r8a7743" if the device is a part of R8A7743 SoC. + "renesas,ether-r8a7745" if the device is a part of R8A7745 SoC. "renesas,ether-r8a7778" if the device is a part of R8A7778 SoC. "renesas,ether-r8a7779" if the device is a part of R8A7779 SoC. "renesas,ether-r8a7790" if the device is a part of R8A7790 SoC. diff --git a/Documentation/devicetree/bindings/net/smsc911x.txt b/Documentation/devicetree/bindings/net/smsc911x.txt index 3fed3c124411..16c3a9501f5d 100644 --- a/Documentation/devicetree/bindings/net/smsc911x.txt +++ b/Documentation/devicetree/bindings/net/smsc911x.txt @@ -3,9 +3,11 @@ Required properties: - compatible : Should be "smsc,lan", "smsc,lan9115" - reg : Address and length of the io space for SMSC LAN -- interrupts : Should contain SMSC LAN interrupt line -- interrupt-parent : Should be the phandle for the interrupt controller - that services interrupts for this device +- interrupts : one or two interrupt specifiers + - The first interrupt is the SMSC LAN interrupt line + - The second interrupt (if present) is the PME (power + management event) interrupt that is able to wake up the host + system with a 50ms pulse on network activity - phy-mode : See ethernet.txt file in the same directory Optional properties: @@ -21,6 +23,10 @@ Optional properties: external PHY - smsc,save-mac-address : Indicates that mac address needs to be saved before resetting the controller +- reset-gpios : a GPIO line connected to the RESET (active low) signal + of the device. On many systems this is wired high so the device goes + out of reset at power-on, but if it is under program control, this + optional GPIO can wake up in response to it. Examples: @@ -29,7 +35,8 @@ lan9220@f4000000 { reg = <0xf4000000 0x2000000>; phy-mode = "mii"; interrupt-parent = <&gpio1>; - interrupts = <31>; + interrupts = <31>, <32>; + reset-gpios = <&gpio1 30 GPIO_ACTIVE_LOW>; reg-io-width = <4>; smsc,irq-push-pull; }; diff --git a/Documentation/devicetree/bindings/net/stm32-dwmac.txt b/Documentation/devicetree/bindings/net/stm32-dwmac.txt new file mode 100644 index 000000000000..c35afb7e956a --- /dev/null +++ b/Documentation/devicetree/bindings/net/stm32-dwmac.txt @@ -0,0 +1,32 @@ +STMicroelectronics STM32 / MCU DWMAC glue layer controller + +This file documents platform glue layer for stmmac. +Please see stmmac.txt for the other unchanged properties. + +The device node has following properties. + +Required properties: +- compatible: Should be "st,stm32-dwmac" to select glue, and + "snps,dwmac-3.50a" to select IP version. +- clocks: Must contain a phandle for each entry in clock-names. +- clock-names: Should be "stmmaceth" for the host clock. + Should be "mac-clk-tx" for the MAC TX clock. + Should be "mac-clk-rx" for the MAC RX clock. +- st,syscon : Should be phandle/offset pair. The phandle to the syscon node which + encompases the glue register, and the offset of the control register. +Example: + + ethernet@40028000 { + compatible = "st,stm32-dwmac", "snps,dwmac-3.50a"; + status = "disabled"; + reg = <0x40028000 0x8000>; + reg-names = "stmmaceth"; + interrupts = <0 61 0>, <0 62 0>; + interrupt-names = "macirq", "eth_wake_irq"; + clock-names = "stmmaceth", "mac-clk-tx", "mac-clk-rx"; + clocks = <&rcc 0 25>, <&rcc 0 26>, <&rcc 0 27>; + st,syscon = <&syscfg 0x4>; + snps,pbl = <8>; + snps,mixed-burst; + dma-ranges; + }; diff --git a/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt new file mode 100644 index 000000000000..038dda48b8e6 --- /dev/null +++ b/Documentation/devicetree/bindings/net/xilinx_gmii2rgmii.txt @@ -0,0 +1,35 @@ +XILINX GMIITORGMII Converter Driver Device Tree Bindings +-------------------------------------------------------- + +The Gigabit Media Independent Interface (GMII) to Reduced Gigabit Media +Independent Interface (RGMII) core provides the RGMII between RGMII-compliant +Ethernet physical media devices (PHY) and the Gigabit Ethernet controller. +This core can be used in all three modes of operation(10/100/1000 Mb/s). +The Management Data Input/Output (MDIO) interface is used to configure the +Speed of operation. This core can switch dynamically between the three +Different speed modes by configuring the conveter register through mdio write. + +This converter sits between the ethernet MAC and the external phy. +MAC <==> GMII2RGMII <==> RGMII_PHY + +For more details about mdio please refer phy.txt file in the same directory. + +Required properties: +- compatible : Should be "xlnx,gmii-to-rgmii-1.0" +- reg : The ID number for the phy, usually a small integer +- phy-handle : Should point to the external phy device. + See ethernet.txt file in the same directory. + +Example: + mdio { + #address-cells = <1>; + #size-cells = <0>; + phy: ethernet-phy@0 { + ...... + }; + gmiitorgmii: gmiitorgmii@8 { + compatible = "xlnx,gmii-to-rgmii-1.0"; + reg = <8>; + phy-handle = <&phy>; + }; + }; diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 415154a487d0..a7697783ac4c 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -74,6 +74,8 @@ dns_resolver.txt - The DNS resolver module allows kernel servies to make DNS queries. driver.txt - Softnet driver issues. +ena.txt + - info on Amazon's Elastic Network Adapter (ENA) e100.txt - info on Intel's EtherExpress PRO/100 line of 10/100 boards e1000.txt diff --git a/Documentation/networking/batman-adv.txt b/Documentation/networking/batman-adv.txt index 1b5e7a7f2185..8a8d3d96f6c6 100644 --- a/Documentation/networking/batman-adv.txt +++ b/Documentation/networking/batman-adv.txt @@ -43,10 +43,15 @@ new interfaces to verify the compatibility. There is no need to reload the module if you plug your USB wifi adapter into your ma- chine after batman advanced was initially loaded. -To activate a given interface simply write "bat0" into its -"mesh_iface" file inside the batman_adv subfolder: +The batman-adv soft-interface can be created using the iproute2 +tool "ip" -# echo bat0 > /sys/class/net/eth0/batman_adv/mesh_iface +# ip link add name bat0 type batadv + +To activate a given interface simply attach it to the "bat0" +interface + +# ip link set dev eth0 master bat0 Repeat this step for all interfaces you wish to add. Now batman starts using/broadcasting on this/these interface(s). @@ -56,10 +61,10 @@ By reading the "iface_status" file you can check its status: # cat /sys/class/net/eth0/batman_adv/iface_status # active -To deactivate an interface you have to write "none" into its -"mesh_iface" file: +To deactivate an interface you have to detach it from the +"bat0" interface: -# echo none > /sys/class/net/eth0/batman_adv/mesh_iface +# ip link set dev eth0 nomaster All mesh wide settings can be found in batman's own interface diff --git a/Documentation/networking/dsa/dsa.txt b/Documentation/networking/dsa/dsa.txt index f20c884c048a..6d6c07cf1a9a 100644 --- a/Documentation/networking/dsa/dsa.txt +++ b/Documentation/networking/dsa/dsa.txt @@ -227,9 +227,9 @@ to address individual switches in the tree. dsa_switch: structure describing a switch device in the tree, referencing a dsa_switch_tree as a backpointer, slave network devices, master network device, -and a reference to the backing dsa_switch_driver +and a reference to the backing dsa_switch_ops -dsa_switch_driver: structure referencing function pointers, see below for a full +dsa_switch_ops: structure referencing function pointers, see below for a full description. Design limitations @@ -357,10 +357,10 @@ regular HWMON devices in /sys/class/hwmon/. Driver development ================== -DSA switch drivers need to implement a dsa_switch_driver structure which will +DSA switch drivers need to implement a dsa_switch_ops structure which will contain the various members described below. -register_switch_driver() registers this dsa_switch_driver in its internal list +register_switch_driver() registers this dsa_switch_ops in its internal list of drivers to probe for. unregister_switch_driver() does the exact opposite. Unless requested differently by setting the priv_size member accordingly, DSA @@ -379,7 +379,7 @@ Switch configuration buses, return a non-NULL string - setup: setup function for the switch, this function is responsible for setting - up the dsa_switch_driver private structure with all it needs: register maps, + up the dsa_switch_ops private structure with all it needs: register maps, interrupts, mutexes, locks etc.. This function is also expected to properly configure the switch to separate all network interfaces from each other, that is, they should be isolated by the switch hardware itself, typically by creating @@ -584,6 +584,29 @@ of DSA, would be the its port-based VLAN, used by the associated bridge device. function that the driver has to call for each MAC address known to be behind the given port. A switchdev object is used to carry the VID and FDB info. +- port_mdb_prepare: bridge layer function invoked when the bridge prepares the + installation of a multicast database entry. If the operation is not supported, + this function should return -EOPNOTSUPP to inform the bridge code to fallback + to a software implementation. No hardware setup must be done in this function. + See port_fdb_add for this and details. + +- port_mdb_add: bridge layer function invoked when the bridge wants to install + a multicast database entry, the switch hardware should be programmed with the + specified address in the specified VLAN ID in the forwarding database + associated with this VLAN ID. + +Note: VLAN ID 0 corresponds to the port private database, which, in the context +of DSA, would be the its port-based VLAN, used by the associated bridge device. + +- port_mdb_del: bridge layer function invoked when the bridge wants to remove a + multicast database entry, the switch hardware should be programmed to delete + the specified MAC address from the specified VLAN ID if it was mapped into + this port forwarding database. + +- port_mdb_dump: bridge layer function invoked with a switchdev callback + function that the driver has to call for each MAC address known to be behind + the given port. A switchdev object is used to carry the VID and MDB info. + TODO ==== diff --git a/Documentation/networking/ena.txt b/Documentation/networking/ena.txt new file mode 100644 index 000000000000..2b4b6f57e549 --- /dev/null +++ b/Documentation/networking/ena.txt @@ -0,0 +1,305 @@ +Linux kernel driver for Elastic Network Adapter (ENA) family: +============================================================= + +Overview: +========= +ENA is a networking interface designed to make good use of modern CPU +features and system architectures. + +The ENA device exposes a lightweight management interface with a +minimal set of memory mapped registers and extendable command set +through an Admin Queue. + +The driver supports a range of ENA devices, is link-speed independent +(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has +a negotiated and extendable feature set. + +Some ENA devices support SR-IOV. This driver is used for both the +SR-IOV Physical Function (PF) and Virtual Function (VF) devices. + +ENA devices enable high speed and low overhead network traffic +processing by providing multiple Tx/Rx queue pairs (the maximum number +is advertised by the device via the Admin Queue), a dedicated MSI-X +interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation, +and CPU cacheline optimized data placement. + +The ENA driver supports industry standard TCP/IP offload features such +as checksum offload and TCP transmit segmentation offload (TSO). +Receive-side scaling (RSS) is supported for multi-core scaling. + +The ENA driver and its corresponding devices implement health +monitoring mechanisms such as watchdog, enabling the device and driver +to recover in a manner transparent to the application, as well as +debug logs. + +Some of the ENA devices support a working mode called Low-latency +Queue (LLQ), which saves several more microseconds. + +Supported PCI vendor ID/device IDs: +=================================== +1d0f:0ec2 - ENA PF +1d0f:1ec2 - ENA PF with LLQ support +1d0f:ec20 - ENA VF +1d0f:ec21 - ENA VF with LLQ support + +ENA Source Code Directory Structure: +==================================== +ena_com.[ch] - Management communication layer. This layer is + responsible for the handling all the management + (admin) communication between the device and the + driver. +ena_eth_com.[ch] - Tx/Rx data path. +ena_admin_defs.h - Definition of ENA management interface. +ena_eth_io_defs.h - Definition of ENA data path interface. +ena_common_defs.h - Common definitions for ena_com layer. +ena_regs_defs.h - Definition of ENA PCI memory-mapped (MMIO) registers. +ena_netdev.[ch] - Main Linux kernel driver. +ena_syfsfs.[ch] - Sysfs files. +ena_ethtool.c - ethtool callbacks. +ena_pci_id_tbl.h - Supported device IDs. + +Management Interface: +===================== +ENA management interface is exposed by means of: +- PCIe Configuration Space +- Device Registers +- Admin Queue (AQ) and Admin Completion Queue (ACQ) +- Asynchronous Event Notification Queue (AENQ) + +ENA device MMIO Registers are accessed only during driver +initialization and are not involved in further normal device +operation. + +AQ is used for submitting management commands, and the +results/responses are reported asynchronously through ACQ. + +ENA introduces a very small set of management commands with room for +vendor-specific extensions. Most of the management operations are +framed in a generic Get/Set feature command. + +The following admin queue commands are supported: +- Create I/O submission queue +- Create I/O completion queue +- Destroy I/O submission queue +- Destroy I/O completion queue +- Get feature +- Set feature +- Configure AENQ +- Get statistics + +Refer to ena_admin_defs.h for the list of supported Get/Set Feature +properties. + +The Asynchronous Event Notification Queue (AENQ) is a uni-directional +queue used by the ENA device to send to the driver events that cannot +be reported using ACQ. AENQ events are subdivided into groups. Each +group may have multiple syndromes, as shown below + +The events are: + Group Syndrome + Link state change - X - + Fatal error - X - + Notification Suspend traffic + Notification Resume traffic + Keep-Alive - X - + +ACQ and AENQ share the same MSI-X vector. + +Keep-Alive is a special mechanism that allows monitoring of the +device's health. The driver maintains a watchdog (WD) handler which, +if fired, logs the current state and statistics then resets and +restarts the ENA device and driver. A Keep-Alive event is delivered by +the device every second. The driver re-arms the WD upon reception of a +Keep-Alive event. A missed Keep-Alive event causes the WD handler to +fire. + +Data Path Interface: +==================== +I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx +SQ correspondingly). Each SQ has a completion queue (CQ) associated +with it. + +The SQs and CQs are implemented as descriptor rings in contiguous +physical memory. + +The ENA driver supports two Queue Operation modes for Tx SQs: +- Regular mode + * In this mode the Tx SQs reside in the host's memory. The ENA + device fetches the ENA Tx descriptors and packet data from host + memory. +- Low Latency Queue (LLQ) mode or "push-mode". + * In this mode the driver pushes the transmit descriptors and the + first 128 bytes of the packet directly to the ENA device memory + space. The rest of the packet payload is fetched by the + device. For this operation mode, the driver uses a dedicated PCI + device memory BAR, which is mapped with write-combine capability. + +The Rx SQs support only the regular mode. + +Note: Not all ENA devices support LLQ, and this feature is negotiated + with the device upon initialization. If the ENA device does not + support LLQ mode, the driver falls back to the regular mode. + +The driver supports multi-queue for both Tx and Rx. This has various +benefits: +- Reduced CPU/thread/process contention on a given Ethernet interface. +- Cache miss rate on completion is reduced, particularly for data + cache lines that hold the sk_buff structures. +- Increased process-level parallelism when handling received packets. +- Increased data cache hit rate, by steering kernel processing of + packets to the CPU, where the application thread consuming the + packet is running. +- In hardware interrupt re-direction. + +Interrupt Modes: +================ +The driver assigns a single MSI-X vector per queue pair (for both Tx +and Rx directions). The driver assigns an additional dedicated MSI-X vector +for management (for ACQ and AENQ). + +Management interrupt registration is performed when the Linux kernel +probes the adapter, and it is de-registered when the adapter is +removed. I/O queue interrupt registration is performed when the Linux +interface of the adapter is opened, and it is de-registered when the +interface is closed. + +The management interrupt is named: + ena-mgmnt@pci: +and for each queue pair, an interrupt is named: + -Tx-Rx- + +The ENA device operates in auto-mask and auto-clear interrupt +modes. That is, once MSI-X is delivered to the host, its Cause bit is +automatically cleared and the interrupt is masked. The interrupt is +unmasked by the driver after NAPI processing is complete. + +Interrupt Moderation: +===================== +ENA driver and device can operate in conventional or adaptive interrupt +moderation mode. + +In conventional mode the driver instructs device to postpone interrupt +posting according to static interrupt delay value. The interrupt delay +value can be configured through ethtool(8). The following ethtool +parameters are supported by the driver: tx-usecs, rx-usecs + +In adaptive interrupt moderation mode the interrupt delay value is +updated by the driver dynamically and adjusted every NAPI cycle +according to the traffic nature. + +By default ENA driver applies adaptive coalescing on Rx traffic and +conventional coalescing on Tx traffic. + +Adaptive coalescing can be switched on/off through ethtool(8) +adaptive_rx on|off parameter. + +The driver chooses interrupt delay value according to the number of +bytes and packets received between interrupt unmasking and interrupt +posting. The driver uses interrupt delay table that subdivides the +range of received bytes/packets into 5 levels and assigns interrupt +delay value to each level. + +The user can enable/disable adaptive moderation, modify the interrupt +delay table and restore its default values through sysfs. + +The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK +and can be configured by the ETHTOOL_STUNABLE command of the +SIOCETHTOOL ioctl. + +SKB: +The driver-allocated SKB for frames received from Rx handling using +NAPI context. The allocation method depends on the size of the packet. +If the frame length is larger than rx_copybreak, napi_get_frags() +is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer +content is copied (by CPU) to the SKB, and the buffer is recycled. + +Statistics: +=========== +The user can obtain ENA device and driver statistics using ethtool. +The driver can collect regular or extended statistics (including +per-queue stats) from the device. + +In addition the driver logs the stats to syslog upon device reset. + +MTU: +==== +The driver supports an arbitrarily large MTU with a maximum that is +negotiated with the device. The driver configures MTU using the +SetFeature command (ENA_ADMIN_MTU property). The user can change MTU +via ip(8) and similar legacy tools. + +Stateless Offloads: +=================== +The ENA driver supports: +- TSO over IPv4/IPv6 +- TSO with ECN +- IPv4 header checksum offload +- TCP/UDP over IPv4/IPv6 checksum offloads + +RSS: +==== +- The ENA device supports RSS that allows flexible Rx traffic + steering. +- Toeplitz and CRC32 hash functions are supported. +- Different combinations of L2/L3/L4 fields can be configured as + inputs for hash functions. +- The driver configures RSS settings using the AQ SetFeature command + (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties). +- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash + function delivered in the Rx CQ descriptor is set in the received + SKB. +- The user can provide a hash key, hash function, and configure the + indirection table through ethtool(8). + +DATA PATH: +========== +Tx: +--- +end_start_xmit() is called by the stack. This function does the following: +- Maps data buffers (skb->data and frags). +- Populates ena_buf for the push buffer (if the driver and device are + in push mode.) +- Prepares ENA bufs for the remaining frags. +- Allocates a new request ID from the empty req_id ring. The request + ID is the index of the packet in the Tx info. This is used for + out-of-order TX completions. +- Adds the packet to the proper place in the Tx ring. +- Calls ena_com_prepare_tx(), an ENA communication layer that converts + the ena_bufs to ENA descriptors (and adds meta ENA descriptors as + needed.) + * This function also copies the ENA descriptors and the push buffer + to the Device memory space (if in push mode.) +- Writes doorbell to the ENA device. +- When the ENA device finishes sending the packet, a completion + interrupt is raised. +- The interrupt handler schedules NAPI. +- The ena_clean_tx_irq() function is called. This function handles the + completion descriptors generated by the ENA, with a single + completion descriptor per completed packet. + * req_id is retrieved from the completion descriptor. The tx_info of + the packet is retrieved via the req_id. The data buffers are + unmapped and req_id is returned to the empty req_id ring. + * The function stops when the completion descriptors are completed or + the budget is reached. + +Rx: +--- +- When a packet is received from the ENA device. +- The interrupt handler schedules NAPI. +- The ena_clean_rx_irq() function is called. This function calls + ena_rx_pkt(), an ENA communication layer function, which returns the + number of descriptors used for a new unhandled packet, and zero if + no new packet is found. +- Then it calls the ena_clean_rx_irq() function. +- ena_eth_rx_skb() checks packet length: + * If the packet is small (len < rx_copybreak), the driver allocates + a SKB for the new packet, and copies the packet payload into the + SKB data buffer. + - In this way the original data buffer is not passed to the stack + and is reused for future Rx packets. + * Otherwise the function unmaps the Rx buffer, then allocates the + new SKB structure and hooks the Rx buffer to the SKB frags. +- The new SKB is updated with the necessary information (protocol, + checksum hw verify result, etc.), and then passed to the network + stack, using the NAPI interface function napi_gro_receive(). diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 9ae929395b24..3db8c67d2c8d 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -575,32 +575,33 @@ tcp_syncookies - BOOLEAN unconditionally generation of syncookies. tcp_fastopen - INTEGER - Enable TCP Fast Open feature (draft-ietf-tcpm-fastopen) to send data - in the opening SYN packet. To use this feature, the client application - must use sendmsg() or sendto() with MSG_FASTOPEN flag rather than - connect() to perform a TCP handshake automatically. + Enable TCP Fast Open (RFC7413) to send and accept data in the opening + SYN packet. + + The client support is enabled by flag 0x1 (on by default). The client + then must use sendmsg() or sendto() with the MSG_FASTOPEN flag, + rather than connect() to send data in SYN. + + The server support is enabled by flag 0x2 (off by default). Then + either enable for all listeners with another flag (0x400) or + enable individual listeners via TCP_FASTOPEN socket option with + the option value being the length of the syn-data backlog. The values (bitmap) are - 1: Enables sending data in the opening SYN on the client w/ MSG_FASTOPEN. - 2: Enables TCP Fast Open on the server side, i.e., allowing data in - a SYN packet to be accepted and passed to the application before - 3-way hand shake finishes. - 4: Send data in the opening SYN regardless of cookie availability and - without a cookie option. - 0x100: Accept SYN data w/o validating the cookie. - 0x200: Accept data-in-SYN w/o any cookie option present. - 0x400/0x800: Enable Fast Open on all listeners regardless of the - TCP_FASTOPEN socket option. The two different flags designate two - different ways of setting max_qlen without the TCP_FASTOPEN socket - option. + 0x1: (client) enables sending data in the opening SYN on the client. + 0x2: (server) enables the server support, i.e., allowing data in + a SYN packet to be accepted and passed to the + application before 3-way handshake finishes. + 0x4: (client) send data in the opening SYN regardless of cookie + availability and without a cookie option. + 0x200: (server) accept data-in-SYN w/o any cookie option present. + 0x400: (server) enable all listeners to support Fast Open by + default without explicit TCP_FASTOPEN socket option. - Default: 1 + Default: 0x1 - Note that the client & server side Fast Open flags (1 and 2 - respectively) must be also enabled before the rest of flags can take - effect. - - See include/net/tcp.h and the code for more details. + Note that that additional client or server features are only + effective if the basic support (0x1 and 0x2) are enabled respectively. tcp_syn_retries - INTEGER Number of times initial SYNs for an active TCP connection attempt diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt index 14422f8fcdc4..24196cef7c91 100644 --- a/Documentation/networking/ipvlan.txt +++ b/Documentation/networking/ipvlan.txt @@ -22,7 +22,7 @@ The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module There are no module parameters for this driver and it can be configured using IProute2/ip utility. - ip link add link type ipvlan mode { l2 | L3 } + ip link add link type ipvlan mode { l2 | l3 | l3s } e.g. ip link add link ipvl0 eth0 type ipvlan mode l2 @@ -48,6 +48,11 @@ master device for the L2 processing and routing from that instance will be used before packets are queued on the outbound device. In this mode the slaves will not receive nor can send multicast / broadcast traffic. +4.3 L3S mode: + This is very similar to the L3 mode except that iptables (conn-tracking) +works in this mode and hence it is L3-symmetric (L3s). This will have slightly less +performance but that shouldn't matter since you are choosing this mode over plain-L3 +mode to make conn-tracking work. 5. What to choose (macvlan vs. ipvlan)? These two devices are very similar in many regards and the specific use diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt index 70c926ae212d..1b63bbc6b94f 100644 --- a/Documentation/networking/rxrpc.txt +++ b/Documentation/networking/rxrpc.txt @@ -725,7 +725,8 @@ The kernel interface functions are as follows: (*) End a client call. - void rxrpc_kernel_end_call(struct rxrpc_call *call); + void rxrpc_kernel_end_call(struct socket *sock, + struct rxrpc_call *call); This is used to end a previously begun call. The user_call_ID is expunged from AF_RXRPC's knowledge and will not be seen again in association with @@ -733,7 +734,9 @@ The kernel interface functions are as follows: (*) Send data through a call. - int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, + int rxrpc_kernel_send_data(struct socket *sock, + struct rxrpc_call *call, + struct msghdr *msg, size_t len); This is used to supply either the request part of a client call or the @@ -745,9 +748,42 @@ The kernel interface functions are as follows: The msg must not specify a destination address, control data or any flags other than MSG_MORE. len is the total amount of data to transmit. + (*) Receive data from a call. + + int rxrpc_kernel_recv_data(struct socket *sock, + struct rxrpc_call *call, + void *buf, + size_t size, + size_t *_offset, + bool want_more, + u32 *_abort) + + This is used to receive data from either the reply part of a client call + or the request part of a service call. buf and size specify how much + data is desired and where to store it. *_offset is added on to buf and + subtracted from size internally; the amount copied into the buffer is + added to *_offset before returning. + + want_more should be true if further data will be required after this is + satisfied and false if this is the last item of the receive phase. + + There are three normal returns: 0 if the buffer was filled and want_more + was true; 1 if the buffer was filled, the last DATA packet has been + emptied and want_more was false; and -EAGAIN if the function needs to be + called again. + + If the last DATA packet is processed but the buffer contains less than + the amount requested, EBADMSG is returned. If want_more wasn't set, but + more data was available, EMSGSIZE is returned. + + If a remote ABORT is detected, the abort code received will be stored in + *_abort and ECONNABORTED will be returned. + (*) Abort a call. - void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code); + void rxrpc_kernel_abort_call(struct socket *sock, + struct rxrpc_call *call, + u32 abort_code); This is used to abort a call if it's still in an abortable state. The abort code specified will be placed in the ABORT message sent. @@ -820,47 +856,6 @@ The kernel interface functions are as follows: Other errors may be returned if the call had been aborted (-ECONNABORTED) or had timed out (-ETIME). - (*) Record the delivery of a data message. - - void rxrpc_kernel_data_consumed(struct rxrpc_call *call, - struct sk_buff *skb); - - This is used to record a data message as having been consumed and to - update the ACK state for the call. The message must still be passed to - rxrpc_kernel_free_skb() for disposal by the caller. - - (*) Free a message. - - void rxrpc_kernel_free_skb(struct sk_buff *skb); - - This is used to free a non-DATA socket buffer intercepted from an AF_RXRPC - socket. - - (*) Determine if a data message is the last one on a call. - - bool rxrpc_kernel_is_data_last(struct sk_buff *skb); - - This is used to determine if a socket buffer holds the last data message - to be received for a call (true will be returned if it does, false - if not). - - The data message will be part of the reply on a client call and the - request on an incoming call. In the latter case there will be more - messages, but in the former case there will not. - - (*) Get the abort code from an abort message. - - u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb); - - This is used to extract the abort code from a remote abort message. - - (*) Get the error number from a local or network error message. - - int rxrpc_kernel_get_error_number(struct sk_buff *skb); - - This is used to extract the error number from a message indicating either - a local error occurred or a network error occurred. - (*) Allocate a null key for doing anonymous security. struct key *rxrpc_get_null_key(const char *keyname); @@ -868,6 +863,13 @@ The kernel interface functions are as follows: This is used to allocate a null RxRPC key that can be used to indicate anonymous security for a particular domain. + (*) Get the peer address of a call. + + void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx); + + This is used to find the remote peer address of a call. + ======================= CONFIGURABLE PARAMETERS diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt new file mode 100644 index 000000000000..a0bf573dfa61 --- /dev/null +++ b/Documentation/networking/strparser.txt @@ -0,0 +1,136 @@ +Stream Parser +------------- + +The stream parser (strparser) is a utility that parses messages of an +application layer protocol running over a TCP connection. The stream +parser works in conjunction with an upper layer in the kernel to provide +kernel support for application layer messages. For instance, Kernel +Connection Multiplexor (KCM) uses the Stream Parser to parse messages +using a BPF program. + +Interface +--------- + +The API includes a context structure, a set of callbacks, utility +functions, and a data_ready function. The callbacks include +a parse_msg function that is called to perform parsing (e.g. +BPF parsing in case of KCM), and a rcv_msg function that is called +when a full message has been completed. + +A stream parser can be instantiated for a TCP connection. This is done +by: + +strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb) + +strp is a struct of type strparser that is allocated by the upper layer. +csk is the TCP socket associated with the stream parser. Callbacks are +called by the stream parser. + +Callbacks +--------- + +There are four callbacks: + +int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); + + parse_msg is called to determine the length of the next message + in the stream. The upper layer must implement this function. It + should parse the sk_buff as containing the headers for the + next application layer messages in the stream. + + The skb->cb in the input skb is a struct strp_rx_msg. Only + the offset field is relevant in parse_msg and gives the offset + where the message starts in the skb. + + The return values of this function are: + + >0 : indicates length of successfully parsed message + 0 : indicates more data must be received to parse the message + -ESTRPIPE : current message should not be processed by the + kernel, return control of the socket to userspace which + can proceed to read the messages itself + other < 0 : Error is parsing, give control back to userspace + assuming that synchronization is lost and the stream + is unrecoverable (application expected to close TCP socket) + + In the case that an error is returned (return value is less than + zero) the stream parser will set the error on TCP socket and wake + it up. If parse_msg returned -ESTRPIPE and the stream parser had + previously read some bytes for the current message, then the error + set on the attached socket is ENODATA since the stream is + unrecoverable in that case. + +void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + + rcv_msg is called when a full message has been received and + is queued. The callee must consume the sk_buff; it can + call strp_pause to prevent any further messages from being + received in rcv_msg (see strp_pause below). This callback + must be set. + + The skb->cb in the input skb is a struct strp_rx_msg. This + struct contains two fields: offset and full_len. Offset is + where the message starts in the skb, and full_len is the + the length of the message. skb->len - offset may be greater + then full_len since strparser does not trim the skb. + +int (*read_sock_done)(struct strparser *strp, int err); + + read_sock_done is called when the stream parser is done reading + the TCP socket. The stream parser may read multiple messages + in a loop and this function allows cleanup to occur when existing + the loop. If the callback is not set (NULL in strp_init) a + default function is used. + +void (*abort_parser)(struct strparser *strp, int err); + + This function is called when stream parser encounters an error + in parsing. The default function stops the stream parser for the + TCP socket and sets the error in the socket. The default function + can be changed by setting the callback to non-NULL in strp_init. + +Functions +--------- + +The upper layer calls strp_tcp_data_ready when data is ready on the lower +socket for strparser to process. This should be called from a data_ready +callback that is set on the socket. + +strp_stop is called to completely stop stream parser operations. This +is called internally when the stream parser encounters an error, and +it is called from the upper layer when unattaching a TCP socket. + +strp_done is called to unattach the stream parser from the TCP socket. +This must be called after the stream processor has be stopped. + +strp_check_rcv is called to check for new messages on the socket. This +is normally called at initialization of the a stream parser instance +of after strp_unpause. + +Statistics +---------- + +Various counters are kept for each stream parser for a TCP socket. +These are in the strp_stats structure. strp_aggr_stats is a convenience +structure for accumulating statistics for multiple stream parser +instances. save_strp_stats and aggregate_strp_stats are helper functions +to save and aggregate statistics. + +Message assembly limits +----------------------- + +The stream parser provide mechanisms to limit the resources consumed by +message assembly. + +A timer is set when assembly starts for a new message. The message +timeout is taken from rcvtime for the associated TCP socket. If the +timer fires before assembly completes the stream parser is aborted +and the ETIMEDOUT error is set on the TCP socket. + +Message length is limited to the receive buffer size of the associated +TCP socket. If the length returned by parse_msg is greater than +the socket buffer size then the stream parser is aborted with +EMSGSIZE error set on the TCP socket. Note that this makes the +maximum size of receive skbuffs for a socket with a stream parser +to be 2*sk_rcvbuf of the TCP socket. diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt index 31c39115834d..2bbac05ab9e2 100644 --- a/Documentation/networking/switchdev.txt +++ b/Documentation/networking/switchdev.txt @@ -283,15 +283,10 @@ be sent to the port netdev for processing by the bridge driver. The bridge should not reflood the packet to the same ports the device flooded, otherwise there will be duplicate packets on the wire. -To avoid duplicate packets, the device/driver should mark a packet as already -forwarded using skb->offload_fwd_mark. The same mark is set on the device -ports in the domain using dev->offload_fwd_mark. If the skb->offload_fwd_mark -is non-zero and matches the forwarding egress port's dev->skb_mark, the kernel -will drop the skb right before transmit on the egress port, with the -understanding that the device already forwarded the packet on same egress port. -The driver can use switchdev_port_fwd_mark_set() to set a globally unique mark -for port's dev->offload_fwd_mark, based on the port's parent ID (switch ID) and -a group ifindex. +To avoid duplicate packets, the switch driver should mark a packet as already +forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark +the skb using the ingress bridge port's mark and prevent it from being forwarded +through any bridge port with the same mark. It is possible for the switch device to not handle flooding and push the packets up to the bridge driver for flooding. This is not ideal as the number @@ -319,30 +314,29 @@ the kernel, with the device doing the FIB lookup and forwarding. The device does a longest prefix match (LPM) on FIB entries matching route prefix and forwards the packet to the matching FIB entry's nexthop(s) egress ports. -To program the device, the driver implements support for -SWITCHDEV_OBJ_IPV[4|6]_FIB object using switchdev_port_obj_xxx ops. -switchdev_port_obj_add is used for both adding a new FIB entry to the device, -or modifying an existing entry on the device. +To program the device, the driver has to register a FIB notifier handler +using register_fib_notifier. The following events are available: +FIB_EVENT_ENTRY_ADD: used for both adding a new FIB entry to the device, + or modifying an existing entry on the device. +FIB_EVENT_ENTRY_DEL: used for removing a FIB entry +FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL: used to propagate FIB rule changes -XXX: Currently, only SWITCHDEV_OBJ_ID_IPV4_FIB objects are supported. +FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass: -SWITCHDEV_OBJ_ID_IPV4_FIB object passes: - - struct switchdev_obj_ipv4_fib { /* IPV4_FIB */ + struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ u32 dst; int dst_len; struct fib_info *fi; u8 tos; u8 type; - u32 nlflags; u32 tb_id; - } ipv4_fib; + u32 nlflags; + }; to add/modify/delete IPv4 dst/dest_len prefix on table tb_id. The *fi structure holds details on the route and route's nexthops. *dev is one of the -port netdevs mentioned in the routes next hop list. If the output port netdevs -referenced in the route's nexthop list don't all have the same switch ID, the -driver is not called to add/modify/delete the FIB entry. +port netdevs mentioned in the route's next hop list. Routes offloaded to the device are labeled with "offload" in the ip route listing: @@ -360,6 +354,8 @@ listing: 12.0.0.4 via 11.0.0.9 dev sw1p2 proto zebra metric 20 offload 192.168.0.0/24 dev eth0 proto kernel scope link src 192.168.0.15 +The "offload" flag is set in case at least one device offloads the FIB entry. + XXX: add/mod/del IPv6 FIB API Nexthop Resolution diff --git a/MAINTAINERS b/MAINTAINERS index 3bb6640e86ba..f4b944c2683a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -636,6 +636,15 @@ F: drivers/tty/serial/altera_jtaguart.c F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h +AMAZON ETHERNET DRIVERS +M: Netanel Belgazal +R: Saeed Bishara +R: Zorik Machulsky +L: netdev@vger.kernel.org +S: Supported +F: Documentation/networking/ena.txt +F: drivers/net/ethernet/amazon/ + AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER M: Tom Lendacky M: Gary Hook @@ -5596,10 +5605,9 @@ F: Documentation/devicetree/bindings/scsi/hisilicon-sas.txt HOST AP DRIVER M: Jouni Malinen -L: hostap@shmoo.com (subscribers-only) L: linux-wireless@vger.kernel.org -W: http://hostap.epitest.fi/ -S: Maintained +W: http://w1.fi/hostap-driver.html +S: Obsolete F: drivers/net/wireless/intersil/hostap/ HP COMPAQ TC1100 TABLET WMI EXTRAS DRIVER @@ -9731,6 +9739,12 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvalo/ath.git S: Supported F: drivers/net/wireless/ath/ath10k/ +QUALCOMM EMAC GIGABIT ETHERNET DRIVER +M: Timur Tabi +L: netdev@vger.kernel.org +S: Supported +F: drivers/net/ethernet/qualcomm/emac/ + QUALCOMM HEXAGON ARCHITECTURE M: Richard Kuo L: linux-hexagon@vger.kernel.org @@ -9992,6 +10006,7 @@ F: net/rfkill/ RHASHTABLE M: Thomas Graf +M: Herbert Xu L: netdev@vger.kernel.org S: Maintained F: lib/rhashtable.c @@ -12332,6 +12347,7 @@ F: drivers/net/usb/smsc75xx.* USB SMSC95XX ETHERNET DRIVER M: Steve Glendinning +M: Microchip Linux Driver Support L: netdev@vger.kernel.org S: Maintained F: drivers/net/usb/smsc95xx.* diff --git a/arch/arm64/boot/dts/apm/apm-mustang.dts b/arch/arm64/boot/dts/apm/apm-mustang.dts index b7fb5d9295c2..32a961c5e98a 100644 --- a/arch/arm64/boot/dts/apm/apm-mustang.dts +++ b/arch/arm64/boot/dts/apm/apm-mustang.dts @@ -74,6 +74,7 @@ &xgenet { status = "ok"; + rxlos-gpios = <&sbgpio 12 1>; }; &mmc0 { diff --git a/arch/arm64/boot/dts/apm/apm-storm.dtsi b/arch/arm64/boot/dts/apm/apm-storm.dtsi index c29dab9d1834..31ea70a5a3ff 100644 --- a/arch/arm64/boot/dts/apm/apm-storm.dtsi +++ b/arch/arm64/boot/dts/apm/apm-storm.dtsi @@ -923,7 +923,7 @@ /* mac address will be overwritten by the bootloader */ local-mac-address = [00 00 00 00 00 00]; phy-connection-type = "rgmii"; - phy-handle = <&menet0phy>,<&menetphy>; + phy-handle = <&menetphy>,<&menet0phy>; mdio { compatible = "apm,xgene-mdio"; #address-cells = <1>; diff --git a/drivers/atm/eni.c b/drivers/atm/eni.c index 6339efd32697..f2aaf9e32a36 100644 --- a/drivers/atm/eni.c +++ b/drivers/atm/eni.c @@ -1845,8 +1845,9 @@ static int eni_start(struct atm_dev *dev) /* initialize memory management */ buffer_mem = eni_dev->mem - (buf - eni_dev->ram); eni_dev->free_list_size = buffer_mem/MID_MIN_BUF_SIZE/2; - eni_dev->free_list = kmalloc( - sizeof(struct eni_free)*(eni_dev->free_list_size+1),GFP_KERNEL); + eni_dev->free_list = kmalloc_array(eni_dev->free_list_size + 1, + sizeof(*eni_dev->free_list), + GFP_KERNEL); if (!eni_dev->free_list) { printk(KERN_ERR DEV_LABEL "(itf %d): couldn't get free page\n", dev->number); diff --git a/drivers/atm/fore200e.c b/drivers/atm/fore200e.c index 75dde903b238..81aaa505862c 100644 --- a/drivers/atm/fore200e.c +++ b/drivers/atm/fore200e.c @@ -2489,7 +2489,7 @@ static int fore200e_load_and_start_fw(struct fore200e *fore200e) { const struct firmware *firmware; struct device *device; - struct fw_header *fw_header; + const struct fw_header *fw_header; const __le32 *fw_data; u32 fw_size; u32 __iomem *load_addr; @@ -2511,9 +2511,9 @@ static int fore200e_load_and_start_fw(struct fore200e *fore200e) return err; } - fw_data = (__le32 *) firmware->data; + fw_data = (const __le32 *)firmware->data; fw_size = firmware->size / sizeof(u32); - fw_header = (struct fw_header *) firmware->data; + fw_header = (const struct fw_header *)firmware->data; load_addr = fore200e->virt_base + le32_to_cpu(fw_header->load_offset); DPRINTK(2, "device %s firmware being loaded at 0x%p (%d words)\n", diff --git a/drivers/atm/he.c b/drivers/atm/he.c index 0f5cb37636bc..31b513a23ae0 100644 --- a/drivers/atm/he.c +++ b/drivers/atm/he.c @@ -779,8 +779,9 @@ static int he_init_group(struct he_dev *he_dev, int group) G0_RBPS_BS + (group * 32)); /* bitmap table */ - he_dev->rbpl_table = kmalloc(BITS_TO_LONGS(RBPL_TABLE_SIZE) - * sizeof(unsigned long), GFP_KERNEL); + he_dev->rbpl_table = kmalloc_array(BITS_TO_LONGS(RBPL_TABLE_SIZE), + sizeof(*he_dev->rbpl_table), + GFP_KERNEL); if (!he_dev->rbpl_table) { hprintk("unable to allocate rbpl bitmap table\n"); return -ENOMEM; @@ -788,8 +789,9 @@ static int he_init_group(struct he_dev *he_dev, int group) bitmap_zero(he_dev->rbpl_table, RBPL_TABLE_SIZE); /* rbpl_virt 64-bit pointers */ - he_dev->rbpl_virt = kmalloc(RBPL_TABLE_SIZE - * sizeof(struct he_buff *), GFP_KERNEL); + he_dev->rbpl_virt = kmalloc_array(RBPL_TABLE_SIZE, + sizeof(*he_dev->rbpl_virt), + GFP_KERNEL); if (!he_dev->rbpl_virt) { hprintk("unable to allocate rbpl virt table\n"); goto out_free_rbpl_table; diff --git a/drivers/atm/iphase.c b/drivers/atm/iphase.c index 809dd1e02091..b2756765950e 100644 --- a/drivers/atm/iphase.c +++ b/drivers/atm/iphase.c @@ -1885,9 +1885,9 @@ static int open_tx(struct atm_vcc *vcc) if ((ret = ia_cbr_setup (iadev, vcc)) < 0) { return ret; } - } - else - printk("iadev: Non UBR, ABR and CBR traffic not supportedn"); + } else { + printk("iadev: Non UBR, ABR and CBR traffic not supported\n"); + } iadev->testTable[vcc->vci]->vc_status |= VC_ACTIVE; IF_EVENT(printk("ia open_tx returning \n");) @@ -1975,7 +1975,9 @@ static int tx_init(struct atm_dev *dev) buf_desc_ptr++; tx_pkt_start += iadev->tx_buf_sz; } - iadev->tx_buf = kmalloc(iadev->num_tx_desc*sizeof(struct cpcs_trailer_desc), GFP_KERNEL); + iadev->tx_buf = kmalloc_array(iadev->num_tx_desc, + sizeof(*iadev->tx_buf), + GFP_KERNEL); if (!iadev->tx_buf) { printk(KERN_ERR DEV_LABEL " couldn't get mem\n"); goto err_free_dle; @@ -1995,8 +1997,9 @@ static int tx_init(struct atm_dev *dev) sizeof(*cpcs), DMA_TO_DEVICE); } - iadev->desc_tbl = kmalloc(iadev->num_tx_desc * - sizeof(struct desc_tbl_t), GFP_KERNEL); + iadev->desc_tbl = kmalloc_array(iadev->num_tx_desc, + sizeof(*iadev->desc_tbl), + GFP_KERNEL); if (!iadev->desc_tbl) { printk(KERN_ERR DEV_LABEL " couldn't get mem\n"); goto err_free_all_tx_bufs; @@ -2124,7 +2127,9 @@ static int tx_init(struct atm_dev *dev) memset((caddr_t)(iadev->seg_ram+i), 0, iadev->num_vc*4); vc = (struct main_vc *)iadev->MAIN_VC_TABLE_ADDR; evc = (struct ext_vc *)iadev->EXT_VC_TABLE_ADDR; - iadev->testTable = kmalloc(sizeof(long)*iadev->num_vc, GFP_KERNEL); + iadev->testTable = kmalloc_array(iadev->num_vc, + sizeof(*iadev->testTable), + GFP_KERNEL); if (!iadev->testTable) { printk("Get freepage failed\n"); goto err_free_desc_tbl; diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index 700ed15c2362..c7296b583787 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -370,7 +370,8 @@ static int ns_init_card(int i, struct pci_dev *pcidev) return error; } - if ((card = kmalloc(sizeof(ns_dev), GFP_KERNEL)) == NULL) { + card = kmalloc(sizeof(*card), GFP_KERNEL); + if (!card) { printk ("nicstar%d: can't allocate memory for device structure.\n", i); @@ -611,7 +612,7 @@ static int ns_init_card(int i, struct pci_dev *pcidev) for (j = 0; j < card->rct_size; j++) ns_write_sram(card, j * 4, u32d, 4); - memset(card->vcmap, 0, NS_MAX_RCTSIZE * sizeof(vc_map)); + memset(card->vcmap, 0, sizeof(card->vcmap)); for (j = 0; j < NS_FRSCD_NUM; j++) card->scd2vc[j] = NULL; @@ -862,7 +863,7 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) if (size != VBR_SCQSIZE && size != CBR_SCQSIZE) return NULL; - scq = kmalloc(sizeof(scq_info), GFP_KERNEL); + scq = kmalloc(sizeof(*scq), GFP_KERNEL); if (!scq) return NULL; scq->org = dma_alloc_coherent(&card->pcidev->dev, @@ -871,8 +872,9 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) kfree(scq); return NULL; } - scq->skb = kmalloc(sizeof(struct sk_buff *) * - (size / NS_SCQE_SIZE), GFP_KERNEL); + scq->skb = kmalloc_array(size / NS_SCQE_SIZE, + sizeof(*scq->skb), + GFP_KERNEL); if (!scq->skb) { dma_free_coherent(&card->pcidev->dev, 2 * size, scq->org, scq->dma); @@ -2021,7 +2023,8 @@ static void dequeue_rx(ns_dev * card, ns_rsqe * rsqe) cell = skb->data; for (i = ns_rsqe_cellcount(rsqe); i; i--) { - if ((sb = dev_alloc_skb(NS_SMSKBSIZE)) == NULL) { + sb = dev_alloc_skb(NS_SMSKBSIZE); + if (!sb) { printk ("nicstar%d: Can't allocate buffers for aal0.\n", card->index); diff --git a/drivers/atm/zatm.c b/drivers/atm/zatm.c index cecfb943762f..d3dc95484161 100644 --- a/drivers/atm/zatm.c +++ b/drivers/atm/zatm.c @@ -598,12 +598,13 @@ static void close_rx(struct atm_vcc *vcc) static int start_rx(struct atm_dev *dev) { struct zatm_dev *zatm_dev; - int size,i; + int i; -DPRINTK("start_rx\n"); + DPRINTK("start_rx\n"); zatm_dev = ZATM_DEV(dev); - size = sizeof(struct atm_vcc *)*zatm_dev->chans; - zatm_dev->rx_map = kzalloc(size,GFP_KERNEL); + zatm_dev->rx_map = kcalloc(zatm_dev->chans, + sizeof(*zatm_dev->rx_map), + GFP_KERNEL); if (!zatm_dev->rx_map) return -ENOMEM; /* set VPI/VCI split (use all VCIs and give what's left to VPIs) */ zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR); @@ -998,8 +999,9 @@ static int start_tx(struct atm_dev *dev) DPRINTK("start_tx\n"); zatm_dev = ZATM_DEV(dev); - zatm_dev->tx_map = kmalloc(sizeof(struct atm_vcc *)* - zatm_dev->chans,GFP_KERNEL); + zatm_dev->tx_map = kmalloc_array(zatm_dev->chans, + sizeof(*zatm_dev->tx_map), + GFP_KERNEL); if (!zatm_dev->tx_map) return -ENOMEM; zatm_dev->tx_bw = ATM_OC3_PCR; zatm_dev->free_shapers = (1 << NR_SHAPERS)-1; @@ -1398,7 +1400,7 @@ static int zatm_open(struct atm_vcc *vcc) DPRINTK(DEV_LABEL "(itf %d): open %d.%d\n",vcc->dev->number,vcc->vpi, vcc->vci); if (!test_bit(ATM_VF_PARTIAL,&vcc->flags)) { - zatm_vcc = kmalloc(sizeof(struct zatm_vcc),GFP_KERNEL); + zatm_vcc = kmalloc(sizeof(*zatm_vcc), GFP_KERNEL); if (!zatm_vcc) { clear_bit(ATM_VF_ADDR,&vcc->flags); return -ENOMEM; diff --git a/drivers/bcma/driver_chipcommon.c b/drivers/bcma/driver_chipcommon.c index 921ce1834673..b4f6520e74f0 100644 --- a/drivers/bcma/driver_chipcommon.c +++ b/drivers/bcma/driver_chipcommon.c @@ -36,12 +36,31 @@ u32 bcma_chipco_get_alp_clock(struct bcma_drv_cc *cc) } EXPORT_SYMBOL_GPL(bcma_chipco_get_alp_clock); +static bool bcma_core_cc_has_pmu_watchdog(struct bcma_drv_cc *cc) +{ + struct bcma_bus *bus = cc->core->bus; + + if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573) { + WARN(bus->chipinfo.rev <= 1, "No watchdog available\n"); + /* 53573B0 and 53573B1 have bugged PMU watchdog. It can + * be enabled but timer can't be bumped. Use CC one + * instead. + */ + return false; + } + return true; + } else { + return false; + } +} + static u32 bcma_chipco_watchdog_get_max_timer(struct bcma_drv_cc *cc) { struct bcma_bus *bus = cc->core->bus; u32 nb; - if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bcma_core_cc_has_pmu_watchdog(cc)) { if (bus->chipinfo.id == BCMA_CHIP_ID_BCM4706) nb = 32; else if (cc->core->id.rev < 26) @@ -95,9 +114,16 @@ static int bcma_chipco_watchdog_ticks_per_ms(struct bcma_drv_cc *cc) int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc) { + struct bcma_bus *bus = cc->core->bus; struct bcm47xx_wdt wdt = {}; struct platform_device *pdev; + if (bus->chipinfo.id == BCMA_CHIP_ID_BCM53573 && + bus->chipinfo.rev <= 1) { + pr_debug("No watchdog on 53573A0 / 53573A1\n"); + return 0; + } + wdt.driver_data = cc; wdt.timer_set = bcma_chipco_watchdog_timer_set_wdt; wdt.timer_set_ms = bcma_chipco_watchdog_timer_set_ms_wdt; @@ -105,7 +131,7 @@ int bcma_chipco_watchdog_register(struct bcma_drv_cc *cc) bcma_chipco_watchdog_get_max_timer(cc) / cc->ticks_per_ms; pdev = platform_device_register_data(NULL, "bcm47xx-wdt", - cc->core->bus->num, &wdt, + bus->num, &wdt, sizeof(wdt)); if (IS_ERR(pdev)) return PTR_ERR(pdev); @@ -217,7 +243,7 @@ u32 bcma_chipco_watchdog_timer_set(struct bcma_drv_cc *cc, u32 ticks) u32 maxt; maxt = bcma_chipco_watchdog_get_max_timer(cc); - if (cc->capabilities & BCMA_CC_CAP_PMU) { + if (bcma_core_cc_has_pmu_watchdog(cc)) { if (ticks == 1) ticks = 2; else if (ticks > maxt) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 1f635471f318..2c1798e38abd 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -209,6 +209,8 @@ static void bcma_of_fill_device(struct platform_device *parent, core->dev.of_node = node; core->irq = bcma_of_get_irq(parent, core, 0); + + of_dma_configure(&core->dev, node); } unsigned int bcma_core_irq(struct bcma_device *core, int num) @@ -248,12 +250,12 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core) core->irq = bus->host_pci->irq; break; case BCMA_HOSTTYPE_SOC: - core->dev.dma_mask = &core->dev.coherent_dma_mask; - if (bus->host_pdev) { + if (IS_ENABLED(CONFIG_OF) && bus->host_pdev) { core->dma_dev = &bus->host_pdev->dev; core->dev.parent = &bus->host_pdev->dev; bcma_of_fill_device(bus->host_pdev, core); } else { + core->dev.dma_mask = &core->dev.coherent_dma_mask; core->dma_dev = &core->dev; } break; diff --git a/drivers/bluetooth/Kconfig b/drivers/bluetooth/Kconfig index cf50fd2e96df..3cc9bff9d99d 100644 --- a/drivers/bluetooth/Kconfig +++ b/drivers/bluetooth/Kconfig @@ -180,6 +180,17 @@ config BT_HCIUART_AG6XX Say Y here to compile support for Intel AG6XX protocol. +config BT_HCIUART_MRVL + bool "Marvell protocol support" + depends on BT_HCIUART + select BT_HCIUART_H4 + help + Marvell is serial protocol for communication between Bluetooth + device and host. This protocol is required for most Marvell Bluetooth + devices with UART interface. + + Say Y here to compile support for HCI MRVL protocol. + config BT_HCIBCM203X tristate "HCI BCM203x USB driver" depends on USB @@ -331,4 +342,16 @@ config BT_WILINK Say Y here to compile support for Texas Instrument's WiLink7 driver into the kernel or say M to compile it as module (btwilink). +config BT_QCOMSMD + tristate "Qualcomm SMD based HCI support" + depends on QCOM_SMD && QCOM_WCNSS_CTRL + select BT_QCA + help + Qualcomm SMD based HCI driver. + This driver is used to bridge HCI data onto the shared memory + channels to the WCNSS core. + + Say Y here to compile support for HCI over Qualcomm SMD into the + kernel or say M to compile as a module. + endmenu diff --git a/drivers/bluetooth/Makefile b/drivers/bluetooth/Makefile index 9c18939fc5c9..b1fc29a697b7 100644 --- a/drivers/bluetooth/Makefile +++ b/drivers/bluetooth/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_BT_ATH3K) += ath3k.o obj-$(CONFIG_BT_MRVL) += btmrvl.o obj-$(CONFIG_BT_MRVL_SDIO) += btmrvl_sdio.o obj-$(CONFIG_BT_WILINK) += btwilink.o +obj-$(CONFIG_BT_QCOMSMD) += btqcomsmd.o obj-$(CONFIG_BT_BCM) += btbcm.o obj-$(CONFIG_BT_RTL) += btrtl.o obj-$(CONFIG_BT_QCA) += btqca.o @@ -37,6 +38,7 @@ hci_uart-$(CONFIG_BT_HCIUART_INTEL) += hci_intel.o hci_uart-$(CONFIG_BT_HCIUART_BCM) += hci_bcm.o hci_uart-$(CONFIG_BT_HCIUART_QCA) += hci_qca.o hci_uart-$(CONFIG_BT_HCIUART_AG6XX) += hci_ag6xx.o +hci_uart-$(CONFIG_BT_HCIUART_MRVL) += hci_mrvl.o hci_uart-objs := $(hci_uart-y) ccflags-y += -D__CHECK_ENDIAN__ diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c index 4a6208168850..28afd5d585f9 100644 --- a/drivers/bluetooth/btqca.c +++ b/drivers/bluetooth/btqca.c @@ -55,8 +55,8 @@ static int rome_patch_ver_req(struct hci_dev *hdev, u32 *rome_version) } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } @@ -224,8 +224,8 @@ static int rome_tlv_send_segment(struct hci_dev *hdev, int idx, int seg_size, } edl = (struct edl_event_hdr *)(skb->data); - if (!edl || !edl->data) { - BT_ERR("%s: TLV with no header or no data", hdev->name); + if (!edl) { + BT_ERR("%s: TLV with no header", hdev->name); err = -EILSEQ; goto out; } diff --git a/drivers/bluetooth/btqcomsmd.c b/drivers/bluetooth/btqcomsmd.c new file mode 100644 index 000000000000..08c2c93887c1 --- /dev/null +++ b/drivers/bluetooth/btqcomsmd.c @@ -0,0 +1,182 @@ +/* + * Copyright (c) 2016, Linaro Ltd. + * Copyright (c) 2015, Sony Mobile Communications Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "btqca.h" + +struct btqcomsmd { + struct hci_dev *hdev; + + struct qcom_smd_channel *acl_channel; + struct qcom_smd_channel *cmd_channel; +}; + +static int btqcomsmd_recv(struct hci_dev *hdev, unsigned int type, + const void *data, size_t count) +{ + struct sk_buff *skb; + + /* Use GFP_ATOMIC as we're in IRQ context */ + skb = bt_skb_alloc(count, GFP_ATOMIC); + if (!skb) { + hdev->stat.err_rx++; + return -ENOMEM; + } + + hci_skb_pkt_type(skb) = type; + memcpy(skb_put(skb, count), data, count); + + return hci_recv_frame(hdev, skb); +} + +static int btqcomsmd_acl_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + btq->hdev->stat.byte_rx += count; + return btqcomsmd_recv(btq->hdev, HCI_ACLDATA_PKT, data, count); +} + +static int btqcomsmd_cmd_callback(struct qcom_smd_channel *channel, + const void *data, size_t count) +{ + struct btqcomsmd *btq = qcom_smd_get_drvdata(channel); + + return btqcomsmd_recv(btq->hdev, HCI_EVENT_PKT, data, count); +} + +static int btqcomsmd_send(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct btqcomsmd *btq = hci_get_drvdata(hdev); + int ret; + + switch (hci_skb_pkt_type(skb)) { + case HCI_ACLDATA_PKT: + ret = qcom_smd_send(btq->acl_channel, skb->data, skb->len); + hdev->stat.acl_tx++; + hdev->stat.byte_tx += skb->len; + break; + case HCI_COMMAND_PKT: + ret = qcom_smd_send(btq->cmd_channel, skb->data, skb->len); + hdev->stat.cmd_tx++; + break; + default: + ret = -EILSEQ; + break; + } + + kfree_skb(skb); + + return ret; +} + +static int btqcomsmd_open(struct hci_dev *hdev) +{ + return 0; +} + +static int btqcomsmd_close(struct hci_dev *hdev) +{ + return 0; +} + +static int btqcomsmd_probe(struct platform_device *pdev) +{ + struct btqcomsmd *btq; + struct hci_dev *hdev; + void *wcnss; + int ret; + + btq = devm_kzalloc(&pdev->dev, sizeof(*btq), GFP_KERNEL); + if (!btq) + return -ENOMEM; + + wcnss = dev_get_drvdata(pdev->dev.parent); + + btq->acl_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_ACL", + btqcomsmd_acl_callback); + if (IS_ERR(btq->acl_channel)) + return PTR_ERR(btq->acl_channel); + + btq->cmd_channel = qcom_wcnss_open_channel(wcnss, "APPS_RIVA_BT_CMD", + btqcomsmd_cmd_callback); + if (IS_ERR(btq->cmd_channel)) + return PTR_ERR(btq->cmd_channel); + + qcom_smd_set_drvdata(btq->acl_channel, btq); + qcom_smd_set_drvdata(btq->cmd_channel, btq); + + hdev = hci_alloc_dev(); + if (!hdev) + return -ENOMEM; + + hci_set_drvdata(hdev, btq); + btq->hdev = hdev; + SET_HCIDEV_DEV(hdev, &pdev->dev); + + hdev->bus = HCI_SMD; + hdev->open = btqcomsmd_open; + hdev->close = btqcomsmd_close; + hdev->send = btqcomsmd_send; + hdev->set_bdaddr = qca_set_bdaddr_rome; + + ret = hci_register_dev(hdev); + if (ret < 0) { + hci_free_dev(hdev); + return ret; + } + + platform_set_drvdata(pdev, btq); + + return 0; +} + +static int btqcomsmd_remove(struct platform_device *pdev) +{ + struct btqcomsmd *btq = platform_get_drvdata(pdev); + + hci_unregister_dev(btq->hdev); + hci_free_dev(btq->hdev); + + return 0; +} + +static const struct of_device_id btqcomsmd_of_match[] = { + { .compatible = "qcom,wcnss-bt", }, + { }, +}; + +static struct platform_driver btqcomsmd_driver = { + .probe = btqcomsmd_probe, + .remove = btqcomsmd_remove, + .driver = { + .name = "btqcomsmd", + .of_match_table = btqcomsmd_of_match, + }, +}; + +module_platform_driver(btqcomsmd_driver); + +MODULE_AUTHOR("Bjorn Andersson "); +MODULE_DESCRIPTION("Qualcomm SMD HCI driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c index 84288938f7f2..fc9b25703c67 100644 --- a/drivers/bluetooth/btrtl.c +++ b/drivers/bluetooth/btrtl.c @@ -33,6 +33,7 @@ #define RTL_ROM_LMP_8723B 0x8723 #define RTL_ROM_LMP_8821A 0x8821 #define RTL_ROM_LMP_8761A 0x8761 +#define RTL_ROM_LMP_8822B 0x8822 static int rtl_read_rom_version(struct hci_dev *hdev, u8 *version) { @@ -78,11 +79,15 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, const unsigned char *patch_length_base, *patch_offset_base; u32 patch_offset = 0; u16 patch_length, num_patches; - const u16 project_id_to_lmp_subver[] = { - RTL_ROM_LMP_8723A, - RTL_ROM_LMP_8723B, - RTL_ROM_LMP_8821A, - RTL_ROM_LMP_8761A + static const struct { + __u16 lmp_subver; + __u8 id; + } project_id_to_lmp_subver[] = { + { RTL_ROM_LMP_8723A, 0 }, + { RTL_ROM_LMP_8723B, 1 }, + { RTL_ROM_LMP_8821A, 2 }, + { RTL_ROM_LMP_8761A, 3 }, + { RTL_ROM_LMP_8822B, 8 }, }; ret = rtl_read_rom_version(hdev, &rom_version); @@ -134,14 +139,20 @@ static int rtl8723b_parse_firmware(struct hci_dev *hdev, u16 lmp_subver, return -EINVAL; } - if (project_id >= ARRAY_SIZE(project_id_to_lmp_subver)) { + /* Find project_id in table */ + for (i = 0; i < ARRAY_SIZE(project_id_to_lmp_subver); i++) { + if (project_id == project_id_to_lmp_subver[i].id) + break; + } + + if (i >= ARRAY_SIZE(project_id_to_lmp_subver)) { BT_ERR("%s: unknown project id %d", hdev->name, project_id); return -EINVAL; } - if (lmp_subver != project_id_to_lmp_subver[project_id]) { + if (lmp_subver != project_id_to_lmp_subver[i].lmp_subver) { BT_ERR("%s: firmware is for %x but this is a %x", hdev->name, - project_id_to_lmp_subver[project_id], lmp_subver); + project_id_to_lmp_subver[i].lmp_subver, lmp_subver); return -EINVAL; } @@ -257,6 +268,26 @@ out: return ret; } +static int rtl_load_config(struct hci_dev *hdev, const char *name, u8 **buff) +{ + const struct firmware *fw; + int ret; + + BT_INFO("%s: rtl: loading %s", hdev->name, name); + ret = request_firmware(&fw, name, &hdev->dev); + if (ret < 0) { + BT_ERR("%s: Failed to load %s", hdev->name, name); + return ret; + } + + ret = fw->size; + *buff = kmemdup(fw->data, ret, GFP_KERNEL); + + release_firmware(fw); + + return ret; +} + static int btrtl_setup_rtl8723a(struct hci_dev *hdev) { const struct firmware *fw; @@ -296,25 +327,74 @@ static int btrtl_setup_rtl8723b(struct hci_dev *hdev, u16 lmp_subver, unsigned char *fw_data = NULL; const struct firmware *fw; int ret; + int cfg_sz; + u8 *cfg_buff = NULL; + u8 *tbuff; + char *cfg_name = NULL; + + switch (lmp_subver) { + case RTL_ROM_LMP_8723B: + cfg_name = "rtl_bt/rtl8723b_config.bin"; + break; + case RTL_ROM_LMP_8821A: + cfg_name = "rtl_bt/rtl8821a_config.bin"; + break; + case RTL_ROM_LMP_8761A: + cfg_name = "rtl_bt/rtl8761a_config.bin"; + break; + case RTL_ROM_LMP_8822B: + cfg_name = "rtl_bt/rtl8822b_config.bin"; + break; + default: + BT_ERR("%s: rtl: no config according to lmp_subver %04x", + hdev->name, lmp_subver); + break; + } + + if (cfg_name) { + cfg_sz = rtl_load_config(hdev, cfg_name, &cfg_buff); + if (cfg_sz < 0) + cfg_sz = 0; + } else + cfg_sz = 0; BT_INFO("%s: rtl: loading %s", hdev->name, fw_name); ret = request_firmware(&fw, fw_name, &hdev->dev); if (ret < 0) { BT_ERR("%s: Failed to load %s", hdev->name, fw_name); - return ret; + goto err_req_fw; } ret = rtl8723b_parse_firmware(hdev, lmp_subver, fw, &fw_data); if (ret < 0) goto out; + if (cfg_sz) { + tbuff = kzalloc(ret + cfg_sz, GFP_KERNEL); + if (!tbuff) { + ret = -ENOMEM; + goto out; + } + + memcpy(tbuff, fw_data, ret); + kfree(fw_data); + + memcpy(tbuff + ret, cfg_buff, cfg_sz); + ret += cfg_sz; + + fw_data = tbuff; + } + + BT_INFO("cfg_sz %d, total size %d", cfg_sz, ret); + ret = rtl_download_firmware(hdev, fw_data, ret); - kfree(fw_data); - if (ret < 0) - goto out; out: release_firmware(fw); + kfree(fw_data); +err_req_fw: + if (cfg_sz) + kfree(cfg_buff); return ret; } @@ -377,6 +457,9 @@ int btrtl_setup_realtek(struct hci_dev *hdev) case RTL_ROM_LMP_8761A: return btrtl_setup_rtl8723b(hdev, lmp_subver, "rtl_bt/rtl8761a_fw.bin"); + case RTL_ROM_LMP_8822B: + return btrtl_setup_rtl8723b(hdev, lmp_subver, + "rtl_bt/rtl8822b_fw.bin"); default: BT_INFO("rtl: assuming no firmware upload needed."); return 0; diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c index 811f9b97e360..6bd63b84abd0 100644 --- a/drivers/bluetooth/btusb.c +++ b/drivers/bluetooth/btusb.c @@ -62,6 +62,7 @@ static struct usb_driver btusb_driver; #define BTUSB_REALTEK 0x20000 #define BTUSB_BCM2045 0x40000 #define BTUSB_IFNUM_2 0x80000 +#define BTUSB_CW6622 0x100000 static const struct usb_device_id btusb_table[] = { /* Generic Bluetooth USB device */ @@ -248,9 +249,11 @@ static const struct usb_device_id blacklist_table[] = { /* QCA ROME chipset */ { USB_DEVICE(0x0cf3, 0xe007), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x0cf3, 0xe009), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe300), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0cf3, 0xe360), .driver_info = BTUSB_QCA_ROME }, { USB_DEVICE(0x0489, 0xe092), .driver_info = BTUSB_QCA_ROME }, + { USB_DEVICE(0x04ca, 0x3011), .driver_info = BTUSB_QCA_ROME }, /* Broadcom BCM2035 */ { USB_DEVICE(0x0a5c, 0x2009), .driver_info = BTUSB_BCM92035 }, @@ -290,7 +293,8 @@ static const struct usb_device_id blacklist_table[] = { { USB_DEVICE(0x0400, 0x080a), .driver_info = BTUSB_BROKEN_ISOC }, /* CONWISE Technology based adapters with buggy SCO support */ - { USB_DEVICE(0x0e5e, 0x6622), .driver_info = BTUSB_BROKEN_ISOC }, + { USB_DEVICE(0x0e5e, 0x6622), + .driver_info = BTUSB_BROKEN_ISOC | BTUSB_CW6622}, /* Roper Class 1 Bluetooth Dongle (Silicon Wave based) */ { USB_DEVICE(0x1310, 0x0001), .driver_info = BTUSB_SWAVE }, @@ -2221,9 +2225,8 @@ static int btusb_setup_intel_new(struct hci_dev *hdev) err = wait_on_bit_timeout(&data->flags, BTUSB_DOWNLOADING, TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Firmware loading interrupted", hdev->name); - err = -EINTR; goto done; } @@ -2275,7 +2278,7 @@ done: TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { BT_ERR("%s: Device boot interrupted", hdev->name); return -EINTR; } @@ -2845,6 +2848,9 @@ static int btusb_probe(struct usb_interface *intf, hdev->send = btusb_send_frame; hdev->notify = btusb_notify; + if (id->driver_info & BTUSB_CW6622) + set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); + if (id->driver_info & BTUSB_BCM2045) set_bit(HCI_QUIRK_BROKEN_STORED_LINK_KEY, &hdev->quirks); diff --git a/drivers/bluetooth/btwilink.c b/drivers/bluetooth/btwilink.c index 485281b3f167..ef51c9c864c5 100644 --- a/drivers/bluetooth/btwilink.c +++ b/drivers/bluetooth/btwilink.c @@ -245,6 +245,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) { struct ti_st *hst; long len; + int pkt_type; hst = hci_get_drvdata(hdev); @@ -258,6 +259,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) * Freeing skb memory is taken care in shared transport layer, * so don't free skb memory here. */ + pkt_type = hci_skb_pkt_type(skb); len = hst->st_write(skb); if (len < 0) { kfree_skb(skb); @@ -268,7 +270,7 @@ static int ti_st_send_frame(struct hci_dev *hdev, struct sk_buff *skb) /* ST accepted our skb. So, Go ahead and do rest */ hdev->stat.byte_tx += len; - ti_st_tx_complete(hst, hci_skb_pkt_type(skb)); + ti_st_tx_complete(hst, pkt_type); return 0; } diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c index 1c97eda8bae3..5ccb90ef0146 100644 --- a/drivers/bluetooth/hci_bcm.c +++ b/drivers/bluetooth/hci_bcm.c @@ -798,7 +798,7 @@ static int bcm_remove(struct platform_device *pdev) static const struct hci_uart_proto bcm_proto = { .id = HCI_UART_BCM, - .name = "BCM", + .name = "Broadcom", .manufacturer = 15, .init_speed = 115200, .oper_speed = 4000000, diff --git a/drivers/bluetooth/hci_bcsp.c b/drivers/bluetooth/hci_bcsp.c index d7d23ceba4d1..a2c921faaa12 100644 --- a/drivers/bluetooth/hci_bcsp.c +++ b/drivers/bluetooth/hci_bcsp.c @@ -90,7 +90,8 @@ struct bcsp_struct { /* ---- BCSP CRC calculation ---- */ /* Table for calculating CRC for polynomial 0x1021, LSB processed first, -initial value 0xffff, bits shifted in reverse order. */ + * initial value 0xffff, bits shifted in reverse order. + */ static const u16 crc_table[] = { 0x0000, 0x1081, 0x2102, 0x3183, @@ -174,7 +175,7 @@ static int bcsp_enqueue(struct hci_uart *hu, struct sk_buff *skb) } static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, - int len, int pkt_type) + int len, int pkt_type) { struct sk_buff *nskb; u8 hdr[4], chan; @@ -213,6 +214,7 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, /* Vendor specific commands */ if (hci_opcode_ogf(__le16_to_cpu(opcode)) == 0x3f) { u8 desc = *(data + HCI_COMMAND_HDR_SIZE); + if ((desc & 0xf0) == 0xc0) { data += HCI_COMMAND_HDR_SIZE + 1; len -= HCI_COMMAND_HDR_SIZE + 1; @@ -271,8 +273,8 @@ static struct sk_buff *bcsp_prepare_pkt(struct bcsp_struct *bcsp, u8 *data, /* Put CRC */ if (bcsp->use_crc) { bcsp_txmsg_crc = bitrev16(bcsp_txmsg_crc); - bcsp_slip_one_byte(nskb, (u8) ((bcsp_txmsg_crc >> 8) & 0x00ff)); - bcsp_slip_one_byte(nskb, (u8) (bcsp_txmsg_crc & 0x00ff)); + bcsp_slip_one_byte(nskb, (u8)((bcsp_txmsg_crc >> 8) & 0x00ff)); + bcsp_slip_one_byte(nskb, (u8)(bcsp_txmsg_crc & 0x00ff)); } bcsp_slip_msgdelim(nskb); @@ -287,7 +289,8 @@ static struct sk_buff *bcsp_dequeue(struct hci_uart *hu) struct sk_buff *skb; /* First of all, check for unreliable messages in the queue, - since they have priority */ + * since they have priority + */ skb = skb_dequeue(&bcsp->unrel); if (skb != NULL) { @@ -414,7 +417,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) /* spot "conf" pkts and reply with a "conf rsp" pkt */ if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && - !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) { + !memcmp(&bcsp->rx_skb->data[4], conf_pkt, 4)) { struct sk_buff *nskb = alloc_skb(4, GFP_ATOMIC); BT_DBG("Found a LE conf pkt"); @@ -428,7 +431,7 @@ static void bcsp_handle_le_pkt(struct hci_uart *hu) } /* Spot "sync" pkts. If we find one...disaster! */ else if (bcsp->rx_skb->data[1] >> 4 == 4 && bcsp->rx_skb->data[2] == 0 && - !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) { + !memcmp(&bcsp->rx_skb->data[4], sync_pkt, 4)) { BT_ERR("Found a LE sync pkt, card has reset"); } } @@ -446,7 +449,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char default: memcpy(skb_put(bcsp->rx_skb, 1), &byte, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, byte); bcsp->rx_count--; } @@ -457,7 +460,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char case 0xdc: memcpy(skb_put(bcsp->rx_skb, 1), &c0, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xc0); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; @@ -466,7 +469,7 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char case 0xdd: memcpy(skb_put(bcsp->rx_skb, 1), &db, 1); if ((bcsp->rx_skb->data[0] & 0x40) != 0 && - bcsp->rx_state != BCSP_W4_CRC) + bcsp->rx_state != BCSP_W4_CRC) bcsp_crc_update(&bcsp->message_crc, 0xdb); bcsp->rx_esc_state = BCSP_ESCSTATE_NOESC; bcsp->rx_count--; @@ -485,13 +488,28 @@ static inline void bcsp_unslip_one_byte(struct bcsp_struct *bcsp, unsigned char static void bcsp_complete_rx_pkt(struct hci_uart *hu) { struct bcsp_struct *bcsp = hu->priv; - int pass_up; + int pass_up = 0; if (bcsp->rx_skb->data[0] & 0x80) { /* reliable pkt */ BT_DBG("Received seqno %u from card", bcsp->rxseq_txack); - bcsp->rxseq_txack++; - bcsp->rxseq_txack %= 0x8; - bcsp->txack_req = 1; + + /* check the rx sequence number is as expected */ + if ((bcsp->rx_skb->data[0] & 0x07) == bcsp->rxseq_txack) { + bcsp->rxseq_txack++; + bcsp->rxseq_txack %= 0x8; + } else { + /* handle re-transmitted packet or + * when packet was missed + */ + BT_ERR("Out-of-order packet arrived, got %u expected %u", + bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); + + /* do not process out-of-order packet payload */ + pass_up = 2; + } + + /* send current txack value to all received reliable packets */ + bcsp->txack_req = 1; /* If needed, transmit an ack pkt */ hci_uart_tx_wakeup(hu); @@ -500,26 +518,33 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) bcsp->rxack = (bcsp->rx_skb->data[0] >> 3) & 0x07; BT_DBG("Request for pkt %u from card", bcsp->rxack); + /* handle received ACK indications, + * including those from out-of-order packets + */ bcsp_pkt_cull(bcsp); - if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && - bcsp->rx_skb->data[0] & 0x80) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && - bcsp->rx_skb->data[0] & 0x80) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { - hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; - pass_up = 1; - } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && - !(bcsp->rx_skb->data[0] & 0x80)) { - bcsp_handle_le_pkt(hu); - pass_up = 0; - } else - pass_up = 0; - if (!pass_up) { + if (pass_up != 2) { + if ((bcsp->rx_skb->data[1] & 0x0f) == 6 && + (bcsp->rx_skb->data[0] & 0x80)) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_ACLDATA_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 5 && + (bcsp->rx_skb->data[0] & 0x80)) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_EVENT_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 7) { + hci_skb_pkt_type(bcsp->rx_skb) = HCI_SCODATA_PKT; + pass_up = 1; + } else if ((bcsp->rx_skb->data[1] & 0x0f) == 1 && + !(bcsp->rx_skb->data[0] & 0x80)) { + bcsp_handle_le_pkt(hu); + pass_up = 0; + } else { + pass_up = 0; + } + } + + if (pass_up == 0) { struct hci_event_hdr hdr; u8 desc = (bcsp->rx_skb->data[1] & 0x0f); @@ -537,18 +562,23 @@ static void bcsp_complete_rx_pkt(struct hci_uart *hu) hci_recv_frame(hu->hdev, bcsp->rx_skb); } else { BT_ERR("Packet for unknown channel (%u %s)", - bcsp->rx_skb->data[1] & 0x0f, - bcsp->rx_skb->data[0] & 0x80 ? - "reliable" : "unreliable"); + bcsp->rx_skb->data[1] & 0x0f, + bcsp->rx_skb->data[0] & 0x80 ? + "reliable" : "unreliable"); kfree_skb(bcsp->rx_skb); } } else kfree_skb(bcsp->rx_skb); - } else { + } else if (pass_up == 1) { /* Pull out BCSP hdr */ skb_pull(bcsp->rx_skb, 4); hci_recv_frame(hu->hdev, bcsp->rx_skb); + } else { + /* ignore packet payload of already ACKed re-transmitted + * packets or when a packet was missed in the BCSP window + */ + kfree_skb(bcsp->rx_skb); } bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -567,7 +597,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) const unsigned char *ptr; BT_DBG("hu %p count %d rx_state %d rx_count %ld", - hu, count, bcsp->rx_state, bcsp->rx_count); + hu, count, bcsp->rx_state, bcsp->rx_count); ptr = data; while (count) { @@ -586,24 +616,14 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) switch (bcsp->rx_state) { case BCSP_W4_BCSP_HDR: - if ((0xff & (u8) ~ (bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] + - bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { + if ((0xff & (u8)~(bcsp->rx_skb->data[0] + bcsp->rx_skb->data[1] + + bcsp->rx_skb->data[2])) != bcsp->rx_skb->data[3]) { BT_ERR("Error in BCSP hdr checksum"); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; bcsp->rx_count = 0; continue; } - if (bcsp->rx_skb->data[0] & 0x80 /* reliable pkt */ - && (bcsp->rx_skb->data[0] & 0x07) != bcsp->rxseq_txack) { - BT_ERR("Out-of-order packet arrived, got %u expected %u", - bcsp->rx_skb->data[0] & 0x07, bcsp->rxseq_txack); - - kfree_skb(bcsp->rx_skb); - bcsp->rx_state = BCSP_W4_PKT_DELIMITER; - bcsp->rx_count = 0; - continue; - } bcsp->rx_state = BCSP_W4_DATA; bcsp->rx_count = (bcsp->rx_skb->data[1] >> 4) + (bcsp->rx_skb->data[2] << 4); /* May be 0 */ @@ -620,8 +640,8 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) case BCSP_W4_CRC: if (bitrev16(bcsp->message_crc) != bscp_get_crc(bcsp)) { BT_ERR("Checksum failed: computed %04x received %04x", - bitrev16(bcsp->message_crc), - bscp_get_crc(bcsp)); + bitrev16(bcsp->message_crc), + bscp_get_crc(bcsp)); kfree_skb(bcsp->rx_skb); bcsp->rx_state = BCSP_W4_PKT_DELIMITER; @@ -679,7 +699,7 @@ static int bcsp_recv(struct hci_uart *hu, const void *data, int count) /* Arrange to retransmit all messages in the relq. */ static void bcsp_timed_event(unsigned long arg) { - struct hci_uart *hu = (struct hci_uart *) arg; + struct hci_uart *hu = (struct hci_uart *)arg; struct bcsp_struct *bcsp = hu->priv; struct sk_buff *skb; unsigned long flags; @@ -715,7 +735,7 @@ static int bcsp_open(struct hci_uart *hu) init_timer(&bcsp->tbcsp); bcsp->tbcsp.function = bcsp_timed_event; - bcsp->tbcsp.data = (u_long) hu; + bcsp->tbcsp.data = (u_long)hu; bcsp->rx_state = BCSP_W4_PKT_DELIMITER; diff --git a/drivers/bluetooth/hci_intel.c b/drivers/bluetooth/hci_intel.c index ed0a4201b551..9e271286c5e5 100644 --- a/drivers/bluetooth/hci_intel.c +++ b/drivers/bluetooth/hci_intel.c @@ -128,7 +128,7 @@ static int intel_wait_booting(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "Device boot interrupted"); return -EINTR; } @@ -151,7 +151,7 @@ static int intel_wait_lpm_transaction(struct hci_uart *hu) TASK_INTERRUPTIBLE, msecs_to_jiffies(1000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hu->hdev, "LPM transaction interrupted"); return -EINTR; } @@ -813,7 +813,7 @@ static int intel_setup(struct hci_uart *hu) err = wait_on_bit_timeout(&intel->flags, STATE_DOWNLOADING, TASK_INTERRUPTIBLE, msecs_to_jiffies(5000)); - if (err == 1) { + if (err == -EINTR) { bt_dev_err(hdev, "Firmware loading interrupted"); err = -EINTR; goto done; diff --git a/drivers/bluetooth/hci_ldisc.c b/drivers/bluetooth/hci_ldisc.c index dda97398c59a..9497c469efd2 100644 --- a/drivers/bluetooth/hci_ldisc.c +++ b/drivers/bluetooth/hci_ldisc.c @@ -697,34 +697,36 @@ static int hci_uart_tty_ioctl(struct tty_struct *tty, struct file *file, case HCIUARTSETPROTO: if (!test_and_set_bit(HCI_UART_PROTO_SET, &hu->flags)) { err = hci_uart_set_proto(hu, arg); - if (err) { + if (err) clear_bit(HCI_UART_PROTO_SET, &hu->flags); - return err; - } } else - return -EBUSY; + err = -EBUSY; break; case HCIUARTGETPROTO: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return hu->proto->id; - return -EUNATCH; + err = hu->proto->id; + else + err = -EUNATCH; + break; case HCIUARTGETDEVICE: if (test_bit(HCI_UART_REGISTERED, &hu->flags)) - return hu->hdev->id; - return -EUNATCH; + err = hu->hdev->id; + else + err = -EUNATCH; + break; case HCIUARTSETFLAGS: if (test_bit(HCI_UART_PROTO_SET, &hu->flags)) - return -EBUSY; - err = hci_uart_set_flags(hu, arg); - if (err) - return err; + err = -EBUSY; + else + err = hci_uart_set_flags(hu, arg); break; case HCIUARTGETFLAGS: - return hu->hdev_flags; + err = hu->hdev_flags; + break; default: err = n_tty_ioctl_helper(tty, file, cmd, arg); @@ -810,6 +812,9 @@ static int __init hci_uart_init(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_init(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_init(); +#endif return 0; } @@ -845,6 +850,9 @@ static void __exit hci_uart_exit(void) #ifdef CONFIG_BT_HCIUART_AG6XX ag6xx_deinit(); #endif +#ifdef CONFIG_BT_HCIUART_MRVL + mrvl_deinit(); +#endif /* Release tty registration of line discipline */ err = tty_unregister_ldisc(N_HCI); diff --git a/drivers/bluetooth/hci_mrvl.c b/drivers/bluetooth/hci_mrvl.c new file mode 100644 index 000000000000..bbc4b39b1dbf --- /dev/null +++ b/drivers/bluetooth/hci_mrvl.c @@ -0,0 +1,387 @@ +/* + * + * Bluetooth HCI UART driver for marvell devices + * + * Copyright (C) 2016 Marvell International Ltd. + * Copyright (C) 2016 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "hci_uart.h" + +#define HCI_FW_REQ_PKT 0xA5 +#define HCI_CHIP_VER_PKT 0xAA + +#define MRVL_ACK 0x5A +#define MRVL_NAK 0xBF +#define MRVL_RAW_DATA 0x1F + +enum { + STATE_CHIP_VER_PENDING, + STATE_FW_REQ_PENDING, +}; + +struct mrvl_data { + struct sk_buff *rx_skb; + struct sk_buff_head txq; + struct sk_buff_head rawq; + unsigned long flags; + unsigned int tx_len; + u8 id, rev; +}; + +struct hci_mrvl_pkt { + __le16 lhs; + __le16 rhs; +} __packed; +#define HCI_MRVL_PKT_SIZE 4 + +static int mrvl_open(struct hci_uart *hu) +{ + struct mrvl_data *mrvl; + + BT_DBG("hu %p", hu); + + mrvl = kzalloc(sizeof(*mrvl), GFP_KERNEL); + if (!mrvl) + return -ENOMEM; + + skb_queue_head_init(&mrvl->txq); + skb_queue_head_init(&mrvl->rawq); + + set_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + + hu->priv = mrvl; + return 0; +} + +static int mrvl_close(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + kfree_skb(mrvl->rx_skb); + kfree(mrvl); + + hu->priv = NULL; + return 0; +} + +static int mrvl_flush(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + + BT_DBG("hu %p", hu); + + skb_queue_purge(&mrvl->txq); + skb_queue_purge(&mrvl->rawq); + + return 0; +} + +static struct sk_buff *mrvl_dequeue(struct hci_uart *hu) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + skb = skb_dequeue(&mrvl->txq); + if (!skb) { + /* Any raw data ? */ + skb = skb_dequeue(&mrvl->rawq); + } else { + /* Prepend skb with frame type */ + memcpy(skb_push(skb, 1), &bt_cb(skb)->pkt_type, 1); + } + + return skb; +} + +static int mrvl_enqueue(struct hci_uart *hu, struct sk_buff *skb) +{ + struct mrvl_data *mrvl = hu->priv; + + skb_queue_tail(&mrvl->txq, skb); + return 0; +} + +static void mrvl_send_ack(struct hci_uart *hu, unsigned char type) +{ + struct mrvl_data *mrvl = hu->priv; + struct sk_buff *skb; + + /* No H4 payload, only 1 byte header */ + skb = bt_skb_alloc(0, GFP_ATOMIC); + if (!skb) { + bt_dev_err(hu->hdev, "Unable to alloc ack/nak packet"); + return; + } + hci_skb_pkt_type(skb) = type; + + skb_queue_tail(&mrvl->txq, skb); + hci_uart_tx_wakeup(hu); +} + +static int mrvl_recv_fw_req(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + int ret = 0; + + if ((pkt->lhs ^ pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_FW_REQ_PENDING, &mrvl->flags)) { + bt_dev_err(hdev, "Received unexpected firmware request"); + ret = -EINVAL; + goto done; + } + + mrvl->tx_len = le16_to_cpu(pkt->lhs); + + clear_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_FW_REQ_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +static int mrvl_recv_chip_ver(struct hci_dev *hdev, struct sk_buff *skb) +{ + struct hci_mrvl_pkt *pkt = (void *)skb->data; + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + u16 version = le16_to_cpu(pkt->lhs); + int ret = 0; + + if ((pkt->lhs ^ pkt->rhs) != 0xffff) { + bt_dev_err(hdev, "Corrupted mrvl header"); + mrvl_send_ack(hu, MRVL_NAK); + ret = -EINVAL; + goto done; + } + mrvl_send_ack(hu, MRVL_ACK); + + if (!test_bit(STATE_CHIP_VER_PENDING, &mrvl->flags)) { + bt_dev_err(hdev, "Received unexpected chip version"); + goto done; + } + + mrvl->id = version; + mrvl->rev = version >> 8; + + bt_dev_info(hdev, "Controller id = %x, rev = %x", mrvl->id, mrvl->rev); + + clear_bit(STATE_CHIP_VER_PENDING, &mrvl->flags); + smp_mb__after_atomic(); + wake_up_bit(&mrvl->flags, STATE_CHIP_VER_PENDING); + +done: + kfree_skb(skb); + return ret; +} + +#define HCI_RECV_CHIP_VER \ + .type = HCI_CHIP_VER_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +#define HCI_RECV_FW_REQ \ + .type = HCI_FW_REQ_PKT, \ + .hlen = HCI_MRVL_PKT_SIZE, \ + .loff = 0, \ + .lsize = 0, \ + .maxlen = HCI_MRVL_PKT_SIZE + +static const struct h4_recv_pkt mrvl_recv_pkts[] = { + { H4_RECV_ACL, .recv = hci_recv_frame }, + { H4_RECV_SCO, .recv = hci_recv_frame }, + { H4_RECV_EVENT, .recv = hci_recv_frame }, + { HCI_RECV_FW_REQ, .recv = mrvl_recv_fw_req }, + { HCI_RECV_CHIP_VER, .recv = mrvl_recv_chip_ver }, +}; + +static int mrvl_recv(struct hci_uart *hu, const void *data, int count) +{ + struct mrvl_data *mrvl = hu->priv; + + if (!test_bit(HCI_UART_REGISTERED, &hu->flags)) + return -EUNATCH; + + mrvl->rx_skb = h4_recv_buf(hu->hdev, mrvl->rx_skb, data, count, + mrvl_recv_pkts, + ARRAY_SIZE(mrvl_recv_pkts)); + if (IS_ERR(mrvl->rx_skb)) { + int err = PTR_ERR(mrvl->rx_skb); + bt_dev_err(hu->hdev, "Frame reassembly failed (%d)", err); + mrvl->rx_skb = NULL; + return err; + } + + return count; +} + +static int mrvl_load_firmware(struct hci_dev *hdev, const char *name) +{ + struct hci_uart *hu = hci_get_drvdata(hdev); + struct mrvl_data *mrvl = hu->priv; + const struct firmware *fw = NULL; + const u8 *fw_ptr, *fw_max; + int err; + + err = request_firmware(&fw, name, &hdev->dev); + if (err < 0) { + bt_dev_err(hdev, "Failed to load firmware file %s", name); + return err; + } + + fw_ptr = fw->data; + fw_max = fw->data + fw->size; + + bt_dev_info(hdev, "Loading %s", name); + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + while (fw_ptr <= fw_max) { + struct sk_buff *skb; + + /* Controller drives the firmware load by sending firmware + * request packets containing the expected fragment size. + */ + err = wait_on_bit_timeout(&mrvl->flags, STATE_FW_REQ_PENDING, + TASK_INTERRUPTIBLE, + msecs_to_jiffies(2000)); + if (err == 1) { + bt_dev_err(hdev, "Firmware load interrupted"); + err = -EINTR; + break; + } else if (err) { + bt_dev_err(hdev, "Firmware request timeout"); + err = -ETIMEDOUT; + break; + } + + bt_dev_dbg(hdev, "Firmware request, expecting %d bytes", + mrvl->tx_len); + + if (fw_ptr == fw_max) { + /* Controller requests a null size once firmware is + * fully loaded. If controller expects more data, there + * is an issue. + */ + if (!mrvl->tx_len) { + bt_dev_info(hdev, "Firmware loading complete"); + } else { + bt_dev_err(hdev, "Firmware loading failure"); + err = -EINVAL; + } + break; + } + + if (fw_ptr + mrvl->tx_len > fw_max) { + mrvl->tx_len = fw_max - fw_ptr; + bt_dev_dbg(hdev, "Adjusting tx_len to %d", + mrvl->tx_len); + } + + skb = bt_skb_alloc(mrvl->tx_len, GFP_KERNEL); + if (!skb) { + bt_dev_err(hdev, "Failed to alloc mem for FW packet"); + err = -ENOMEM; + break; + } + bt_cb(skb)->pkt_type = MRVL_RAW_DATA; + + memcpy(skb_put(skb, mrvl->tx_len), fw_ptr, mrvl->tx_len); + fw_ptr += mrvl->tx_len; + + set_bit(STATE_FW_REQ_PENDING, &mrvl->flags); + + skb_queue_tail(&mrvl->rawq, skb); + hci_uart_tx_wakeup(hu); + } + + release_firmware(fw); + return err; +} + +static int mrvl_setup(struct hci_uart *hu) +{ + int err; + + hci_uart_set_flow_control(hu, true); + + err = mrvl_load_firmware(hu->hdev, "mrvl/helper_uart_3000000.bin"); + if (err) { + bt_dev_err(hu->hdev, "Unable to download firmware helper"); + return -EINVAL; + } + + hci_uart_set_baudrate(hu, 3000000); + hci_uart_set_flow_control(hu, false); + + err = mrvl_load_firmware(hu->hdev, "mrvl/uart8897_bt.bin"); + if (err) + return err; + + return 0; +} + +static const struct hci_uart_proto mrvl_proto = { + .id = HCI_UART_MRVL, + .name = "Marvell", + .init_speed = 115200, + .open = mrvl_open, + .close = mrvl_close, + .flush = mrvl_flush, + .setup = mrvl_setup, + .recv = mrvl_recv, + .enqueue = mrvl_enqueue, + .dequeue = mrvl_dequeue, +}; + +int __init mrvl_init(void) +{ + return hci_uart_register_proto(&mrvl_proto); +} + +int __exit mrvl_deinit(void) +{ + return hci_uart_unregister_proto(&mrvl_proto); +} diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c index 683c2b642057..6c867fbc56a7 100644 --- a/drivers/bluetooth/hci_qca.c +++ b/drivers/bluetooth/hci_qca.c @@ -397,7 +397,7 @@ static int qca_open(struct hci_uart *hu) skb_queue_head_init(&qca->txq); skb_queue_head_init(&qca->tx_wait_q); spin_lock_init(&qca->hci_ibs_lock); - qca->workqueue = create_singlethread_workqueue("qca_wq"); + qca->workqueue = alloc_ordered_workqueue("qca_wq", 0); if (!qca->workqueue) { BT_ERR("QCA Workqueue not initialized properly"); kfree(qca); diff --git a/drivers/bluetooth/hci_uart.h b/drivers/bluetooth/hci_uart.h index 839bad1d8152..070139513e65 100644 --- a/drivers/bluetooth/hci_uart.h +++ b/drivers/bluetooth/hci_uart.h @@ -35,7 +35,7 @@ #define HCIUARTGETFLAGS _IOR('U', 204, int) /* UART protocols */ -#define HCI_UART_MAX_PROTO 10 +#define HCI_UART_MAX_PROTO 12 #define HCI_UART_H4 0 #define HCI_UART_BCSP 1 @@ -47,6 +47,8 @@ #define HCI_UART_BCM 7 #define HCI_UART_QCA 8 #define HCI_UART_AG6XX 9 +#define HCI_UART_NOKIA 10 +#define HCI_UART_MRVL 11 #define HCI_UART_RAW_DEVICE 0 #define HCI_UART_RESET_ON_INIT 1 @@ -189,3 +191,8 @@ int qca_deinit(void); int ag6xx_init(void); int ag6xx_deinit(void); #endif + +#ifdef CONFIG_BT_HCIUART_MRVL +int mrvl_init(void); +int mrvl_deinit(void); +#endif diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 1af94e2d1a25..9b035b7d7f4f 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -550,4 +550,6 @@ config CRYPTO_DEV_ROCKCHIP This driver interfaces with the hardware crypto accelerator. Supporting cbc/ecb chainmode, and aes/des/des3_ede cipher mode. +source "drivers/crypto/chelsio/Kconfig" + endif # CRYPTO_HW diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 3c6432dd09d9..ad7250fa1348 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -31,3 +31,4 @@ obj-$(CONFIG_CRYPTO_DEV_QCE) += qce/ obj-$(CONFIG_CRYPTO_DEV_VMX) += vmx/ obj-$(CONFIG_CRYPTO_DEV_SUN4I_SS) += sunxi-ss/ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/ +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chelsio/ diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig new file mode 100644 index 000000000000..4ce67fb9a880 --- /dev/null +++ b/drivers/crypto/chelsio/Kconfig @@ -0,0 +1,19 @@ +config CRYPTO_DEV_CHELSIO + tristate "Chelsio Crypto Co-processor Driver" + depends on CHELSIO_T4 + select CRYPTO_SHA1 + select CRYPTO_SHA256 + select CRYPTO_SHA512 + ---help--- + The Chelsio Crypto Co-processor driver for T6 adapters. + + For general information about Chelsio and our products, visit + our website at . + + For customer support, please visit our customer support page at + . + + Please send feedback to . + + To compile this driver as a module, choose M here: the module + will be called chcr. diff --git a/drivers/crypto/chelsio/Makefile b/drivers/crypto/chelsio/Makefile new file mode 100644 index 000000000000..bebdf06687ad --- /dev/null +++ b/drivers/crypto/chelsio/Makefile @@ -0,0 +1,4 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 + +obj-$(CONFIG_CRYPTO_DEV_CHELSIO) += chcr.o +chcr-objs := chcr_core.o chcr_algo.o diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c new file mode 100644 index 000000000000..e4ddb921d7b3 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_algo.c @@ -0,0 +1,1525 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Written and Maintained by: + * Manoj Malviya (manojmalviya@chelsio.com) + * Atul Gupta (atul.gupta@chelsio.com) + * Jitendra Lulla (jlulla@chelsio.com) + * Yeshaswi M R Gowda (yeshaswi@chelsio.com) + * Harsh Jain (harsh@chelsio.com) + */ + +#define pr_fmt(fmt) "chcr:" fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "t4fw_api.h" +#include "t4_msg.h" +#include "chcr_core.h" +#include "chcr_algo.h" +#include "chcr_crypto.h" + +static inline struct ablk_ctx *ABLK_CTX(struct chcr_context *ctx) +{ + return ctx->crypto_ctx->ablkctx; +} + +static inline struct hmac_ctx *HMAC_CTX(struct chcr_context *ctx) +{ + return ctx->crypto_ctx->hmacctx; +} + +static inline struct uld_ctx *ULD_CTX(struct chcr_context *ctx) +{ + return ctx->dev->u_ctx; +} + +static inline int is_ofld_imm(const struct sk_buff *skb) +{ + return (skb->len <= CRYPTO_MAX_IMM_TX_PKT_LEN); +} + +/* + * sgl_len - calculates the size of an SGL of the given capacity + * @n: the number of SGL entries + * Calculates the number of flits needed for a scatter/gather list that + * can hold the given number of entries. + */ +static inline unsigned int sgl_len(unsigned int n) +{ + n--; + return (3 * n) / 2 + (n & 1) + 2; +} + +/* + * chcr_handle_resp - Unmap the DMA buffers associated with the request + * @req: crypto request + */ +int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, + int error_status) +{ + struct crypto_tfm *tfm = req->tfm; + struct chcr_context *ctx = crypto_tfm_ctx(tfm); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct chcr_req_ctx ctx_req; + struct cpl_fw6_pld *fw6_pld; + unsigned int digestsize, updated_digestsize; + + switch (tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_BLKCIPHER: + ctx_req.req.ablk_req = (struct ablkcipher_request *)req; + ctx_req.ctx.ablk_ctx = + ablkcipher_request_ctx(ctx_req.req.ablk_req); + if (!error_status) { + fw6_pld = (struct cpl_fw6_pld *)input; + memcpy(ctx_req.req.ablk_req->info, &fw6_pld->data[2], + AES_BLOCK_SIZE); + } + dma_unmap_sg(&u_ctx->lldi.pdev->dev, ctx_req.req.ablk_req->dst, + ABLK_CTX(ctx)->dst_nents, DMA_FROM_DEVICE); + if (ctx_req.ctx.ablk_ctx->skb) { + kfree_skb(ctx_req.ctx.ablk_ctx->skb); + ctx_req.ctx.ablk_ctx->skb = NULL; + } + break; + + case CRYPTO_ALG_TYPE_AHASH: + ctx_req.req.ahash_req = (struct ahash_request *)req; + ctx_req.ctx.ahash_ctx = + ahash_request_ctx(ctx_req.req.ahash_req); + digestsize = + crypto_ahash_digestsize(crypto_ahash_reqtfm( + ctx_req.req.ahash_req)); + updated_digestsize = digestsize; + if (digestsize == SHA224_DIGEST_SIZE) + updated_digestsize = SHA256_DIGEST_SIZE; + else if (digestsize == SHA384_DIGEST_SIZE) + updated_digestsize = SHA512_DIGEST_SIZE; + if (ctx_req.ctx.ahash_ctx->skb) + ctx_req.ctx.ahash_ctx->skb = NULL; + if (ctx_req.ctx.ahash_ctx->result == 1) { + ctx_req.ctx.ahash_ctx->result = 0; + memcpy(ctx_req.req.ahash_req->result, input + + sizeof(struct cpl_fw6_pld), + digestsize); + } else { + memcpy(ctx_req.ctx.ahash_ctx->partial_hash, input + + sizeof(struct cpl_fw6_pld), + updated_digestsize); + } + kfree(ctx_req.ctx.ahash_ctx->dummy_payload_ptr); + ctx_req.ctx.ahash_ctx->dummy_payload_ptr = NULL; + break; + } + return 0; +} + +/* + * calc_tx_flits_ofld - calculate # of flits for an offload packet + * @skb: the packet + * Returns the number of flits needed for the given offload packet. + * These packets are already fully constructed and no additional headers + * will be added. + */ +static inline unsigned int calc_tx_flits_ofld(const struct sk_buff *skb) +{ + unsigned int flits, cnt; + + if (is_ofld_imm(skb)) + return DIV_ROUND_UP(skb->len, 8); + + flits = skb_transport_offset(skb) / 8; /* headers */ + cnt = skb_shinfo(skb)->nr_frags; + if (skb_tail_pointer(skb) != skb_transport_header(skb)) + cnt++; + return flits + sgl_len(cnt); +} + +static struct shash_desc *chcr_alloc_shash(unsigned int ds) +{ + struct crypto_shash *base_hash = NULL; + struct shash_desc *desc; + + switch (ds) { + case SHA1_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha1-generic", 0, 0); + break; + case SHA224_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha224-generic", 0, 0); + break; + case SHA256_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha256-generic", 0, 0); + break; + case SHA384_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha384-generic", 0, 0); + break; + case SHA512_DIGEST_SIZE: + base_hash = crypto_alloc_shash("sha512-generic", 0, 0); + break; + } + if (IS_ERR(base_hash)) { + pr_err("Can not allocate sha-generic algo.\n"); + return (void *)base_hash; + } + + desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(base_hash), + GFP_KERNEL); + if (!desc) + return ERR_PTR(-ENOMEM); + desc->tfm = base_hash; + desc->flags = crypto_shash_get_flags(base_hash); + return desc; +} + +static int chcr_compute_partial_hash(struct shash_desc *desc, + char *iopad, char *result_hash, + int digest_size) +{ + struct sha1_state sha1_st; + struct sha256_state sha256_st; + struct sha512_state sha512_st; + int error; + + if (digest_size == SHA1_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA1_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha1_st); + memcpy(result_hash, sha1_st.state, SHA1_DIGEST_SIZE); + } else if (digest_size == SHA224_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha256_st); + memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE); + + } else if (digest_size == SHA256_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA256_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha256_st); + memcpy(result_hash, sha256_st.state, SHA256_DIGEST_SIZE); + + } else if (digest_size == SHA384_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha512_st); + memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE); + + } else if (digest_size == SHA512_DIGEST_SIZE) { + error = crypto_shash_init(desc) ?: + crypto_shash_update(desc, iopad, SHA512_BLOCK_SIZE) ?: + crypto_shash_export(desc, (void *)&sha512_st); + memcpy(result_hash, sha512_st.state, SHA512_DIGEST_SIZE); + } else { + error = -EINVAL; + pr_err("Unknown digest size %d\n", digest_size); + } + return error; +} + +static void chcr_change_order(char *buf, int ds) +{ + int i; + + if (ds == SHA512_DIGEST_SIZE) { + for (i = 0; i < (ds / sizeof(u64)); i++) + *((__be64 *)buf + i) = + cpu_to_be64(*((u64 *)buf + i)); + } else { + for (i = 0; i < (ds / sizeof(u32)); i++) + *((__be32 *)buf + i) = + cpu_to_be32(*((u32 *)buf + i)); + } +} + +static inline int is_hmac(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct chcr_alg_template *chcr_crypto_alg = + container_of(__crypto_ahash_alg(alg), struct chcr_alg_template, + alg.hash); + if ((chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK) == + CRYPTO_ALG_SUB_TYPE_HASH_HMAC) + return 1; + return 0; +} + +static inline unsigned int ch_nents(struct scatterlist *sg, + unsigned int *total_size) +{ + unsigned int nents; + + for (nents = 0, *total_size = 0; sg; sg = sg_next(sg)) { + nents++; + *total_size += sg->length; + } + return nents; +} + +static void write_phys_cpl(struct cpl_rx_phys_dsgl *phys_cpl, + struct scatterlist *sg, + struct phys_sge_parm *sg_param) +{ + struct phys_sge_pairs *to; + unsigned int out_buf_size = sg_param->obsize; + unsigned int nents = sg_param->nents, i, j, tot_len = 0; + + phys_cpl->op_to_tid = htonl(CPL_RX_PHYS_DSGL_OPCODE_V(CPL_RX_PHYS_DSGL) + | CPL_RX_PHYS_DSGL_ISRDMA_V(0)); + phys_cpl->pcirlxorder_to_noofsgentr = + htonl(CPL_RX_PHYS_DSGL_PCIRLXORDER_V(0) | + CPL_RX_PHYS_DSGL_PCINOSNOOP_V(0) | + CPL_RX_PHYS_DSGL_PCITPHNTENB_V(0) | + CPL_RX_PHYS_DSGL_PCITPHNT_V(0) | + CPL_RX_PHYS_DSGL_DCAID_V(0) | + CPL_RX_PHYS_DSGL_NOOFSGENTR_V(nents)); + phys_cpl->rss_hdr_int.opcode = CPL_RX_PHYS_ADDR; + phys_cpl->rss_hdr_int.qid = htons(sg_param->qid); + phys_cpl->rss_hdr_int.hash_val = 0; + to = (struct phys_sge_pairs *)((unsigned char *)phys_cpl + + sizeof(struct cpl_rx_phys_dsgl)); + + for (i = 0; nents; to++) { + for (j = i; (nents && (j < (8 + i))); j++, nents--) { + to->len[j] = htons(sg->length); + to->addr[j] = cpu_to_be64(sg_dma_address(sg)); + if (out_buf_size) { + if (tot_len + sg_dma_len(sg) >= out_buf_size) { + to->len[j] = htons(out_buf_size - + tot_len); + return; + } + tot_len += sg_dma_len(sg); + } + sg = sg_next(sg); + } + } +} + +static inline unsigned +int map_writesg_phys_cpl(struct device *dev, struct cpl_rx_phys_dsgl *phys_cpl, + struct scatterlist *sg, struct phys_sge_parm *sg_param) +{ + if (!sg || !sg_param->nents) + return 0; + + sg_param->nents = dma_map_sg(dev, sg, sg_param->nents, DMA_FROM_DEVICE); + if (sg_param->nents == 0) { + pr_err("CHCR : DMA mapping failed\n"); + return -EINVAL; + } + write_phys_cpl(phys_cpl, sg, sg_param); + return 0; +} + +static inline int get_cryptoalg_subtype(struct crypto_tfm *tfm) +{ + struct crypto_alg *alg = tfm->__crt_alg; + struct chcr_alg_template *chcr_crypto_alg = + container_of(alg, struct chcr_alg_template, alg.crypto); + + return chcr_crypto_alg->type & CRYPTO_ALG_SUB_TYPE_MASK; +} + +static inline void +write_sg_data_page_desc(struct sk_buff *skb, unsigned int *frags, + struct scatterlist *sg, unsigned int count) +{ + struct page *spage; + unsigned int page_len; + + skb->len += count; + skb->data_len += count; + skb->truesize += count; + while (count > 0) { + if (sg && (!(sg->length))) + break; + spage = sg_page(sg); + get_page(spage); + page_len = min(sg->length, count); + skb_fill_page_desc(skb, *frags, spage, sg->offset, page_len); + (*frags)++; + count -= page_len; + sg = sg_next(sg); + } +} + +static int generate_copy_rrkey(struct ablk_ctx *ablkctx, + struct _key_ctx *key_ctx) +{ + if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { + get_aes_decrypt_key(key_ctx->key, ablkctx->key, + ablkctx->enckey_len << 3); + memset(key_ctx->key + ablkctx->enckey_len, 0, + CHCR_AES_MAX_KEY_LEN - ablkctx->enckey_len); + } else { + memcpy(key_ctx->key, + ablkctx->key + (ablkctx->enckey_len >> 1), + ablkctx->enckey_len >> 1); + get_aes_decrypt_key(key_ctx->key + (ablkctx->enckey_len >> 1), + ablkctx->key, ablkctx->enckey_len << 2); + } + return 0; +} + +static inline void create_wreq(struct chcr_context *ctx, + struct fw_crypto_lookaside_wr *wreq, + void *req, struct sk_buff *skb, + int kctx_len, int hash_sz, + unsigned int phys_dsgl) +{ + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct ulp_txpkt *ulptx = (struct ulp_txpkt *)(wreq + 1); + struct ulptx_idata *sc_imm = (struct ulptx_idata *)(ulptx + 1); + int iv_loc = IV_DSGL; + int qid = u_ctx->lldi.rxq_ids[ctx->tx_channel_id]; + unsigned int immdatalen = 0, nr_frags = 0; + + if (is_ofld_imm(skb)) { + immdatalen = skb->data_len; + iv_loc = IV_IMMEDIATE; + } else { + nr_frags = skb_shinfo(skb)->nr_frags; + } + + wreq->op_to_cctx_size = FILL_WR_OP_CCTX_SIZE(immdatalen, + (kctx_len >> 4)); + wreq->pld_size_hash_size = + htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(sgl_lengths[nr_frags]) | + FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(hash_sz)); + wreq->len16_pkd = htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP( + (calc_tx_flits_ofld(skb) * 8), 16))); + wreq->cookie = cpu_to_be64((uintptr_t)req); + wreq->rx_chid_to_rx_q_id = + FILL_WR_RX_Q_ID(ctx->dev->tx_channel_id, qid, + (hash_sz) ? IV_NOP : iv_loc); + + ulptx->cmd_dest = FILL_ULPTX_CMD_DEST(ctx->dev->tx_channel_id); + ulptx->len = htonl((DIV_ROUND_UP((calc_tx_flits_ofld(skb) * 8), + 16) - ((sizeof(*wreq)) >> 4))); + + sc_imm->cmd_more = FILL_CMD_MORE(immdatalen); + sc_imm->len = cpu_to_be32(sizeof(struct cpl_tx_sec_pdu) + kctx_len + + ((hash_sz) ? DUMMY_BYTES : + (sizeof(struct cpl_rx_phys_dsgl) + + phys_dsgl)) + immdatalen); +} + +/** + * create_cipher_wr - form the WR for cipher operations + * @req: cipher req. + * @ctx: crypto driver context of the request. + * @qid: ingress qid where response of this WR should be received. + * @op_type: encryption or decryption + */ +static struct sk_buff +*create_cipher_wr(struct crypto_async_request *req_base, + struct chcr_context *ctx, unsigned short qid, + unsigned short op_type) +{ + struct ablkcipher_request *req = (struct ablkcipher_request *)req_base; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); + struct sk_buff *skb = NULL; + struct _key_ctx *key_ctx; + struct fw_crypto_lookaside_wr *wreq; + struct cpl_tx_sec_pdu *sec_cpl; + struct cpl_rx_phys_dsgl *phys_cpl; + struct chcr_blkcipher_req_ctx *req_ctx = ablkcipher_request_ctx(req); + struct phys_sge_parm sg_param; + unsigned int frags = 0, transhdr_len, phys_dsgl, dst_bufsize = 0; + unsigned int ivsize = crypto_ablkcipher_ivsize(tfm), kctx_len; + + if (!req->info) + return ERR_PTR(-EINVAL); + ablkctx->dst_nents = ch_nents(req->dst, &dst_bufsize); + ablkctx->enc = op_type; + + if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) || + (req->nbytes <= 0) || (req->nbytes % AES_BLOCK_SIZE)) + return ERR_PTR(-EINVAL); + + phys_dsgl = get_space_for_phys_dsgl(ablkctx->dst_nents); + + kctx_len = sizeof(*key_ctx) + + (DIV_ROUND_UP(ablkctx->enckey_len, 16) * 16); + transhdr_len = CIPHER_TRANSHDR_SIZE(kctx_len, phys_dsgl); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), + GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); + + sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); + sec_cpl->op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 1, 1); + + sec_cpl->pldlen = htonl(ivsize + req->nbytes); + sec_cpl->aadstart_cipherstop_hi = FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, + ivsize + 1, 0); + + sec_cpl->cipherstop_lo_authinsert = FILL_SEC_CPL_AUTHINSERT(0, 0, + 0, 0); + sec_cpl->seqno_numivs = FILL_SEC_CPL_SCMD0_SEQNO(op_type, 0, + ablkctx->ciph_mode, + 0, 0, ivsize >> 1, 1); + sec_cpl->ivgen_hdrlen = FILL_SEC_CPL_IVGEN_HDRLEN(0, 0, 0, + 0, 1, phys_dsgl); + + key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); + key_ctx->ctx_hdr = ablkctx->key_ctx_hdr; + if (op_type == CHCR_DECRYPT_OP) { + if (generate_copy_rrkey(ablkctx, key_ctx)) + goto map_fail1; + } else { + if (ablkctx->ciph_mode == CHCR_SCMD_CIPHER_MODE_AES_CBC) { + memcpy(key_ctx->key, ablkctx->key, ablkctx->enckey_len); + } else { + memcpy(key_ctx->key, ablkctx->key + + (ablkctx->enckey_len >> 1), + ablkctx->enckey_len >> 1); + memcpy(key_ctx->key + + (ablkctx->enckey_len >> 1), + ablkctx->key, + ablkctx->enckey_len >> 1); + } + } + phys_cpl = (struct cpl_rx_phys_dsgl *)((u8 *)key_ctx + kctx_len); + + memcpy(ablkctx->iv, req->info, ivsize); + sg_init_table(&ablkctx->iv_sg, 1); + sg_set_buf(&ablkctx->iv_sg, ablkctx->iv, ivsize); + sg_param.nents = ablkctx->dst_nents; + sg_param.obsize = dst_bufsize; + sg_param.qid = qid; + sg_param.align = 1; + if (map_writesg_phys_cpl(&u_ctx->lldi.pdev->dev, phys_cpl, req->dst, + &sg_param)) + goto map_fail1; + + skb_set_transport_header(skb, transhdr_len); + write_sg_data_page_desc(skb, &frags, &ablkctx->iv_sg, ivsize); + write_sg_data_page_desc(skb, &frags, req->src, req->nbytes); + create_wreq(ctx, wreq, req, skb, kctx_len, 0, phys_dsgl); + req_ctx->skb = skb; + skb_get(skb); + return skb; +map_fail1: + kfree_skb(skb); + return ERR_PTR(-ENOMEM); +} + +static int chcr_aes_cbc_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); + struct ablkcipher_alg *alg = crypto_ablkcipher_alg(tfm); + unsigned int ck_size, context_size; + u16 alignment = 0; + + if ((keylen < alg->min_keysize) || (keylen > alg->max_keysize)) + goto badkey_err; + + memcpy(ablkctx->key, key, keylen); + ablkctx->enckey_len = keylen; + if (keylen == AES_KEYSIZE_128) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_128; + } else if (keylen == AES_KEYSIZE_192) { + alignment = 8; + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_192; + } else if (keylen == AES_KEYSIZE_256) { + ck_size = CHCR_KEYCTX_CIPHER_KEY_SIZE_256; + } else { + goto badkey_err; + } + + context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + + keylen + alignment) >> 4; + + ablkctx->key_ctx_hdr = FILL_KEY_CTX_HDR(ck_size, CHCR_KEYCTX_NO_KEY, + 0, 0, context_size); + ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_CBC; + return 0; +badkey_err: + crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); + ablkctx->enckey_len = 0; + return -EINVAL; +} + +static int cxgb4_is_crypto_q_full(struct net_device *dev, unsigned int idx) +{ + int ret = 0; + struct sge_ofld_txq *q; + struct adapter *adap = netdev2adap(dev); + + local_bh_disable(); + q = &adap->sge.ofldtxq[idx]; + spin_lock(&q->sendq.lock); + if (q->full) + ret = -1; + spin_unlock(&q->sendq.lock); + local_bh_enable(); + return ret; +} + +static int chcr_aes_encrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_async_request *req_base = &req->base; + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct sk_buff *skb; + + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + skb = create_cipher_wr(req_base, ctx, + u_ctx->lldi.rxq_ids[ctx->tx_channel_id], + CHCR_ENCRYPT_OP); + if (IS_ERR(skb)) { + pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); + return PTR_ERR(skb); + } + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_aes_decrypt(struct ablkcipher_request *req) +{ + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct crypto_async_request *req_base = &req->base; + struct uld_ctx *u_ctx = ULD_CTX(ctx); + struct sk_buff *skb; + + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + skb = create_cipher_wr(req_base, ctx, u_ctx->lldi.rxq_ids[0], + CHCR_DECRYPT_OP); + if (IS_ERR(skb)) { + pr_err("chcr : %s : Failed to form WR. No memory\n", __func__); + return PTR_ERR(skb); + } + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_device_init(struct chcr_context *ctx) +{ + struct uld_ctx *u_ctx; + unsigned int id; + int err = 0, rxq_perchan, rxq_idx; + + id = smp_processor_id(); + if (!ctx->dev) { + err = assign_chcr_device(&ctx->dev); + if (err) { + pr_err("chcr device assignment fails\n"); + goto out; + } + u_ctx = ULD_CTX(ctx); + rxq_perchan = u_ctx->lldi.nrxq / u_ctx->lldi.nchan; + ctx->dev->tx_channel_id = 0; + rxq_idx = ctx->dev->tx_channel_id * rxq_perchan; + rxq_idx += id % rxq_perchan; + spin_lock(&ctx->dev->lock_chcr_dev); + ctx->tx_channel_id = rxq_idx; + spin_unlock(&ctx->dev->lock_chcr_dev); + } +out: + return err; +} + +static int chcr_cra_init(struct crypto_tfm *tfm) +{ + tfm->crt_ablkcipher.reqsize = sizeof(struct chcr_blkcipher_req_ctx); + return chcr_device_init(crypto_tfm_ctx(tfm)); +} + +static int get_alg_config(struct algo_param *params, + unsigned int auth_size) +{ + switch (auth_size) { + case SHA1_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_160; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA1; + params->result_size = SHA1_DIGEST_SIZE; + break; + case SHA224_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA224; + params->result_size = SHA256_DIGEST_SIZE; + break; + case SHA256_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_256; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA256; + params->result_size = SHA256_DIGEST_SIZE; + break; + case SHA384_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_384; + params->result_size = SHA512_DIGEST_SIZE; + break; + case SHA512_DIGEST_SIZE: + params->mk_size = CHCR_KEYCTX_MAC_KEY_SIZE_512; + params->auth_mode = CHCR_SCMD_AUTH_MODE_SHA512_512; + params->result_size = SHA512_DIGEST_SIZE; + break; + default: + pr_err("chcr : ERROR, unsupported digest size\n"); + return -EINVAL; + } + return 0; +} + +static inline int +write_buffer_data_page_desc(struct chcr_ahash_req_ctx *req_ctx, + struct sk_buff *skb, unsigned int *frags, char *bfr, + u8 bfr_len) +{ + void *page_ptr = NULL; + + skb->len += bfr_len; + skb->data_len += bfr_len; + skb->truesize += bfr_len; + page_ptr = kmalloc(CHCR_HASH_MAX_BLOCK_SIZE_128, GFP_ATOMIC | GFP_DMA); + if (!page_ptr) + return -ENOMEM; + get_page(virt_to_page(page_ptr)); + req_ctx->dummy_payload_ptr = page_ptr; + memcpy(page_ptr, bfr, bfr_len); + skb_fill_page_desc(skb, *frags, virt_to_page(page_ptr), + offset_in_page(page_ptr), bfr_len); + (*frags)++; + return 0; +} + +/** + * create_final_hash_wr - Create hash work request + * @req - Cipher req base + */ +static struct sk_buff *create_final_hash_wr(struct ahash_request *req, + struct hash_wr_param *param) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + struct sk_buff *skb = NULL; + struct _key_ctx *key_ctx; + struct fw_crypto_lookaside_wr *wreq; + struct cpl_tx_sec_pdu *sec_cpl; + unsigned int frags = 0, transhdr_len, iopad_alignment = 0; + unsigned int digestsize = crypto_ahash_digestsize(tfm); + unsigned int kctx_len = sizeof(*key_ctx); + u8 hash_size_in_response = 0; + + iopad_alignment = KEYCTX_ALIGN_PAD(digestsize); + kctx_len += param->alg_prm.result_size + iopad_alignment; + if (param->opad_needed) + kctx_len += param->alg_prm.result_size + iopad_alignment; + + if (req_ctx->result) + hash_size_in_response = digestsize; + else + hash_size_in_response = param->alg_prm.result_size; + transhdr_len = HASH_TRANSHDR_SIZE(kctx_len); + skb = alloc_skb((transhdr_len + sizeof(struct sge_opaque_hdr)), + GFP_ATOMIC); + if (!skb) + return skb; + + skb_reserve(skb, sizeof(struct sge_opaque_hdr)); + wreq = (struct fw_crypto_lookaside_wr *)__skb_put(skb, transhdr_len); + memset(wreq, 0, transhdr_len); + + sec_cpl = (struct cpl_tx_sec_pdu *)((u8 *)wreq + SEC_CPL_OFFSET); + sec_cpl->op_ivinsrtofst = + FILL_SEC_CPL_OP_IVINSR(ctx->dev->tx_channel_id, 2, 0, 0); + sec_cpl->pldlen = htonl(param->bfr_len + param->sg_len); + + sec_cpl->aadstart_cipherstop_hi = + FILL_SEC_CPL_CIPHERSTOP_HI(0, 0, 0, 0); + sec_cpl->cipherstop_lo_authinsert = + FILL_SEC_CPL_AUTHINSERT(0, 1, 0, 0); + sec_cpl->seqno_numivs = + FILL_SEC_CPL_SCMD0_SEQNO(0, 0, 0, param->alg_prm.auth_mode, + param->opad_needed, 0, 0); + + sec_cpl->ivgen_hdrlen = + FILL_SEC_CPL_IVGEN_HDRLEN(param->last, param->more, 0, 1, 0, 0); + + key_ctx = (struct _key_ctx *)((u8 *)sec_cpl + sizeof(*sec_cpl)); + memcpy(key_ctx->key, req_ctx->partial_hash, param->alg_prm.result_size); + + if (param->opad_needed) + memcpy(key_ctx->key + ((param->alg_prm.result_size <= 32) ? 32 : + CHCR_HASH_MAX_DIGEST_SIZE), + hmacctx->opad, param->alg_prm.result_size); + + key_ctx->ctx_hdr = FILL_KEY_CTX_HDR(CHCR_KEYCTX_NO_KEY, + param->alg_prm.mk_size, 0, + param->opad_needed, + (kctx_len >> 4)); + sec_cpl->scmd1 = cpu_to_be64((u64)param->scmd1); + + skb_set_transport_header(skb, transhdr_len); + if (param->bfr_len != 0) + write_buffer_data_page_desc(req_ctx, skb, &frags, req_ctx->bfr, + param->bfr_len); + if (param->sg_len != 0) + write_sg_data_page_desc(skb, &frags, req->src, param->sg_len); + + create_wreq(ctx, wreq, req, skb, kctx_len, hash_size_in_response, + 0); + req_ctx->skb = skb; + skb_get(skb); + return skb; +} + +static int chcr_ahash_update(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct uld_ctx *u_ctx = NULL; + struct sk_buff *skb; + u8 remainder = 0, bs; + unsigned int nbytes = req->nbytes; + struct hash_wr_param params; + + bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + u_ctx = ULD_CTX(ctx); + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + if (nbytes + req_ctx->bfr_len >= bs) { + remainder = (nbytes + req_ctx->bfr_len) % bs; + nbytes = nbytes + req_ctx->bfr_len - remainder; + } else { + sg_pcopy_to_buffer(req->src, sg_nents(req->src), req_ctx->bfr + + req_ctx->bfr_len, nbytes, 0); + req_ctx->bfr_len += nbytes; + return 0; + } + + params.opad_needed = 0; + params.more = 1; + params.last = 0; + params.sg_len = nbytes - req_ctx->bfr_len; + params.bfr_len = req_ctx->bfr_len; + params.scmd1 = 0; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->result = 0; + req_ctx->data_len += params.sg_len + params.bfr_len; + skb = create_final_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + + req_ctx->bfr_len = remainder; + if (remainder) + sg_pcopy_to_buffer(req->src, sg_nents(req->src), + req_ctx->bfr, remainder, req->nbytes - + remainder); + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + + return -EINPROGRESS; +} + +static void create_last_hash_block(char *bfr_ptr, unsigned int bs, u64 scmd1) +{ + memset(bfr_ptr, 0, bs); + *bfr_ptr = 0x80; + if (bs == 64) + *(__be64 *)(bfr_ptr + 56) = cpu_to_be64(scmd1 << 3); + else + *(__be64 *)(bfr_ptr + 120) = cpu_to_be64(scmd1 << 3); +} + +static int chcr_ahash_final(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct hash_wr_param params; + struct sk_buff *skb; + struct uld_ctx *u_ctx = NULL; + u8 bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + u_ctx = ULD_CTX(ctx); + if (is_hmac(crypto_ahash_tfm(rtfm))) + params.opad_needed = 1; + else + params.opad_needed = 0; + params.sg_len = 0; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->result = 1; + params.bfr_len = req_ctx->bfr_len; + req_ctx->data_len += params.bfr_len + params.sg_len; + if (req_ctx->bfr && (req_ctx->bfr_len == 0)) { + create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + params.last = 0; + params.more = 1; + params.scmd1 = 0; + params.bfr_len = bs; + + } else { + params.scmd1 = req_ctx->data_len; + params.last = 1; + params.more = 0; + } + skb = create_final_hash_wr(req, ¶ms); + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_ahash_finup(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct uld_ctx *u_ctx = NULL; + struct sk_buff *skb; + struct hash_wr_param params; + u8 bs; + + bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + u_ctx = ULD_CTX(ctx); + + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + if (is_hmac(crypto_ahash_tfm(rtfm))) + params.opad_needed = 1; + else + params.opad_needed = 0; + + params.sg_len = req->nbytes; + params.bfr_len = req_ctx->bfr_len; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->data_len += params.bfr_len + params.sg_len; + req_ctx->result = 1; + if (req_ctx->bfr && (req_ctx->bfr_len + req->nbytes) == 0) { + create_last_hash_block(req_ctx->bfr, bs, req_ctx->data_len); + params.last = 0; + params.more = 1; + params.scmd1 = 0; + params.bfr_len = bs; + } else { + params.scmd1 = req_ctx->data_len; + params.last = 1; + params.more = 0; + } + + skb = create_final_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + + return -EINPROGRESS; +} + +static int chcr_ahash_digest(struct ahash_request *req) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(req); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(req); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct uld_ctx *u_ctx = NULL; + struct sk_buff *skb; + struct hash_wr_param params; + u8 bs; + + rtfm->init(req); + bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + u_ctx = ULD_CTX(ctx); + if (unlikely(cxgb4_is_crypto_q_full(u_ctx->lldi.ports[0], + ctx->tx_channel_id))) { + if (!(req->base.flags & CRYPTO_TFM_REQ_MAY_BACKLOG)) + return -EBUSY; + } + + if (is_hmac(crypto_ahash_tfm(rtfm))) + params.opad_needed = 1; + else + params.opad_needed = 0; + + params.last = 0; + params.more = 0; + params.sg_len = req->nbytes; + params.bfr_len = 0; + params.scmd1 = 0; + get_alg_config(¶ms.alg_prm, crypto_ahash_digestsize(rtfm)); + req_ctx->result = 1; + req_ctx->data_len += params.bfr_len + params.sg_len; + + if (req_ctx->bfr && req->nbytes == 0) { + create_last_hash_block(req_ctx->bfr, bs, 0); + params.more = 1; + params.bfr_len = bs; + } + + skb = create_final_hash_wr(req, ¶ms); + if (!skb) + return -ENOMEM; + + skb->dev = u_ctx->lldi.ports[0]; + set_wr_txq(skb, CPL_PRIORITY_DATA, ctx->tx_channel_id); + chcr_send_wr(skb); + return -EINPROGRESS; +} + +static int chcr_ahash_export(struct ahash_request *areq, void *out) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct chcr_ahash_req_ctx *state = out; + + state->bfr_len = req_ctx->bfr_len; + state->data_len = req_ctx->data_len; + memcpy(state->bfr, req_ctx->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + memcpy(state->partial_hash, req_ctx->partial_hash, + CHCR_HASH_MAX_DIGEST_SIZE); + return 0; +} + +static int chcr_ahash_import(struct ahash_request *areq, const void *in) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct chcr_ahash_req_ctx *state = (struct chcr_ahash_req_ctx *)in; + + req_ctx->bfr_len = state->bfr_len; + req_ctx->data_len = state->data_len; + req_ctx->dummy_payload_ptr = NULL; + memcpy(req_ctx->bfr, state->bfr, CHCR_HASH_MAX_BLOCK_SIZE_128); + memcpy(req_ctx->partial_hash, state->partial_hash, + CHCR_HASH_MAX_DIGEST_SIZE); + return 0; +} + +static int chcr_ahash_setkey(struct crypto_ahash *tfm, const u8 *key, + unsigned int keylen) +{ + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + unsigned int digestsize = crypto_ahash_digestsize(tfm); + unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); + unsigned int i, err = 0, updated_digestsize; + + /* + * use the key to calculate the ipad and opad. ipad will sent with the + * first request's data. opad will be sent with the final hash result + * ipad in hmacctx->ipad and opad in hmacctx->opad location + */ + if (!hmacctx->desc) + return -EINVAL; + if (keylen > bs) { + err = crypto_shash_digest(hmacctx->desc, key, keylen, + hmacctx->ipad); + if (err) + goto out; + keylen = digestsize; + } else { + memcpy(hmacctx->ipad, key, keylen); + } + memset(hmacctx->ipad + keylen, 0, bs - keylen); + memcpy(hmacctx->opad, hmacctx->ipad, bs); + + for (i = 0; i < bs / sizeof(int); i++) { + *((unsigned int *)(&hmacctx->ipad) + i) ^= IPAD_DATA; + *((unsigned int *)(&hmacctx->opad) + i) ^= OPAD_DATA; + } + + updated_digestsize = digestsize; + if (digestsize == SHA224_DIGEST_SIZE) + updated_digestsize = SHA256_DIGEST_SIZE; + else if (digestsize == SHA384_DIGEST_SIZE) + updated_digestsize = SHA512_DIGEST_SIZE; + err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->ipad, + hmacctx->ipad, digestsize); + if (err) + goto out; + chcr_change_order(hmacctx->ipad, updated_digestsize); + + err = chcr_compute_partial_hash(hmacctx->desc, hmacctx->opad, + hmacctx->opad, digestsize); + if (err) + goto out; + chcr_change_order(hmacctx->opad, updated_digestsize); +out: + return err; +} + +static int chcr_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int key_len) +{ + struct chcr_context *ctx = crypto_ablkcipher_ctx(tfm); + struct ablk_ctx *ablkctx = ABLK_CTX(ctx); + int status = 0; + unsigned short context_size = 0; + + if ((key_len == (AES_KEYSIZE_128 << 1)) || + (key_len == (AES_KEYSIZE_256 << 1))) { + memcpy(ablkctx->key, key, key_len); + ablkctx->enckey_len = key_len; + context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4; + ablkctx->key_ctx_hdr = + FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ? + CHCR_KEYCTX_CIPHER_KEY_SIZE_128 : + CHCR_KEYCTX_CIPHER_KEY_SIZE_256, + CHCR_KEYCTX_NO_KEY, 1, + 0, context_size); + ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS; + } else { + crypto_tfm_set_flags((struct crypto_tfm *)tfm, + CRYPTO_TFM_RES_BAD_KEY_LEN); + ablkctx->enckey_len = 0; + status = -EINVAL; + } + return status; +} + +static int chcr_sha_init(struct ahash_request *areq) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct crypto_ahash *tfm = crypto_ahash_reqtfm(areq); + int digestsize = crypto_ahash_digestsize(tfm); + + req_ctx->data_len = 0; + req_ctx->dummy_payload_ptr = NULL; + req_ctx->bfr_len = 0; + req_ctx->skb = NULL; + req_ctx->result = 0; + copy_hash_init_values(req_ctx->partial_hash, digestsize); + return 0; +} + +static int chcr_sha_cra_init(struct crypto_tfm *tfm) +{ + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct chcr_ahash_req_ctx)); + return chcr_device_init(crypto_tfm_ctx(tfm)); +} + +static int chcr_hmac_init(struct ahash_request *areq) +{ + struct chcr_ahash_req_ctx *req_ctx = ahash_request_ctx(areq); + struct crypto_ahash *rtfm = crypto_ahash_reqtfm(areq); + struct chcr_context *ctx = crypto_tfm_ctx(crypto_ahash_tfm(rtfm)); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + unsigned int digestsize = crypto_ahash_digestsize(rtfm); + unsigned int bs = crypto_tfm_alg_blocksize(crypto_ahash_tfm(rtfm)); + + chcr_sha_init(areq); + req_ctx->data_len = bs; + if (is_hmac(crypto_ahash_tfm(rtfm))) { + if (digestsize == SHA224_DIGEST_SIZE) + memcpy(req_ctx->partial_hash, hmacctx->ipad, + SHA256_DIGEST_SIZE); + else if (digestsize == SHA384_DIGEST_SIZE) + memcpy(req_ctx->partial_hash, hmacctx->ipad, + SHA512_DIGEST_SIZE); + else + memcpy(req_ctx->partial_hash, hmacctx->ipad, + digestsize); + } + return 0; +} + +static int chcr_hmac_cra_init(struct crypto_tfm *tfm) +{ + struct chcr_context *ctx = crypto_tfm_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + unsigned int digestsize = + crypto_ahash_digestsize(__crypto_ahash_cast(tfm)); + + crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), + sizeof(struct chcr_ahash_req_ctx)); + hmacctx->desc = chcr_alloc_shash(digestsize); + if (IS_ERR(hmacctx->desc)) + return PTR_ERR(hmacctx->desc); + return chcr_device_init(crypto_tfm_ctx(tfm)); +} + +static void chcr_free_shash(struct shash_desc *desc) +{ + crypto_free_shash(desc->tfm); + kfree(desc); +} + +static void chcr_hmac_cra_exit(struct crypto_tfm *tfm) +{ + struct chcr_context *ctx = crypto_tfm_ctx(tfm); + struct hmac_ctx *hmacctx = HMAC_CTX(ctx); + + if (hmacctx->desc) { + chcr_free_shash(hmacctx->desc); + hmacctx->desc = NULL; + } +} + +static struct chcr_alg_template driver_algs[] = { + /* AES-CBC */ + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .is_registered = 0, + .alg.crypto = { + .cra_name = "cbc(aes)", + .cra_driver_name = "cbc(aes-chcr)", + .cra_priority = CHCR_CRA_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct ablk_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = chcr_cra_init, + .cra_exit = NULL, + .cra_u.ablkcipher = { + .min_keysize = AES_MIN_KEY_SIZE, + .max_keysize = AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = chcr_aes_cbc_setkey, + .encrypt = chcr_aes_encrypt, + .decrypt = chcr_aes_decrypt, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_ABLKCIPHER, + .is_registered = 0, + .alg.crypto = { + .cra_name = "xts(aes)", + .cra_driver_name = "xts(aes-chcr)", + .cra_priority = CHCR_CRA_PRIORITY, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | + CRYPTO_ALG_ASYNC, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct chcr_context) + + sizeof(struct ablk_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = chcr_cra_init, + .cra_exit = NULL, + .cra_u = { + .ablkcipher = { + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .ivsize = AES_BLOCK_SIZE, + .setkey = chcr_aes_xts_setkey, + .encrypt = chcr_aes_encrypt, + .decrypt = chcr_aes_decrypt, + } + } + } + }, + /* SHA */ + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-chcr", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-chcr", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-chcr", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-chcr", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_AHASH, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-chcr", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + } + }, + /* HMAC */ + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA1_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha1)", + .cra_driver_name = "hmac(sha1-chcr)", + .cra_blocksize = SHA1_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA224_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha224)", + .cra_driver_name = "hmac(sha224-chcr)", + .cra_blocksize = SHA224_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA256_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha256)", + .cra_driver_name = "hmac(sha256-chcr)", + .cra_blocksize = SHA256_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA384_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha384)", + .cra_driver_name = "hmac(sha384-chcr)", + .cra_blocksize = SHA384_BLOCK_SIZE, + } + } + }, + { + .type = CRYPTO_ALG_TYPE_HMAC, + .is_registered = 0, + .alg.hash = { + .halg.digestsize = SHA512_DIGEST_SIZE, + .halg.base = { + .cra_name = "hmac(sha512)", + .cra_driver_name = "hmac(sha512-chcr)", + .cra_blocksize = SHA512_BLOCK_SIZE, + } + } + }, +}; + +/* + * chcr_unregister_alg - Deregister crypto algorithms with + * kernel framework. + */ +static int chcr_unregister_alg(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + if (driver_algs[i].is_registered) + crypto_unregister_alg( + &driver_algs[i].alg.crypto); + break; + case CRYPTO_ALG_TYPE_AHASH: + if (driver_algs[i].is_registered) + crypto_unregister_ahash( + &driver_algs[i].alg.hash); + break; + } + driver_algs[i].is_registered = 0; + } + return 0; +} + +#define SZ_AHASH_CTX sizeof(struct chcr_context) +#define SZ_AHASH_H_CTX (sizeof(struct chcr_context) + sizeof(struct hmac_ctx)) +#define SZ_AHASH_REQ_CTX sizeof(struct chcr_ahash_req_ctx) +#define AHASH_CRA_FLAGS (CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC) + +/* + * chcr_register_alg - Register crypto algorithms with kernel framework. + */ +static int chcr_register_alg(void) +{ + struct crypto_alg ai; + struct ahash_alg *a_hash; + int err = 0, i; + char *name = NULL; + + for (i = 0; i < ARRAY_SIZE(driver_algs); i++) { + if (driver_algs[i].is_registered) + continue; + switch (driver_algs[i].type & CRYPTO_ALG_TYPE_MASK) { + case CRYPTO_ALG_TYPE_ABLKCIPHER: + err = crypto_register_alg(&driver_algs[i].alg.crypto); + name = driver_algs[i].alg.crypto.cra_driver_name; + break; + case CRYPTO_ALG_TYPE_AHASH: + a_hash = &driver_algs[i].alg.hash; + a_hash->update = chcr_ahash_update; + a_hash->final = chcr_ahash_final; + a_hash->finup = chcr_ahash_finup; + a_hash->digest = chcr_ahash_digest; + a_hash->export = chcr_ahash_export; + a_hash->import = chcr_ahash_import; + a_hash->halg.statesize = SZ_AHASH_REQ_CTX; + a_hash->halg.base.cra_priority = CHCR_CRA_PRIORITY; + a_hash->halg.base.cra_module = THIS_MODULE; + a_hash->halg.base.cra_flags = AHASH_CRA_FLAGS; + a_hash->halg.base.cra_alignmask = 0; + a_hash->halg.base.cra_exit = NULL; + a_hash->halg.base.cra_type = &crypto_ahash_type; + + if (driver_algs[i].type == CRYPTO_ALG_TYPE_HMAC) { + a_hash->halg.base.cra_init = chcr_hmac_cra_init; + a_hash->halg.base.cra_exit = chcr_hmac_cra_exit; + a_hash->init = chcr_hmac_init; + a_hash->setkey = chcr_ahash_setkey; + a_hash->halg.base.cra_ctxsize = SZ_AHASH_H_CTX; + } else { + a_hash->init = chcr_sha_init; + a_hash->halg.base.cra_ctxsize = SZ_AHASH_CTX; + a_hash->halg.base.cra_init = chcr_sha_cra_init; + } + err = crypto_register_ahash(&driver_algs[i].alg.hash); + ai = driver_algs[i].alg.hash.halg.base; + name = ai.cra_driver_name; + break; + } + if (err) { + pr_err("chcr : %s : Algorithm registration failed\n", + name); + goto register_err; + } else { + driver_algs[i].is_registered = 1; + } + } + return 0; + +register_err: + chcr_unregister_alg(); + return err; +} + +/* + * start_crypto - Register the crypto algorithms. + * This should called once when the first device comesup. After this + * kernel will start calling driver APIs for crypto operations. + */ +int start_crypto(void) +{ + return chcr_register_alg(); +} + +/* + * stop_crypto - Deregister all the crypto algorithms with kernel. + * This should be called once when the last device goes down. After this + * kernel will not call the driver API for crypto operations. + */ +int stop_crypto(void) +{ + chcr_unregister_alg(); + return 0; +} diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h new file mode 100644 index 000000000000..ec64fbcdeb49 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_algo.h @@ -0,0 +1,471 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __CHCR_ALGO_H__ +#define __CHCR_ALGO_H__ + +/* Crypto key context */ +#define KEY_CONTEXT_CTX_LEN_S 24 +#define KEY_CONTEXT_CTX_LEN_M 0xff +#define KEY_CONTEXT_CTX_LEN_V(x) ((x) << KEY_CONTEXT_CTX_LEN_S) +#define KEY_CONTEXT_CTX_LEN_G(x) \ + (((x) >> KEY_CONTEXT_CTX_LEN_S) & KEY_CONTEXT_CTX_LEN_M) + +#define KEY_CONTEXT_DUAL_CK_S 12 +#define KEY_CONTEXT_DUAL_CK_M 0x1 +#define KEY_CONTEXT_DUAL_CK_V(x) ((x) << KEY_CONTEXT_DUAL_CK_S) +#define KEY_CONTEXT_DUAL_CK_G(x) \ +(((x) >> KEY_CONTEXT_DUAL_CK_S) & KEY_CONTEXT_DUAL_CK_M) +#define KEY_CONTEXT_DUAL_CK_F KEY_CONTEXT_DUAL_CK_V(1U) + +#define KEY_CONTEXT_SALT_PRESENT_S 10 +#define KEY_CONTEXT_SALT_PRESENT_M 0x1 +#define KEY_CONTEXT_SALT_PRESENT_V(x) ((x) << KEY_CONTEXT_SALT_PRESENT_S) +#define KEY_CONTEXT_SALT_PRESENT_G(x) \ + (((x) >> KEY_CONTEXT_SALT_PRESENT_S) & \ + KEY_CONTEXT_SALT_PRESENT_M) +#define KEY_CONTEXT_SALT_PRESENT_F KEY_CONTEXT_SALT_PRESENT_V(1U) + +#define KEY_CONTEXT_VALID_S 0 +#define KEY_CONTEXT_VALID_M 0x1 +#define KEY_CONTEXT_VALID_V(x) ((x) << KEY_CONTEXT_VALID_S) +#define KEY_CONTEXT_VALID_G(x) \ + (((x) >> KEY_CONTEXT_VALID_S) & \ + KEY_CONTEXT_VALID_M) +#define KEY_CONTEXT_VALID_F KEY_CONTEXT_VALID_V(1U) + +#define KEY_CONTEXT_CK_SIZE_S 6 +#define KEY_CONTEXT_CK_SIZE_M 0xf +#define KEY_CONTEXT_CK_SIZE_V(x) ((x) << KEY_CONTEXT_CK_SIZE_S) +#define KEY_CONTEXT_CK_SIZE_G(x) \ + (((x) >> KEY_CONTEXT_CK_SIZE_S) & KEY_CONTEXT_CK_SIZE_M) + +#define KEY_CONTEXT_MK_SIZE_S 2 +#define KEY_CONTEXT_MK_SIZE_M 0xf +#define KEY_CONTEXT_MK_SIZE_V(x) ((x) << KEY_CONTEXT_MK_SIZE_S) +#define KEY_CONTEXT_MK_SIZE_G(x) \ + (((x) >> KEY_CONTEXT_MK_SIZE_S) & KEY_CONTEXT_MK_SIZE_M) + +#define KEY_CONTEXT_OPAD_PRESENT_S 11 +#define KEY_CONTEXT_OPAD_PRESENT_M 0x1 +#define KEY_CONTEXT_OPAD_PRESENT_V(x) ((x) << KEY_CONTEXT_OPAD_PRESENT_S) +#define KEY_CONTEXT_OPAD_PRESENT_G(x) \ + (((x) >> KEY_CONTEXT_OPAD_PRESENT_S) & \ + KEY_CONTEXT_OPAD_PRESENT_M) +#define KEY_CONTEXT_OPAD_PRESENT_F KEY_CONTEXT_OPAD_PRESENT_V(1U) + +#define CHCR_HASH_MAX_DIGEST_SIZE 64 +#define CHCR_MAX_SHA_DIGEST_SIZE 64 + +#define IPSEC_TRUNCATED_ICV_SIZE 12 +#define TLS_TRUNCATED_HMAC_SIZE 10 +#define CBCMAC_DIGEST_SIZE 16 +#define MAX_HASH_NAME 20 + +#define SHA1_INIT_STATE_5X4B 5 +#define SHA256_INIT_STATE_8X4B 8 +#define SHA512_INIT_STATE_8X8B 8 +#define SHA1_INIT_STATE SHA1_INIT_STATE_5X4B +#define SHA224_INIT_STATE SHA256_INIT_STATE_8X4B +#define SHA256_INIT_STATE SHA256_INIT_STATE_8X4B +#define SHA384_INIT_STATE SHA512_INIT_STATE_8X8B +#define SHA512_INIT_STATE SHA512_INIT_STATE_8X8B + +#define DUMMY_BYTES 16 + +#define IPAD_DATA 0x36363636 +#define OPAD_DATA 0x5c5c5c5c + +#define TRANSHDR_SIZE(alignedkctx_len)\ + (sizeof(struct ulptx_idata) +\ + sizeof(struct ulp_txpkt) +\ + sizeof(struct fw_crypto_lookaside_wr) +\ + sizeof(struct cpl_tx_sec_pdu) +\ + (alignedkctx_len)) +#define CIPHER_TRANSHDR_SIZE(alignedkctx_len, sge_pairs) \ + (TRANSHDR_SIZE(alignedkctx_len) + sge_pairs +\ + sizeof(struct cpl_rx_phys_dsgl)) +#define HASH_TRANSHDR_SIZE(alignedkctx_len)\ + (TRANSHDR_SIZE(alignedkctx_len) + DUMMY_BYTES) + +#define SEC_CPL_OFFSET (sizeof(struct fw_crypto_lookaside_wr) + \ + sizeof(struct ulp_txpkt) + \ + sizeof(struct ulptx_idata)) + +#define FILL_SEC_CPL_OP_IVINSR(id, len, hldr, ofst) \ + htonl( \ + CPL_TX_SEC_PDU_OPCODE_V(CPL_TX_SEC_PDU) | \ + CPL_TX_SEC_PDU_RXCHID_V((id)) | \ + CPL_TX_SEC_PDU_ACKFOLLOWS_V(0) | \ + CPL_TX_SEC_PDU_ULPTXLPBK_V(1) | \ + CPL_TX_SEC_PDU_CPLLEN_V((len)) | \ + CPL_TX_SEC_PDU_PLACEHOLDER_V((hldr)) | \ + CPL_TX_SEC_PDU_IVINSRTOFST_V((ofst))) + +#define FILL_SEC_CPL_CIPHERSTOP_HI(a_start, a_stop, c_start, c_stop_hi) \ + htonl( \ + CPL_TX_SEC_PDU_AADSTART_V((a_start)) | \ + CPL_TX_SEC_PDU_AADSTOP_V((a_stop)) | \ + CPL_TX_SEC_PDU_CIPHERSTART_V((c_start)) | \ + CPL_TX_SEC_PDU_CIPHERSTOP_HI_V((c_stop_hi))) + +#define FILL_SEC_CPL_AUTHINSERT(c_stop_lo, a_start, a_stop, a_inst) \ + htonl( \ + CPL_TX_SEC_PDU_CIPHERSTOP_LO_V((c_stop_lo)) | \ + CPL_TX_SEC_PDU_AUTHSTART_V((a_start)) | \ + CPL_TX_SEC_PDU_AUTHSTOP_V((a_stop)) | \ + CPL_TX_SEC_PDU_AUTHINSERT_V((a_inst))) + +#define FILL_SEC_CPL_SCMD0_SEQNO(ctrl, seq, cmode, amode, opad, size, nivs) \ + htonl( \ + SCMD_SEQ_NO_CTRL_V(0) | \ + SCMD_STATUS_PRESENT_V(0) | \ + SCMD_PROTO_VERSION_V(CHCR_SCMD_PROTO_VERSION_GENERIC) | \ + SCMD_ENC_DEC_CTRL_V((ctrl)) | \ + SCMD_CIPH_AUTH_SEQ_CTRL_V((seq)) | \ + SCMD_CIPH_MODE_V((cmode)) | \ + SCMD_AUTH_MODE_V((amode)) | \ + SCMD_HMAC_CTRL_V((opad)) | \ + SCMD_IV_SIZE_V((size)) | \ + SCMD_NUM_IVS_V((nivs))) + +#define FILL_SEC_CPL_IVGEN_HDRLEN(last, more, ctx_in, mac, ivdrop, len) htonl( \ + SCMD_ENB_DBGID_V(0) | \ + SCMD_IV_GEN_CTRL_V(0) | \ + SCMD_LAST_FRAG_V((last)) | \ + SCMD_MORE_FRAGS_V((more)) | \ + SCMD_TLS_COMPPDU_V(0) | \ + SCMD_KEY_CTX_INLINE_V((ctx_in)) | \ + SCMD_TLS_FRAG_ENABLE_V(0) | \ + SCMD_MAC_ONLY_V((mac)) | \ + SCMD_AADIVDROP_V((ivdrop)) | \ + SCMD_HDR_LEN_V((len))) + +#define FILL_KEY_CTX_HDR(ck_size, mk_size, d_ck, opad, ctx_len) \ + htonl(KEY_CONTEXT_VALID_V(1) | \ + KEY_CONTEXT_CK_SIZE_V((ck_size)) | \ + KEY_CONTEXT_MK_SIZE_V(mk_size) | \ + KEY_CONTEXT_DUAL_CK_V((d_ck)) | \ + KEY_CONTEXT_OPAD_PRESENT_V((opad)) | \ + KEY_CONTEXT_SALT_PRESENT_V(1) | \ + KEY_CONTEXT_CTX_LEN_V((ctx_len))) + +#define FILL_WR_OP_CCTX_SIZE(len, ctx_len) \ + htonl( \ + FW_CRYPTO_LOOKASIDE_WR_OPCODE_V( \ + FW_CRYPTO_LOOKASIDE_WR) | \ + FW_CRYPTO_LOOKASIDE_WR_COMPL_V(0) | \ + FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V((len)) | \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(1) | \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V((ctx_len))) + +#define FILL_WR_RX_Q_ID(cid, qid, wr_iv) \ + htonl( \ + FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V((cid)) | \ + FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V((qid)) | \ + FW_CRYPTO_LOOKASIDE_WR_LCB_V(0) | \ + FW_CRYPTO_LOOKASIDE_WR_IV_V((wr_iv))) + +#define FILL_ULPTX_CMD_DEST(cid) \ + htonl(ULPTX_CMD_V(ULP_TX_PKT) | \ + ULP_TXPKT_DEST_V(0) | \ + ULP_TXPKT_DATAMODIFY_V(0) | \ + ULP_TXPKT_CHANNELID_V((cid)) | \ + ULP_TXPKT_RO_V(1) | \ + ULP_TXPKT_FID_V(0)) + +#define KEYCTX_ALIGN_PAD(bs) ({unsigned int _bs = (bs);\ + _bs == SHA1_DIGEST_SIZE ? 12 : 0; }) + +#define FILL_PLD_SIZE_HASH_SIZE(payload_sgl_len, sgl_lengths, total_frags) \ + htonl(FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(payload_sgl_len ? \ + sgl_lengths[total_frags] : 0) |\ + FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(0)) + +#define FILL_LEN_PKD(calc_tx_flits_ofld, skb) \ + htonl(FW_CRYPTO_LOOKASIDE_WR_LEN16_V(DIV_ROUND_UP((\ + calc_tx_flits_ofld(skb) * 8), 16))) + +#define FILL_CMD_MORE(immdatalen) htonl(ULPTX_CMD_V(ULP_TX_SC_IMM) |\ + ULP_TX_SC_MORE_V((immdatalen) ? 0 : 1)) + +#define MAX_NK 8 +#define CRYPTO_MAX_IMM_TX_PKT_LEN 256 + +struct algo_param { + unsigned int auth_mode; + unsigned int mk_size; + unsigned int result_size; +}; + +struct hash_wr_param { + unsigned int opad_needed; + unsigned int more; + unsigned int last; + struct algo_param alg_prm; + unsigned int sg_len; + unsigned int bfr_len; + u64 scmd1; +}; + +enum { + AES_KEYLENGTH_128BIT = 128, + AES_KEYLENGTH_192BIT = 192, + AES_KEYLENGTH_256BIT = 256 +}; + +enum { + KEYLENGTH_3BYTES = 3, + KEYLENGTH_4BYTES = 4, + KEYLENGTH_6BYTES = 6, + KEYLENGTH_8BYTES = 8 +}; + +enum { + NUMBER_OF_ROUNDS_10 = 10, + NUMBER_OF_ROUNDS_12 = 12, + NUMBER_OF_ROUNDS_14 = 14, +}; + +/* + * CCM defines values of 4, 6, 8, 10, 12, 14, and 16 octets, + * where they indicate the size of the integrity check value (ICV) + */ +enum { + AES_CCM_ICV_4 = 4, + AES_CCM_ICV_6 = 6, + AES_CCM_ICV_8 = 8, + AES_CCM_ICV_10 = 10, + AES_CCM_ICV_12 = 12, + AES_CCM_ICV_14 = 14, + AES_CCM_ICV_16 = 16 +}; + +struct hash_op_params { + unsigned char mk_size; + unsigned char pad_align; + unsigned char auth_mode; + char hash_name[MAX_HASH_NAME]; + unsigned short block_size; + unsigned short word_size; + unsigned short ipad_size; +}; + +struct phys_sge_pairs { + __be16 len[8]; + __be64 addr[8]; +}; + +struct phys_sge_parm { + unsigned int nents; + unsigned int obsize; + unsigned short qid; + unsigned char align; +}; + +struct crypto_result { + struct completion completion; + int err; +}; + +static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = { + SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, +}; + +static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = { + SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3, + SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7, +}; + +static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = { + SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, + SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7, +}; + +static const u64 sha384_init[SHA512_DIGEST_SIZE / 8] = { + SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3, + SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7, +}; + +static const u64 sha512_init[SHA512_DIGEST_SIZE / 8] = { + SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3, + SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7, +}; + +static inline void copy_hash_init_values(char *key, int digestsize) +{ + u8 i; + __be32 *dkey = (__be32 *)key; + u64 *ldkey = (u64 *)key; + __be64 *sha384 = (__be64 *)sha384_init; + __be64 *sha512 = (__be64 *)sha512_init; + + switch (digestsize) { + case SHA1_DIGEST_SIZE: + for (i = 0; i < SHA1_INIT_STATE; i++) + dkey[i] = cpu_to_be32(sha1_init[i]); + break; + case SHA224_DIGEST_SIZE: + for (i = 0; i < SHA224_INIT_STATE; i++) + dkey[i] = cpu_to_be32(sha224_init[i]); + break; + case SHA256_DIGEST_SIZE: + for (i = 0; i < SHA256_INIT_STATE; i++) + dkey[i] = cpu_to_be32(sha256_init[i]); + break; + case SHA384_DIGEST_SIZE: + for (i = 0; i < SHA384_INIT_STATE; i++) + ldkey[i] = be64_to_cpu(sha384[i]); + break; + case SHA512_DIGEST_SIZE: + for (i = 0; i < SHA512_INIT_STATE; i++) + ldkey[i] = be64_to_cpu(sha512[i]); + break; + } +} + +static const u8 sgl_lengths[20] = { + 0, 1, 2, 3, 4, 4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15 +}; + +/* Number of len fields(8) * size of one addr field */ +#define PHYSDSGL_MAX_LEN_SIZE 16 + +static inline u16 get_space_for_phys_dsgl(unsigned int sgl_entr) +{ + /* len field size + addr field size */ + return ((sgl_entr >> 3) + ((sgl_entr % 8) ? + 1 : 0)) * PHYSDSGL_MAX_LEN_SIZE + + (sgl_entr << 3) + ((sgl_entr % 2 ? 1 : 0) << 3); +} + +/* The AES s-transform matrix (s-box). */ +static const u8 aes_sbox[256] = { + 99, 124, 119, 123, 242, 107, 111, 197, 48, 1, 103, 43, 254, 215, + 171, 118, 202, 130, 201, 125, 250, 89, 71, 240, 173, 212, 162, 175, + 156, 164, 114, 192, 183, 253, 147, 38, 54, 63, 247, 204, 52, 165, + 229, 241, 113, 216, 49, 21, 4, 199, 35, 195, 24, 150, 5, 154, 7, + 18, 128, 226, 235, 39, 178, 117, 9, 131, 44, 26, 27, 110, 90, + 160, 82, 59, 214, 179, 41, 227, 47, 132, 83, 209, 0, 237, 32, + 252, 177, 91, 106, 203, 190, 57, 74, 76, 88, 207, 208, 239, 170, + 251, 67, 77, 51, 133, 69, 249, 2, 127, 80, 60, 159, 168, 81, + 163, 64, 143, 146, 157, 56, 245, 188, 182, 218, 33, 16, 255, 243, + 210, 205, 12, 19, 236, 95, 151, 68, 23, 196, 167, 126, 61, 100, + 93, 25, 115, 96, 129, 79, 220, 34, 42, 144, 136, 70, 238, 184, + 20, 222, 94, 11, 219, 224, 50, 58, 10, 73, 6, 36, 92, 194, + 211, 172, 98, 145, 149, 228, 121, 231, 200, 55, 109, 141, 213, 78, + 169, 108, 86, 244, 234, 101, 122, 174, 8, 186, 120, 37, 46, 28, 166, + 180, 198, 232, 221, 116, 31, 75, 189, 139, 138, 112, 62, 181, 102, + 72, 3, 246, 14, 97, 53, 87, 185, 134, 193, 29, 158, 225, 248, + 152, 17, 105, 217, 142, 148, 155, 30, 135, 233, 206, 85, 40, 223, + 140, 161, 137, 13, 191, 230, 66, 104, 65, 153, 45, 15, 176, 84, + 187, 22 +}; + +static u32 aes_ks_subword(const u32 w) +{ + u8 bytes[4]; + + *(u32 *)(&bytes[0]) = w; + bytes[0] = aes_sbox[bytes[0]]; + bytes[1] = aes_sbox[bytes[1]]; + bytes[2] = aes_sbox[bytes[2]]; + bytes[3] = aes_sbox[bytes[3]]; + return *(u32 *)(&bytes[0]); +} + +static u32 round_constant[11] = { + 0x01000000, 0x02000000, 0x04000000, 0x08000000, + 0x10000000, 0x20000000, 0x40000000, 0x80000000, + 0x1B000000, 0x36000000, 0x6C000000 +}; + +/* dec_key - OUTPUT - Reverse round key + * key - INPUT - key + * keylength - INPUT - length of the key in number of bits + */ +static inline void get_aes_decrypt_key(unsigned char *dec_key, + const unsigned char *key, + unsigned int keylength) +{ + u32 temp; + u32 w_ring[MAX_NK]; + int i, j, k = 0; + u8 nr, nk; + + switch (keylength) { + case AES_KEYLENGTH_128BIT: + nk = KEYLENGTH_4BYTES; + nr = NUMBER_OF_ROUNDS_10; + break; + + case AES_KEYLENGTH_192BIT: + nk = KEYLENGTH_6BYTES; + nr = NUMBER_OF_ROUNDS_12; + break; + case AES_KEYLENGTH_256BIT: + nk = KEYLENGTH_8BYTES; + nr = NUMBER_OF_ROUNDS_14; + break; + default: + return; + } + for (i = 0; i < nk; i++ ) + w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]); + + i = 0; + temp = w_ring[nk - 1]; + while(i + nk < (nr + 1) * 4) { + if(!(i % nk)) { + /* RotWord(temp) */ + temp = (temp << 8) | (temp >> 24); + temp = aes_ks_subword(temp); + temp ^= round_constant[i / nk]; + } + else if (nk == 8 && (i % 4 == 0)) + temp = aes_ks_subword(temp); + w_ring[i % nk] ^= temp; + temp = w_ring[i % nk]; + i++; + } + for (k = 0, j = i % nk; k < nk; k++) { + *((u32 *)dec_key + k) = htonl(w_ring[j]); + j--; + if(j < 0) + j += nk; + } +} + +#endif /* __CHCR_ALGO_H__ */ diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c new file mode 100644 index 000000000000..fb5f9bbfa09c --- /dev/null +++ b/drivers/crypto/chelsio/chcr_core.c @@ -0,0 +1,238 @@ +/** + * This file is part of the Chelsio T4/T5/T6 Ethernet driver for Linux. + * + * Copyright (C) 2011-2016 Chelsio Communications. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation. + * + * Written and Maintained by: + * Manoj Malviya (manojmalviya@chelsio.com) + * Atul Gupta (atul.gupta@chelsio.com) + * Jitendra Lulla (jlulla@chelsio.com) + * Yeshaswi M R Gowda (yeshaswi@chelsio.com) + * Harsh Jain (harsh@chelsio.com) + */ + +#include +#include +#include + +#include +#include + +#include "t4_msg.h" +#include "chcr_core.h" +#include "cxgb4_uld.h" + +static LIST_HEAD(uld_ctx_list); +static DEFINE_MUTEX(dev_mutex); +static atomic_t dev_count; + +typedef int (*chcr_handler_func)(struct chcr_dev *dev, unsigned char *input); +static int cpl_fw6_pld_handler(struct chcr_dev *dev, unsigned char *input); +static void *chcr_uld_add(const struct cxgb4_lld_info *lld); +static int chcr_uld_state_change(void *handle, enum cxgb4_state state); + +static chcr_handler_func work_handlers[NUM_CPL_CMDS] = { + [CPL_FW6_PLD] = cpl_fw6_pld_handler, +}; + +static struct cxgb4_uld_info chcr_uld_info = { + .name = DRV_MODULE_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .add = chcr_uld_add, + .state_change = chcr_uld_state_change, + .rx_handler = chcr_uld_rx_handler, +}; + +int assign_chcr_device(struct chcr_dev **dev) +{ + struct uld_ctx *u_ctx; + + /* + * Which device to use if multiple devices are available TODO + * May be select the device based on round robin. One session + * must go to the same device to maintain the ordering. + */ + mutex_lock(&dev_mutex); /* TODO ? */ + u_ctx = list_first_entry(&uld_ctx_list, struct uld_ctx, entry); + if (!u_ctx) { + mutex_unlock(&dev_mutex); + return -ENXIO; + } + + *dev = u_ctx->dev; + mutex_unlock(&dev_mutex); + return 0; +} + +static int chcr_dev_add(struct uld_ctx *u_ctx) +{ + struct chcr_dev *dev; + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) + return -ENXIO; + + spin_lock_init(&dev->lock_chcr_dev); + u_ctx->dev = dev; + dev->u_ctx = u_ctx; + atomic_inc(&dev_count); + return 0; +} + +static int chcr_dev_remove(struct uld_ctx *u_ctx) +{ + kfree(u_ctx->dev); + u_ctx->dev = NULL; + atomic_dec(&dev_count); + return 0; +} + +static int cpl_fw6_pld_handler(struct chcr_dev *dev, + unsigned char *input) +{ + struct crypto_async_request *req; + struct cpl_fw6_pld *fw6_pld; + u32 ack_err_status = 0; + int error_status = 0; + + fw6_pld = (struct cpl_fw6_pld *)input; + req = (struct crypto_async_request *)(uintptr_t)be64_to_cpu( + fw6_pld->data[1]); + + ack_err_status = + ntohl(*(__be32 *)((unsigned char *)&fw6_pld->data[0] + 4)); + if (ack_err_status) { + if (CHK_MAC_ERR_BIT(ack_err_status) || + CHK_PAD_ERR_BIT(ack_err_status)) + error_status = -EINVAL; + } + /* call completion callback with failure status */ + if (req) { + if (!chcr_handle_resp(req, input, error_status)) + req->complete(req, error_status); + else + return -EINVAL; + } else { + pr_err("Incorrect request address from the firmware\n"); + return -EFAULT; + } + return 0; +} + +int chcr_send_wr(struct sk_buff *skb) +{ + return cxgb4_ofld_send(skb->dev, skb); +} + +static void *chcr_uld_add(const struct cxgb4_lld_info *lld) +{ + struct uld_ctx *u_ctx; + + /* Create the device and add it in the device list */ + u_ctx = kzalloc(sizeof(*u_ctx), GFP_KERNEL); + if (!u_ctx) { + u_ctx = ERR_PTR(-ENOMEM); + goto out; + } + u_ctx->lldi = *lld; + mutex_lock(&dev_mutex); + list_add_tail(&u_ctx->entry, &uld_ctx_list); + mutex_unlock(&dev_mutex); +out: + return u_ctx; +} + +int chcr_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *pgl) +{ + struct uld_ctx *u_ctx = (struct uld_ctx *)handle; + struct chcr_dev *dev = u_ctx->dev; + const struct cpl_act_establish *rpl = (struct cpl_act_establish + *)rsp; + + if (rpl->ot.opcode != CPL_FW6_PLD) { + pr_err("Unsupported opcode\n"); + return 0; + } + + if (!pgl) + work_handlers[rpl->ot.opcode](dev, (unsigned char *)&rsp[1]); + else + work_handlers[rpl->ot.opcode](dev, pgl->va); + return 0; +} + +static int chcr_uld_state_change(void *handle, enum cxgb4_state state) +{ + struct uld_ctx *u_ctx = handle; + int ret = 0; + + switch (state) { + case CXGB4_STATE_UP: + if (!u_ctx->dev) { + ret = chcr_dev_add(u_ctx); + if (ret != 0) + return ret; + } + if (atomic_read(&dev_count) == 1) + ret = start_crypto(); + break; + + case CXGB4_STATE_DETACH: + if (u_ctx->dev) { + mutex_lock(&dev_mutex); + chcr_dev_remove(u_ctx); + mutex_unlock(&dev_mutex); + } + if (!atomic_read(&dev_count)) + stop_crypto(); + break; + + case CXGB4_STATE_START_RECOVERY: + case CXGB4_STATE_DOWN: + default: + break; + } + return ret; +} + +static int __init chcr_crypto_init(void) +{ + if (cxgb4_register_uld(CXGB4_ULD_CRYPTO, &chcr_uld_info)) { + pr_err("ULD register fail: No chcr crypto support in cxgb4"); + return -1; + } + + return 0; +} + +static void __exit chcr_crypto_exit(void) +{ + struct uld_ctx *u_ctx, *tmp; + + if (atomic_read(&dev_count)) + stop_crypto(); + + /* Remove all devices from list */ + mutex_lock(&dev_mutex); + list_for_each_entry_safe(u_ctx, tmp, &uld_ctx_list, entry) { + if (u_ctx->dev) + chcr_dev_remove(u_ctx); + kfree(u_ctx); + } + mutex_unlock(&dev_mutex); + cxgb4_unregister_uld(CXGB4_ULD_CRYPTO); +} + +module_init(chcr_crypto_init); +module_exit(chcr_crypto_exit); + +MODULE_DESCRIPTION("Crypto Co-processor for Chelsio Terminator cards."); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Chelsio Communications"); +MODULE_VERSION(DRV_VERSION); diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h new file mode 100644 index 000000000000..2a5c671a4232 --- /dev/null +++ b/drivers/crypto/chelsio/chcr_core.h @@ -0,0 +1,80 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __CHCR_CORE_H__ +#define __CHCR_CORE_H__ + +#include +#include "t4_hw.h" +#include "cxgb4.h" +#include "cxgb4_uld.h" + +#define DRV_MODULE_NAME "chcr" +#define DRV_VERSION "1.0.0.0" + +#define MAX_PENDING_REQ_TO_HW 20 +#define CHCR_TEST_RESPONSE_TIMEOUT 1000 + +#define PAD_ERROR_BIT 1 +#define CHK_PAD_ERR_BIT(x) (((x) >> PAD_ERROR_BIT) & 1) + +#define MAC_ERROR_BIT 0 +#define CHK_MAC_ERR_BIT(x) (((x) >> MAC_ERROR_BIT) & 1) + +struct uld_ctx; + +struct chcr_dev { + /* Request submited to h/w and waiting for response. */ + spinlock_t lock_chcr_dev; + struct crypto_queue pending_queue; + struct uld_ctx *u_ctx; + unsigned char tx_channel_id; +}; + +struct uld_ctx { + struct list_head entry; + struct cxgb4_lld_info lldi; + struct chcr_dev *dev; +}; + +int assign_chcr_device(struct chcr_dev **dev); +int chcr_send_wr(struct sk_buff *skb); +int start_crypto(void); +int stop_crypto(void); +int chcr_uld_rx_handler(void *handle, const __be64 *rsp, + const struct pkt_gl *pgl); +int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input, + int err); +#endif /* __CHCR_CORE_H__ */ diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h new file mode 100644 index 000000000000..d7d75605da8b --- /dev/null +++ b/drivers/crypto/chelsio/chcr_crypto.h @@ -0,0 +1,203 @@ +/* + * This file is part of the Chelsio T6 Crypto driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + */ + +#ifndef __CHCR_CRYPTO_H__ +#define __CHCR_CRYPTO_H__ + +/* Define following if h/w is not dropping the AAD and IV data before + * giving the processed data + */ + +#define CHCR_CRA_PRIORITY 300 + +#define CHCR_AES_MAX_KEY_LEN (2 * (AES_MAX_KEY_SIZE)) /* consider xts */ +#define CHCR_MAX_CRYPTO_IV_LEN 16 /* AES IV len */ + +#define CHCR_MAX_AUTHENC_AES_KEY_LEN 32 /* max aes key length*/ +#define CHCR_MAX_AUTHENC_SHA_KEY_LEN 128 /* max sha key length*/ + +#define CHCR_GIVENCRYPT_OP 2 +/* CPL/SCMD parameters */ + +#define CHCR_ENCRYPT_OP 0 +#define CHCR_DECRYPT_OP 1 + +#define CHCR_SCMD_SEQ_NO_CTRL_32BIT 1 +#define CHCR_SCMD_SEQ_NO_CTRL_48BIT 2 +#define CHCR_SCMD_SEQ_NO_CTRL_64BIT 3 + +#define CHCR_SCMD_PROTO_VERSION_GENERIC 4 + +#define CHCR_SCMD_AUTH_CTRL_AUTH_CIPHER 0 +#define CHCR_SCMD_AUTH_CTRL_CIPHER_AUTH 1 + +#define CHCR_SCMD_CIPHER_MODE_NOP 0 +#define CHCR_SCMD_CIPHER_MODE_AES_CBC 1 +#define CHCR_SCMD_CIPHER_MODE_GENERIC_AES 4 +#define CHCR_SCMD_CIPHER_MODE_AES_XTS 6 + +#define CHCR_SCMD_AUTH_MODE_NOP 0 +#define CHCR_SCMD_AUTH_MODE_SHA1 1 +#define CHCR_SCMD_AUTH_MODE_SHA224 2 +#define CHCR_SCMD_AUTH_MODE_SHA256 3 +#define CHCR_SCMD_AUTH_MODE_SHA512_224 5 +#define CHCR_SCMD_AUTH_MODE_SHA512_256 6 +#define CHCR_SCMD_AUTH_MODE_SHA512_384 7 +#define CHCR_SCMD_AUTH_MODE_SHA512_512 8 + +#define CHCR_SCMD_HMAC_CTRL_NOP 0 +#define CHCR_SCMD_HMAC_CTRL_NO_TRUNC 1 + +#define CHCR_SCMD_IVGEN_CTRL_HW 0 +#define CHCR_SCMD_IVGEN_CTRL_SW 1 +/* This are not really mac key size. They are intermediate values + * of sha engine and its size + */ +#define CHCR_KEYCTX_MAC_KEY_SIZE_128 0 +#define CHCR_KEYCTX_MAC_KEY_SIZE_160 1 +#define CHCR_KEYCTX_MAC_KEY_SIZE_192 2 +#define CHCR_KEYCTX_MAC_KEY_SIZE_256 3 +#define CHCR_KEYCTX_MAC_KEY_SIZE_512 4 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_128 0 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_192 1 +#define CHCR_KEYCTX_CIPHER_KEY_SIZE_256 2 +#define CHCR_KEYCTX_NO_KEY 15 + +#define CHCR_CPL_FW4_PLD_IV_OFFSET (5 * 64) /* bytes. flt #5 and #6 */ +#define CHCR_CPL_FW4_PLD_HASH_RESULT_OFFSET (7 * 64) /* bytes. flt #7 */ +#define CHCR_CPL_FW4_PLD_DATA_SIZE (4 * 64) /* bytes. flt #4 to #7 */ + +#define KEY_CONTEXT_HDR_SALT_AND_PAD 16 +#define flits_to_bytes(x) (x * 8) + +#define IV_NOP 0 +#define IV_IMMEDIATE 1 +#define IV_DSGL 2 + +#define CRYPTO_ALG_SUB_TYPE_MASK 0x0f000000 +#define CRYPTO_ALG_SUB_TYPE_HASH_HMAC 0x01000000 +#define CRYPTO_ALG_TYPE_HMAC (CRYPTO_ALG_TYPE_AHASH |\ + CRYPTO_ALG_SUB_TYPE_HASH_HMAC) + +#define MAX_SALT 4 +#define MAX_SCRATCH_PAD_SIZE 32 + +#define CHCR_HASH_MAX_BLOCK_SIZE_64 64 +#define CHCR_HASH_MAX_BLOCK_SIZE_128 128 + +/* Aligned to 128 bit boundary */ +struct _key_ctx { + __be32 ctx_hdr; + u8 salt[MAX_SALT]; + __be64 reserverd; + unsigned char key[0]; +}; + +struct ablk_ctx { + u8 enc; + unsigned int processed_len; + __be32 key_ctx_hdr; + unsigned int enckey_len; + unsigned int dst_nents; + struct scatterlist iv_sg; + u8 key[CHCR_AES_MAX_KEY_LEN]; + u8 iv[CHCR_MAX_CRYPTO_IV_LEN]; + unsigned char ciph_mode; +}; + +struct hmac_ctx { + struct shash_desc *desc; + u8 ipad[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 opad[CHCR_HASH_MAX_BLOCK_SIZE_128]; +}; + +struct __crypto_ctx { + struct hmac_ctx hmacctx[0]; + struct ablk_ctx ablkctx[0]; +}; + +struct chcr_context { + struct chcr_dev *dev; + unsigned char tx_channel_id; + struct __crypto_ctx crypto_ctx[0]; +}; + +struct chcr_ahash_req_ctx { + u32 result; + char bfr[CHCR_HASH_MAX_BLOCK_SIZE_128]; + u8 bfr_len; + /* DMA the partial hash in it */ + u8 partial_hash[CHCR_HASH_MAX_DIGEST_SIZE]; + u64 data_len; /* Data len till time */ + void *dummy_payload_ptr; + /* SKB which is being sent to the hardware for processing */ + struct sk_buff *skb; +}; + +struct chcr_blkcipher_req_ctx { + struct sk_buff *skb; +}; + +struct chcr_alg_template { + u32 type; + u32 is_registered; + union { + struct crypto_alg crypto; + struct ahash_alg hash; + } alg; +}; + +struct chcr_req_ctx { + union { + struct ahash_request *ahash_req; + struct ablkcipher_request *ablk_req; + } req; + union { + struct chcr_ahash_req_ctx *ahash_ctx; + struct chcr_blkcipher_req_ctx *ablk_ctx; + } ctx; +}; + +struct sge_opaque_hdr { + void *dev; + dma_addr_t addr[MAX_SKB_FRAGS + 1]; +}; + +typedef struct sk_buff *(*create_wr_t)(struct crypto_async_request *req, + struct chcr_context *ctx, + unsigned short qid, + unsigned short op_type); + +#endif /* __CHCR_CRYPTO_H__ */ diff --git a/drivers/infiniband/hw/cxgb4/Kconfig b/drivers/infiniband/hw/cxgb4/Kconfig index 23f38cf2c5cd..afe8b28e0878 100644 --- a/drivers/infiniband/hw/cxgb4/Kconfig +++ b/drivers/infiniband/hw/cxgb4/Kconfig @@ -1,6 +1,7 @@ config INFINIBAND_CXGB4 tristate "Chelsio T4/T5 RDMA Driver" depends on CHELSIO_T4 && INET && (IPV6 || IPV6=n) + select CHELSIO_LIB select GENERIC_ALLOCATOR ---help--- This is an iWARP/RDMA driver for the Chelsio T4 and T5 diff --git a/drivers/infiniband/hw/cxgb4/Makefile b/drivers/infiniband/hw/cxgb4/Makefile index e11cf7299945..fa40b685831b 100644 --- a/drivers/infiniband/hw/cxgb4/Makefile +++ b/drivers/infiniband/hw/cxgb4/Makefile @@ -1,4 +1,5 @@ ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 +ccflags-y += -Idrivers/net/ethernet/chelsio/libcxgb obj-$(CONFIG_INFINIBAND_CXGB4) += iw_cxgb4.o diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c index 80f988984f44..71c8867ef66b 100644 --- a/drivers/infiniband/hw/cxgb4/cm.c +++ b/drivers/infiniband/hw/cxgb4/cm.c @@ -49,6 +49,7 @@ #include +#include #include "iw_cxgb4.h" #include "clip_tbl.h" @@ -239,15 +240,13 @@ int c4iw_ofld_send(struct c4iw_rdev *rdev, struct sk_buff *skb) static void release_tid(struct c4iw_rdev *rdev, u32 hwtid, struct sk_buff *skb) { - struct cpl_tid_release *req; + u32 len = roundup(sizeof(struct cpl_tid_release), 16); - skb = get_skb(skb, sizeof *req, GFP_KERNEL); + skb = get_skb(skb, len, GFP_KERNEL); if (!skb) return; - req = (struct cpl_tid_release *) skb_put(skb, sizeof(*req)); - INIT_TP_WR(req, hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, hwtid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + + cxgb_mk_tid_release(skb, len, hwtid, 0); c4iw_ofld_send(rdev, skb); return; } @@ -466,72 +465,6 @@ static struct net_device *get_real_dev(struct net_device *egress_dev) return rdma_vlan_dev_real_dev(egress_dev) ? : egress_dev; } -static int our_interface(struct c4iw_dev *dev, struct net_device *egress_dev) -{ - int i; - - egress_dev = get_real_dev(egress_dev); - for (i = 0; i < dev->rdev.lldi.nports; i++) - if (dev->rdev.lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry *find_route6(struct c4iw_dev *dev, __u8 *local_ip, - __u8 *peer_ip, __be16 local_port, - __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!our_interface(dev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } - -out: - return dst; -} - -static struct dst_entry *find_route(struct c4iw_dev *dev, __be32 local_ip, - __be32 peer_ip, __be16 local_port, - __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!our_interface(dev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void arp_failure_discard(void *handle, struct sk_buff *skb) { pr_err(MOD "ARP failure\n"); @@ -706,58 +639,34 @@ static int send_flowc(struct c4iw_ep *ep) static int send_halfclose(struct c4iw_ep *ep) { - struct cpl_close_con_req *req; struct sk_buff *skb = skb_dequeue(&ep->com.ep_skb_list); - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_close_con_req), 16); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!skb)) return -ENOMEM; - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(skb, NULL, arp_failure_discard); - req = (struct cpl_close_con_req *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - ep->hwtid)); + cxgb_mk_close_con_req(skb, wrlen, ep->hwtid, ep->txq_idx, + NULL, arp_failure_discard); + return c4iw_l2t_send(&ep->com.dev->rdev, skb, ep->l2t); } static int send_abort(struct c4iw_ep *ep) { - struct cpl_abort_req *req; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_abort_req), 16); struct sk_buff *req_skb = skb_dequeue(&ep->com.ep_skb_list); PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid); if (WARN_ON(!req_skb)) return -ENOMEM; - set_wr_txq(req_skb, CPL_PRIORITY_DATA, ep->txq_idx); - t4_set_arp_err_handler(req_skb, ep, abort_arp_failure); - req = (struct cpl_abort_req *)skb_put(req_skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, ep->hwtid)); - req->cmd = CPL_ABORT_SEND_RST; + cxgb_mk_abort_req(req_skb, wrlen, ep->hwtid, ep->txq_idx, + ep, abort_arp_failure); + return c4iw_l2t_send(&ep->com.dev->rdev, req_skb, ep->l2t); } -static void best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? - sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? - round_up(TCPOLEN_TIMESTAMP, 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); -} - static int send_connect(struct c4iw_ep *ep) { struct cpl_act_open_req *req = NULL; @@ -770,7 +679,7 @@ static int send_connect(struct c4iw_ep *ep) u64 opt0; u32 opt2; unsigned int mtu_idx; - int wscale; + u32 wscale; int win, sizev4, sizev6, wrlen; struct sockaddr_in *la = (struct sockaddr_in *) &ep->com.local_addr; @@ -817,10 +726,10 @@ static int send_connect(struct c4iw_ep *ep) } set_wr_txq(skb, CPL_PRIORITY_SETUP, ep->ctrlq_idx); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -1447,9 +1356,9 @@ static void established_upcall(struct c4iw_ep *ep) static int update_rx_credits(struct c4iw_ep *ep, u32 credits) { - struct cpl_rx_data_ack *req; struct sk_buff *skb; - int wrlen = roundup(sizeof *req, 16); + u32 wrlen = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; PDBG("%s ep %p tid %u credits %u\n", __func__, ep, ep->hwtid, credits); skb = get_skb(NULL, wrlen, GFP_KERNEL); @@ -1466,15 +1375,12 @@ static int update_rx_credits(struct c4iw_ep *ep, u32 credits) if (ep->rcv_win > RCV_BUFSIZ_M * 1024) credits += ep->rcv_win - RCV_BUFSIZ_M * 1024; - req = (struct cpl_rx_data_ack *) skb_put(skb, wrlen); - memset(req, 0, wrlen); - INIT_TP_WR(req, ep->hwtid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - ep->hwtid)); - req->credit_dack = cpu_to_be32(credits | RX_FORCE_ACK_F | - RX_DACK_CHANGE_F | - RX_DACK_MODE_V(dack_mode)); - set_wr_txq(skb, CPL_PRIORITY_ACK, ep->ctrlq_idx); + credit_dack = credits | RX_FORCE_ACK_F | RX_DACK_CHANGE_F | + RX_DACK_MODE_V(dack_mode); + + cxgb_mk_rx_data_ack(skb, wrlen, ep->hwtid, ep->ctrlq_idx, + credit_dack); + c4iw_ofld_send(&ep->com.dev->rdev, skb); return credits; } @@ -1972,7 +1878,7 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) struct sk_buff *skb; struct fw_ofld_connection_wr *req; unsigned int mtu_idx; - int wscale; + u32 wscale; struct sockaddr_in *sin; int win; @@ -1997,10 +1903,10 @@ static int send_fw_act_open_req(struct c4iw_ep *ep, unsigned int atid) htons(FW_OFLD_CONNECTION_WR_CPLRXDATAACK_F); req->tcb.tx_max = (__force __be32) jiffies; req->tcb.rcv_adv = htons(1); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -2054,15 +1960,6 @@ static inline int act_open_has_tid(int status) status != CPL_ERR_CONN_EXIST); } -/* Returns whether a CPL status conveys negative advice. - */ -static int is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static char *neg_adv_str(unsigned int status) { switch (status) { @@ -2218,16 +2115,21 @@ static int c4iw_reconnect(struct c4iw_ep *ep) /* find a route */ if (ep->com.cm_id->m_local_addr.ss_family == AF_INET) { - ep->dst = find_route(ep->com.dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, ep->com.cm_id->tos); + ep->dst = cxgb_find_route(&ep->com.dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, ep->com.cm_id->tos); iptype = 4; ra = (__u8 *)&raddr->sin_addr; } else { - ep->dst = find_route6(ep->com.dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&ep->com.dev->rdev.lldi, + get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; } @@ -2299,7 +2201,7 @@ static int act_open_rpl(struct c4iw_dev *dev, struct sk_buff *skb) PDBG("%s ep %p atid %u status %u errno %d\n", __func__, ep, atid, status, status2errno(status)); - if (is_neg_adv(status)) { + if (cxgb_is_neg_adv(status)) { PDBG("%s Connection problems for atid %u status %u (%s)\n", __func__, atid, status, neg_adv_str(status)); ep->stats.connect_neg_adv++; @@ -2426,7 +2328,7 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, unsigned int mtu_idx; u64 opt0; u32 opt2; - int wscale; + u32 wscale; struct cpl_t5_pass_accept_rpl *rpl5 = NULL; int win; enum chip_type adapter_type = ep->com.dev->rdev.lldi.adapter_type; @@ -2447,10 +2349,10 @@ static int accept_cr(struct c4iw_ep *ep, struct sk_buff *skb, OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, ep->hwtid)); - best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, - enable_tcp_timestamps && req->tcpopt.tstamp, - (AF_INET == ep->com.remote_addr.ss_family) ? 0 : 1); - wscale = compute_wscale(rcv_win); + cxgb_best_mtu(ep->com.dev->rdev.lldi.mtus, ep->mtu, &mtu_idx, + enable_tcp_timestamps && req->tcpopt.tstamp, + (ep->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(rcv_win); /* * Specify the largest window that will fit in opt0. The @@ -2522,42 +2424,6 @@ static void reject_cr(struct c4iw_dev *dev, u32 hwtid, struct sk_buff *skb) return; } -static void get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, - int *iptype, __u8 *local_ip, __u8 *peer_ip, - __be16 *local_port, __be16 *peer_port) -{ - int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? - IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : - T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - PDBG("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", __func__, - ntohl(ip->saddr), ntohl(ip->daddr), ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - PDBG("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - *peer_port = tcp->source; - *local_port = tcp->dest; - - return; -} - static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) { struct c4iw_ep *child_ep = NULL, *parent_ep; @@ -2586,8 +2452,8 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) goto reject; } - get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, &iptype, - local_ip, peer_ip, &local_port, &peer_port); + cxgb_get_4tuple(req, parent_ep->com.dev->rdev.lldi.adapter_type, + &iptype, local_ip, peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@ -2595,18 +2461,19 @@ static int pass_accept_req(struct c4iw_dev *dev, struct sk_buff *skb) , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route(dev, *(__be32 *)local_ip, *(__be32 *)peer_ip, - local_port, peer_port, - tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + *(__be32 *)local_ip, *(__be32 *)peer_ip, + local_port, peer_port, tos); } else { PDBG("%s parent ep %p hwtid %u laddr %pI6 raddr %pI6 lport %d rport %d peer_mss %d\n" , __func__, parent_ep, hwtid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = find_route6(dev, local_ip, peer_ip, local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &parent_ep->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + local_ip, peer_ip, local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &parent_ep->com.local_addr)->sin6_scope_id); } if (!dst) { printk(KERN_ERR MOD "%s - failed to find dst entry!\n", @@ -2839,18 +2706,18 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) { struct cpl_abort_req_rss *req = cplhdr(skb); struct c4iw_ep *ep; - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; struct c4iw_qp_attributes attrs; int ret; int release = 0; unsigned int tid = GET_TID(req); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); ep = get_ep_from_tid(dev, tid); if (!ep) return 0; - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); @@ -2943,11 +2810,9 @@ static int peer_abort(struct c4iw_dev *dev, struct sk_buff *skb) release = 1; goto out; } - set_wr_txq(skb, CPL_PRIORITY_DATA, ep->txq_idx); - rpl = (struct cpl_abort_rpl *) skb_put(rpl_skb, sizeof(*rpl)); - INIT_TP_WR(rpl, ep->hwtid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, ep->hwtid)); - rpl->cmd = CPL_ABORT_NO_RST; + + cxgb_mk_abort_rpl(rpl_skb, len, ep->hwtid, ep->txq_idx); + c4iw_ofld_send(&ep->com.dev->rdev, rpl_skb); out: if (release) @@ -3379,9 +3244,11 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) PDBG("%s saddr %pI4 sport 0x%x raddr %pI4 rport 0x%x\n", __func__, &laddr->sin_addr, ntohs(laddr->sin_port), ra, ntohs(raddr->sin_port)); - ep->dst = find_route(dev, laddr->sin_addr.s_addr, - raddr->sin_addr.s_addr, laddr->sin_port, - raddr->sin_port, cm_id->tos); + ep->dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + laddr->sin_addr.s_addr, + raddr->sin_addr.s_addr, + laddr->sin_port, + raddr->sin_port, cm_id->tos); } else { iptype = 6; ra = (__u8 *)&raddr6->sin6_addr; @@ -3400,10 +3267,12 @@ int c4iw_connect(struct iw_cm_id *cm_id, struct iw_cm_conn_param *conn_param) __func__, laddr6->sin6_addr.s6_addr, ntohs(laddr6->sin6_port), raddr6->sin6_addr.s6_addr, ntohs(raddr6->sin6_port)); - ep->dst = find_route6(dev, laddr6->sin6_addr.s6_addr, - raddr6->sin6_addr.s6_addr, - laddr6->sin6_port, raddr6->sin6_port, 0, - raddr6->sin6_scope_id); + ep->dst = cxgb_find_route6(&dev->rdev.lldi, get_real_dev, + laddr6->sin6_addr.s6_addr, + raddr6->sin6_addr.s6_addr, + laddr6->sin6_port, + raddr6->sin6_port, 0, + raddr6->sin6_scope_id); } if (!ep->dst) { printk(KERN_ERR MOD "%s - cannot find route.\n", __func__); @@ -4045,8 +3914,9 @@ static int rx_pkt(struct c4iw_dev *dev, struct sk_buff *skb) ntohl(iph->daddr), ntohs(tcph->dest), ntohl(iph->saddr), ntohs(tcph->source), iph->tos); - dst = find_route(dev, iph->daddr, iph->saddr, tcph->dest, tcph->source, - iph->tos); + dst = cxgb_find_route(&dev->rdev.lldi, get_real_dev, + iph->daddr, iph->saddr, tcph->dest, + tcph->source, iph->tos); if (!dst) { pr_err("%s - failed to find dst entry!\n", __func__); @@ -4321,7 +4191,7 @@ static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb) kfree_skb(skb); return 0; } - if (is_neg_adv(req->status)) { + if (cxgb_is_neg_adv(req->status)) { PDBG("%s Negative advice on abort- tid %u status %d (%s)\n", __func__, ep->hwtid, req->status, neg_adv_str(req->status)); diff --git a/drivers/infiniband/hw/cxgb4/device.c b/drivers/infiniband/hw/cxgb4/device.c index 3c4b2126e0d1..93e3d270a98a 100644 --- a/drivers/infiniband/hw/cxgb4/device.c +++ b/drivers/infiniband/hw/cxgb4/device.c @@ -1480,6 +1480,10 @@ static int c4iw_uld_control(void *handle, enum cxgb4_control control, ...) static struct cxgb4_uld_info c4iw_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 511, + .ciq = true, + .lro = false, .add = c4iw_uld_add, .rx_handler = c4iw_uld_rx_handler, .state_change = c4iw_uld_state_change, diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index 4b83b84f7ddf..cdcf3eeb6f4a 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -882,15 +882,6 @@ static inline struct c4iw_listen_ep *to_listen_ep(struct iw_cm_id *cm_id) return cm_id->provider_data; } -static inline int compute_wscale(int win) -{ - int wscale = 0; - - while (wscale < 14 && (65535<pas) * ncont; + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * ncont; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_db; } - mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, (*cqb)->pas, 0); - (*cqb)->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; + + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_ib_populate_pas(dev, cq->buf.umem, page_shift, pas, 0); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + page_shift - MLX5_ADAPTER_PAGE_SHIFT); *index = to_mucontext(context)->uuari.uars[0].index; @@ -816,9 +824,10 @@ static void init_cq_buf(struct mlx5_ib_cq *cq, struct mlx5_ib_cq_buf *buf) static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, int entries, int cqe_size, - struct mlx5_create_cq_mbox_in **cqb, - int *index, int *inlen) + u32 **cqb, int *index, int *inlen) { + __be64 *pas; + void *cqc; int err; err = mlx5_db_alloc(dev->mdev, &cq->db); @@ -835,15 +844,21 @@ static int create_cq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, init_cq_buf(cq, &cq->buf); - *inlen = sizeof(**cqb) + sizeof(*(*cqb)->pas) * cq->buf.buf.npages; + *inlen = MLX5_ST_SZ_BYTES(create_cq_in) + + MLX5_FLD_SZ_BYTES(create_cq_in, pas[0]) * cq->buf.buf.npages; *cqb = mlx5_vzalloc(*inlen); if (!*cqb) { err = -ENOMEM; goto err_buf; } - mlx5_fill_page_array(&cq->buf.buf, (*cqb)->pas); - (*cqb)->ctx.log_pg_sz = cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; + pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, *cqb, pas); + mlx5_fill_page_array(&cq->buf.buf, pas); + + cqc = MLX5_ADDR_OF(create_cq_in, *cqb, cq_context); + MLX5_SET(cqc, cqc, log_page_size, + cq->buf.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + *index = dev->mdev->priv.uuari.uars[0].index; return 0; @@ -877,11 +892,12 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, { int entries = attr->cqe; int vector = attr->comp_vector; - struct mlx5_create_cq_mbox_in *cqb = NULL; struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_ib_cq *cq; int uninitialized_var(index); int uninitialized_var(inlen); + u32 *cqb = NULL; + void *cqc; int cqe_size; unsigned int irqn; int eqn; @@ -927,19 +943,20 @@ struct ib_cq *mlx5_ib_create_cq(struct ib_device *ibdev, INIT_WORK(&cq->notify_work, notify_soft_wc_handler); } - cq->cqe_size = cqe_size; - cqb->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; - - if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) - cqb->ctx.cqe_sz_flags |= (1 << 1); - - cqb->ctx.log_sz_usr_page = cpu_to_be32((ilog2(entries) << 24) | index); err = mlx5_vector2eqn(dev->mdev, vector, &eqn, &irqn); if (err) goto err_cqb; - cqb->ctx.c_eqn = cpu_to_be16(eqn); - cqb->ctx.db_record_addr = cpu_to_be64(cq->db.dma); + cq->cqe_size = cqe_size; + + cqc = MLX5_ADDR_OF(create_cq_in, cqb, cq_context); + MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + MLX5_SET(cqc, cqc, uar_page, index); + MLX5_SET(cqc, cqc, c_eqn, eqn); + MLX5_SET64(cqc, cqc, dbr_addr, cq->db.dma); + if (cq->create_flags & IB_CQ_FLAGS_IGNORE_OVERRUN) + MLX5_SET(cqc, cqc, oi, 1); err = mlx5_core_create_cq(dev->mdev, &cq->mcq, cqb, inlen); if (err) @@ -1070,27 +1087,15 @@ void mlx5_ib_cq_clean(struct mlx5_ib_cq *cq, u32 qpn, struct mlx5_ib_srq *srq) int mlx5_ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period) { - struct mlx5_modify_cq_mbox_in *in; struct mlx5_ib_dev *dev = to_mdev(cq->device); struct mlx5_ib_cq *mcq = to_mcq(cq); int err; - u32 fsel; if (!MLX5_CAP_GEN(dev->mdev, cq_moderation)) return -ENOSYS; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - in->cqn = cpu_to_be32(mcq->mcq.cqn); - fsel = (MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); - in->ctx.cq_period = cpu_to_be16(cq_period); - in->ctx.cq_max_count = cpu_to_be16(cq_count); - in->field_select = cpu_to_be32(fsel); - err = mlx5_core_modify_cq(dev->mdev, &mcq->mcq, in, sizeof(*in)); - kfree(in); - + err = mlx5_core_modify_cq_moderation(dev->mdev, &mcq->mcq, + cq_period, cq_count); if (err) mlx5_ib_warn(dev, "modify cq 0x%x failed\n", mcq->mcq.cqn); @@ -1223,9 +1228,11 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(ibcq->device); struct mlx5_ib_cq *cq = to_mcq(ibcq); - struct mlx5_modify_cq_mbox_in *in; + void *cqc; + u32 *in; int err; int npas; + __be64 *pas; int page_shift; int inlen; int uninitialized_var(cqe_size); @@ -1267,28 +1274,37 @@ int mlx5_ib_resize_cq(struct ib_cq *ibcq, int entries, struct ib_udata *udata) if (err) goto ex; - inlen = sizeof(*in) + npas * sizeof(in->pas[0]); + inlen = MLX5_ST_SZ_BYTES(modify_cq_in) + + MLX5_FLD_SZ_BYTES(modify_cq_in, pas[0]) * npas; + in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto ex_resize; } + pas = (__be64 *)MLX5_ADDR_OF(modify_cq_in, in, pas); if (udata) mlx5_ib_populate_pas(dev, cq->resize_umem, page_shift, - in->pas, 0); + pas, 0); else - mlx5_fill_page_array(&cq->resize_buf->buf, in->pas); + mlx5_fill_page_array(&cq->resize_buf->buf, pas); - in->field_select = cpu_to_be32(MLX5_MODIFY_CQ_MASK_LOG_SIZE | - MLX5_MODIFY_CQ_MASK_PG_OFFSET | - MLX5_MODIFY_CQ_MASK_PG_SIZE); - in->ctx.log_pg_sz = page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->ctx.cqe_sz_flags = cqe_sz_to_mlx_sz(cqe_size) << 5; - in->ctx.page_offset = 0; - in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(entries) << 24); - in->hdr.opmod = cpu_to_be16(MLX5_CQ_OPMOD_RESIZE); - in->cqn = cpu_to_be32(cq->mcq.cqn); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.resize_field_select.resize_field_select, + MLX5_MODIFY_CQ_MASK_LOG_SIZE | + MLX5_MODIFY_CQ_MASK_PG_OFFSET | + MLX5_MODIFY_CQ_MASK_PG_SIZE); + + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + + MLX5_SET(cqc, cqc, log_page_size, + page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(cqc, cqc, cqe_sz, cqe_sz_to_mlx_sz(cqe_size)); + MLX5_SET(cqc, cqc, log_cq_size, ilog2(entries)); + + MLX5_SET(modify_cq_in, in, op_mod, MLX5_CQ_OPMOD_RESIZE); + MLX5_SET(modify_cq_in, in, cqn, cq->mcq.cqn); err = mlx5_core_modify_cq(dev->mdev, &cq->mcq, in, inlen); if (err) diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index e19537cf44ab..551aa0e789aa 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -232,23 +232,19 @@ static int set_roce_addr(struct ib_device *device, u8 port_num, const union ib_gid *gid, const struct ib_gid_attr *attr) { - struct mlx5_ib_dev *dev = to_mdev(device); - u32 in[MLX5_ST_SZ_DW(set_roce_address_in)]; - u32 out[MLX5_ST_SZ_DW(set_roce_address_out)]; + struct mlx5_ib_dev *dev = to_mdev(device); + u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0}; void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address); enum rdma_link_layer ll = mlx5_ib_port_link_layer(device, port_num); if (ll != IB_LINK_LAYER_ETHERNET) return -EINVAL; - memset(in, 0, sizeof(in)); - ib_gid_to_mlx5_roce_addr(gid, attr, in_addr); MLX5_SET(set_roce_address_in, in, roce_address_index, index); MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS); - - memset(out, 0, sizeof(out)); return mlx5_cmd_exec(dev->mdev, in, sizeof(in), out, sizeof(out)); } @@ -753,8 +749,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port, &props->active_width); if (err) goto out; - err = mlx5_query_port_proto_oper(mdev, &props->active_speed, MLX5_PTYS_IB, - port); + err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port); if (err) goto out; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 95146f4aa3e3..67cc7416fdff 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -505,7 +505,7 @@ struct mlx5_ib_mr { int umred; int npages; struct mlx5_ib_dev *dev; - struct mlx5_create_mkey_mbox_out out; + u32 out[MLX5_ST_SZ_DW(create_mkey_out)]; struct mlx5_core_sig_ctx *sig; int live; void *descs_alloc; diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 4b021305c321..6f7e34753abc 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -135,20 +135,10 @@ static void reg_mr_callback(int status, void *context) return; } - if (mr->out.hdr.status) { - mlx5_ib_warn(dev, "failed - status %d, syndorme 0x%x\n", - mr->out.hdr.status, - be32_to_cpu(mr->out.hdr.syndrome)); - kfree(mr); - dev->fill_delay = 1; - mod_timer(&dev->delay_timer, jiffies + HZ); - return; - } - spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); key = dev->mdev->priv.mkey_key++; spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); - mr->mmkey.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key; + mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; cache->last_add = jiffies; @@ -170,16 +160,19 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) { struct mlx5_mr_cache *cache = &dev->cache; struct mlx5_cache_ent *ent = &cache->ent[c]; - struct mlx5_create_mkey_mbox_in *in; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mr *mr; int npages = 1 << ent->order; + void *mkc; + u32 *in; int err = 0; int i; - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) return -ENOMEM; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); for (i = 0; i < num; i++) { if (ent->pending >= MAX_PENDING_REG_MR) { err = -EAGAIN; @@ -194,18 +187,22 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num) mr->order = ent->order; mr->umred = 1; mr->dev = dev; - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN; - in->seg.log2_page_size = 12; + + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2); + MLX5_SET(mkc, mkc, log_page_size, 12); spin_lock_irq(&ent->lock); ent->pending++; spin_unlock_irq(&ent->lock); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, - sizeof(*in), reg_mr_callback, - mr, &mr->out); + err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, + in, inlen, + mr->out, sizeof(mr->out), + reg_mr_callback, mr); if (err) { spin_lock_irq(&ent->lock); ent->pending--; @@ -670,30 +667,38 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) { struct mlx5_ib_dev *dev = to_mdev(pd->device); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *seg; struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } - seg = &in->seg; - seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA; - seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64); - seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - seg->start_addr = 0; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, sizeof(*in), NULL, NULL, - NULL); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET64(mkc, mkc, start_addr, 0); + + err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); if (err) goto err_in; @@ -1063,9 +1068,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, int page_shift, int access_flags) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in; struct mlx5_ib_mr *mr; + __be64 *pas; + void *mkc; int inlen; + u32 *in; int err; bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); @@ -1073,31 +1080,41 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, if (!mr) return ERR_PTR(-ENOMEM); - inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2; + inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + + sizeof(*pas) * ((npages + 1) / 2) * 2; in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_1; } - mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, + pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); + mlx5_ib_populate_pas(dev, umem, page_shift, pas, pg_cap ? MLX5_IB_MTT_PRESENT : 0); - /* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags + /* The pg_access bit allows setting the access flags * in the page list submitted with the command. */ - in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0; - in->seg.flags = convert_access(access_flags) | - MLX5_ACCESS_MODE_MTT; - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - in->seg.start_addr = cpu_to_be64(virt_addr); - in->seg.len = cpu_to_be64(length); - in->seg.bsfs_octo_size = 0; - in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift)); - in->seg.log2_page_size = page_shift; - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length, - 1 << page_shift)); - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen, NULL, - NULL, NULL); + MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); + + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); + MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); + MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); + MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); + MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); + MLX5_SET(mkc, mkc, lr, 1); + + MLX5_SET64(mkc, mkc, start_addr, virt_addr); + MLX5_SET64(mkc, mkc, len, length); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, bsf_octword_size, 0); + MLX5_SET(mkc, mkc, translations_octword_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + MLX5_SET(mkc, mkc, log_page_size, page_shift); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(create_mkey_in, in, translations_octword_actual_size, + get_octo_len(virt_addr, length, 1 << page_shift)); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) { mlx5_ib_warn(dev, "create mkey failed\n"); goto err_2; @@ -1523,30 +1540,32 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, u32 max_num_sg) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_ib_mr *mr; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); int ndescs = ALIGN(max_num_sg, 4); + struct mlx5_ib_mr *mr; + void *mkc; + u32 *in; int err; mr = kzalloc(sizeof(*mr), GFP_KERNEL); if (!mr) return ERR_PTR(-ENOMEM); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!in) { err = -ENOMEM; goto err_free; } - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32(ndescs); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); if (mr_type == IB_MR_TYPE_MEM_REG) { - mr->access_mode = MLX5_ACCESS_MODE_MTT; - in->seg.log2_page_size = PAGE_SHIFT; - + mr->access_mode = MLX5_MKC_ACCESS_MODE_MTT; + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(u64)); if (err) @@ -1555,7 +1574,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, mr->desc_size = sizeof(u64); mr->max_descs = ndescs; } else if (mr_type == IB_MR_TYPE_SG_GAPS) { - mr->access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, sizeof(struct mlx5_klm)); @@ -1566,9 +1585,8 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, } else if (mr_type == IB_MR_TYPE_SIGNATURE) { u32 psv_index[2]; - in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) | - MLX5_MKEY_BSF_EN); - in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE); + MLX5_SET(mkc, mkc, bsf_en, 1); + MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); if (!mr->sig) { err = -ENOMEM; @@ -1581,7 +1599,7 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, if (err) goto err_free_sig; - mr->access_mode = MLX5_ACCESS_MODE_KLM; + mr->access_mode = MLX5_MKC_ACCESS_MODE_KLMS; mr->sig->psv_memory.psv_idx = psv_index[0]; mr->sig->psv_wire.psv_idx = psv_index[1]; @@ -1595,9 +1613,10 @@ struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, goto err_free_in; } - in->seg.flags = MLX5_PERM_UMR_EN | mr->access_mode; - err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, sizeof(*in), - NULL, NULL, NULL); + MLX5_SET(mkc, mkc, access_mode, mr->access_mode); + MLX5_SET(mkc, mkc, umr_en, 1); + + err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); if (err) goto err_destroy_psv; @@ -1633,8 +1652,10 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, struct ib_udata *udata) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct mlx5_create_mkey_mbox_in *in = NULL; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); struct mlx5_ib_mw *mw = NULL; + u32 *in = NULL; + void *mkc; int ndescs; int err; struct mlx5_ib_alloc_mw req = {}; @@ -1658,23 +1679,24 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); mw = kzalloc(sizeof(*mw), GFP_KERNEL); - in = kzalloc(sizeof(*in), GFP_KERNEL); + in = kzalloc(inlen, GFP_KERNEL); if (!mw || !in) { err = -ENOMEM; goto free; } - in->seg.status = MLX5_MKEY_STATUS_FREE; - in->seg.xlt_oct_size = cpu_to_be32(ndescs); - in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); - in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_KLM | - MLX5_PERM_LOCAL_READ; - if (type == IB_MW_TYPE_2) - in->seg.flags_pd |= cpu_to_be32(MLX5_MKEY_REMOTE_INVAL); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); - err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, sizeof(*in), - NULL, NULL, NULL); + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, translations_octword_size, ndescs); + MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_KLMS); + MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); if (err) goto free; @@ -1811,7 +1833,7 @@ int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, mr->desc_size * mr->max_descs, DMA_TO_DEVICE); - if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset); else n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index affc3f6598ca..9529b464fbdc 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -726,7 +726,7 @@ err_umem: static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct mlx5_ib_qp *qp, struct ib_udata *udata, struct ib_qp_init_attr *attr, - struct mlx5_create_qp_mbox_in **in, + u32 **in, struct mlx5_ib_create_qp_resp *resp, int *inlen, struct mlx5_ib_qp_base *base) { @@ -739,6 +739,8 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, u32 offset = 0; int uuarn; int ncont = 0; + __be64 *pas; + void *qpc; int err; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); @@ -795,20 +797,24 @@ static int create_user_qp(struct mlx5_ib_dev *dev, struct ib_pd *pd, ubuffer->umem = NULL; } - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * ncont; + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * ncont; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_umem; } - if (ubuffer->umem) - mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, - (*in)->pas, 0); - (*in)->ctx.log_pg_sz_remote_qpn = - cpu_to_be32((page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); - (*in)->ctx.params2 = cpu_to_be32(offset << 6); - (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); + pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas); + if (ubuffer->umem) + mlx5_ib_populate_pas(dev, ubuffer->umem, page_shift, pas, 0); + + qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); + + MLX5_SET(qpc, qpc, log_page_size, page_shift - MLX5_ADAPTER_PAGE_SHIFT); + MLX5_SET(qpc, qpc, page_offset, offset); + + MLX5_SET(qpc, qpc, uar_page, uar_index); resp->uuar_index = uuarn; qp->uuarn = uuarn; @@ -857,12 +863,13 @@ static void destroy_qp_user(struct ib_pd *pd, struct mlx5_ib_qp *qp, static int create_kernel_qp(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *init_attr, struct mlx5_ib_qp *qp, - struct mlx5_create_qp_mbox_in **in, int *inlen, + u32 **in, int *inlen, struct mlx5_ib_qp_base *base) { enum mlx5_ib_latency_class lc = MLX5_IB_LATENCY_CLASS_LOW; struct mlx5_uuar_info *uuari; int uar_index; + void *qpc; int uuarn; int err; @@ -902,25 +909,29 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev, } qp->sq.qend = mlx5_get_send_wqe(qp, qp->sq.wqe_cnt); - *inlen = sizeof(**in) + sizeof(*(*in)->pas) * qp->buf.npages; + *inlen = MLX5_ST_SZ_BYTES(create_qp_in) + + MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) * qp->buf.npages; *in = mlx5_vzalloc(*inlen); if (!*in) { err = -ENOMEM; goto err_buf; } - (*in)->ctx.qp_counter_set_usr_page = cpu_to_be32(uar_index); - (*in)->ctx.log_pg_sz_remote_qpn = - cpu_to_be32((qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT) << 24); + + qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc); + MLX5_SET(qpc, qpc, uar_page, uar_index); + MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + /* Set "fast registration enabled" for all kernel QPs */ - (*in)->ctx.params1 |= cpu_to_be32(1 << 11); - (*in)->ctx.sq_crq_size |= cpu_to_be16(1 << 4); + MLX5_SET(qpc, qpc, fre, 1); + MLX5_SET(qpc, qpc, rlky, 1); if (init_attr->create_flags & mlx5_ib_create_qp_sqpn_qp1()) { - (*in)->ctx.deth_sqpn = cpu_to_be32(1); + MLX5_SET(qpc, qpc, deth_sqpn, 1); qp->flags |= MLX5_IB_QP_SQPN_QP1; } - mlx5_fill_page_array(&qp->buf, (*in)->pas); + mlx5_fill_page_array(&qp->buf, + (__be64 *)MLX5_ADDR_OF(create_qp_in, *in, pas)); err = mlx5_db_alloc(dev->mdev, &qp->db); if (err) { @@ -974,15 +985,15 @@ static void destroy_qp_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) free_uuar(&dev->mdev->priv.uuari, qp->bf->uuarn); } -static __be32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) +static u32 get_rx_type(struct mlx5_ib_qp *qp, struct ib_qp_init_attr *attr) { if (attr->srq || (attr->qp_type == IB_QPT_XRC_TGT) || (attr->qp_type == IB_QPT_XRC_INI)) - return cpu_to_be32(MLX5_SRQ_RQ); + return MLX5_SRQ_RQ; else if (!qp->has_rq) - return cpu_to_be32(MLX5_ZERO_LEN_RQ); + return MLX5_ZERO_LEN_RQ; else - return cpu_to_be32(MLX5_NON_ZERO_RQ); + return MLX5_NON_ZERO_RQ; } static int is_connected(enum ib_qp_type qp_type) @@ -996,13 +1007,10 @@ static int is_connected(enum ib_qp_type qp_type) static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, struct mlx5_ib_sq *sq, u32 tdn) { - u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - memset(in, 0, sizeof(in)); - MLX5_SET(tisc, tisc, transport_domain, tdn); - return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn); } @@ -1191,7 +1199,7 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev, } static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, - struct mlx5_create_qp_mbox_in *in, + u32 *in, struct ib_pd *pd) { struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; @@ -1462,18 +1470,18 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, struct ib_udata *udata, struct mlx5_ib_qp *qp) { struct mlx5_ib_resources *devr = &dev->devr; + int inlen = MLX5_ST_SZ_BYTES(create_qp_in); struct mlx5_core_dev *mdev = dev->mdev; - struct mlx5_ib_qp_base *base; struct mlx5_ib_create_qp_resp resp; - struct mlx5_create_qp_mbox_in *in; - struct mlx5_ib_create_qp ucmd; struct mlx5_ib_cq *send_cq; struct mlx5_ib_cq *recv_cq; unsigned long flags; - int inlen = sizeof(*in); - int err; u32 uidx = MLX5_IB_DEFAULT_UIDX; + struct mlx5_ib_create_qp ucmd; + struct mlx5_ib_qp_base *base; void *qpc; + u32 *in; + int err; base = init_attr->qp_type == IB_QPT_RAW_PACKET ? &qp->raw_packet_qp.rq.base : @@ -1601,7 +1609,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (err) return err; } else { - in = mlx5_vzalloc(sizeof(*in)); + in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; @@ -1611,26 +1619,29 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, if (is_sqp(init_attr->qp_type)) qp->port = init_attr->port_num; - in->ctx.flags = cpu_to_be32(to_mlx5_st(init_attr->qp_type) << 16 | - MLX5_QP_PM_MIGRATED << 11); + qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); + + MLX5_SET(qpc, qpc, st, to_mlx5_st(init_attr->qp_type)); + MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED); if (init_attr->qp_type != MLX5_IB_QPT_REG_UMR) - in->ctx.flags_pd = cpu_to_be32(to_mpd(pd ? pd : devr->p0)->pdn); + MLX5_SET(qpc, qpc, pd, to_mpd(pd ? pd : devr->p0)->pdn); else - in->ctx.flags_pd = cpu_to_be32(MLX5_QP_LAT_SENSITIVE); + MLX5_SET(qpc, qpc, latency_sensitive, 1); + if (qp->wq_sig) - in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_ENABLE_SIG); + MLX5_SET(qpc, qpc, wq_signature, 1); if (qp->flags & MLX5_IB_QP_BLOCK_MULTICAST_LOOPBACK) - in->ctx.flags_pd |= cpu_to_be32(MLX5_QP_BLOCK_MCAST); + MLX5_SET(qpc, qpc, block_lb_mc, 1); if (qp->flags & MLX5_IB_QP_CROSS_CHANNEL) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_MASTER); + MLX5_SET(qpc, qpc, cd_master, 1); if (qp->flags & MLX5_IB_QP_MANAGED_SEND) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_SEND); + MLX5_SET(qpc, qpc, cd_slave_send, 1); if (qp->flags & MLX5_IB_QP_MANAGED_RECV) - in->ctx.params2 |= cpu_to_be32(MLX5_QP_BIT_CC_SLAVE_RECV); + MLX5_SET(qpc, qpc, cd_slave_receive, 1); if (qp->scat_cqe && is_connected(init_attr->qp_type)) { int rcqe_sz; @@ -1640,71 +1651,68 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd, scqe_sz = mlx5_ib_get_cqe_size(dev, init_attr->send_cq); if (rcqe_sz == 128) - in->ctx.cs_res = MLX5_RES_SCAT_DATA64_CQE; + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA64_CQE); else - in->ctx.cs_res = MLX5_RES_SCAT_DATA32_CQE; + MLX5_SET(qpc, qpc, cs_res, MLX5_RES_SCAT_DATA32_CQE); if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR) { if (scqe_sz == 128) - in->ctx.cs_req = MLX5_REQ_SCAT_DATA64_CQE; + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA64_CQE); else - in->ctx.cs_req = MLX5_REQ_SCAT_DATA32_CQE; + MLX5_SET(qpc, qpc, cs_req, MLX5_REQ_SCAT_DATA32_CQE); } } if (qp->rq.wqe_cnt) { - in->ctx.rq_size_stride = (qp->rq.wqe_shift - 4); - in->ctx.rq_size_stride |= ilog2(qp->rq.wqe_cnt) << 3; + MLX5_SET(qpc, qpc, log_rq_stride, qp->rq.wqe_shift - 4); + MLX5_SET(qpc, qpc, log_rq_size, ilog2(qp->rq.wqe_cnt)); } - in->ctx.rq_type_srqn = get_rx_type(qp, init_attr); + MLX5_SET(qpc, qpc, rq_type, get_rx_type(qp, init_attr)); if (qp->sq.wqe_cnt) - in->ctx.sq_crq_size |= cpu_to_be16(ilog2(qp->sq.wqe_cnt) << 11); + MLX5_SET(qpc, qpc, log_sq_size, ilog2(qp->sq.wqe_cnt)); else - in->ctx.sq_crq_size |= cpu_to_be16(0x8000); + MLX5_SET(qpc, qpc, no_sq, 1); /* Set default resources */ switch (init_attr->qp_type) { case IB_QPT_XRC_TGT: - in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.cqn_send = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); - in->ctx.xrcd = cpu_to_be32(to_mxrcd(init_attr->xrcd)->xrcdn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(init_attr->xrcd)->xrcdn); break; case IB_QPT_XRC_INI: - in->ctx.cqn_recv = cpu_to_be32(to_mcq(devr->c0)->mcq.cqn); - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(devr->s0)->msrq.srqn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s0)->msrq.srqn); break; default: if (init_attr->srq) { - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x0)->xrcdn); - in->ctx.rq_type_srqn |= cpu_to_be32(to_msrq(init_attr->srq)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x0)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(init_attr->srq)->msrq.srqn); } else { - in->ctx.xrcd = cpu_to_be32(to_mxrcd(devr->x1)->xrcdn); - in->ctx.rq_type_srqn |= - cpu_to_be32(to_msrq(devr->s1)->msrq.srqn); + MLX5_SET(qpc, qpc, xrcd, to_mxrcd(devr->x1)->xrcdn); + MLX5_SET(qpc, qpc, srqn_rmpn_xrqn, to_msrq(devr->s1)->msrq.srqn); } } if (init_attr->send_cq) - in->ctx.cqn_send = cpu_to_be32(to_mcq(init_attr->send_cq)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_snd, to_mcq(init_attr->send_cq)->mcq.cqn); if (init_attr->recv_cq) - in->ctx.cqn_recv = cpu_to_be32(to_mcq(init_attr->recv_cq)->mcq.cqn); + MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(init_attr->recv_cq)->mcq.cqn); - in->ctx.db_rec_addr = cpu_to_be64(qp->db.dma); + MLX5_SET64(qpc, qpc, dbr_addr, qp->db.dma); - if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) { - qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); - /* 0xffffff means we ask to work with cqe version 0 */ + /* 0xffffff means we ask to work with cqe version 0 */ + if (MLX5_CAP_GEN(mdev, cqe_version) == MLX5_CQE_VERSION_V1) MLX5_SET(qpc, qpc, user_index, uidx); - } + /* we use IB_QP_CREATE_IPOIB_UD_LSO to indicates ipoib qp */ if (init_attr->qp_type == IB_QPT_UD && (init_attr->create_flags & IB_QP_CREATE_IPOIB_UD_LSO)) { - qpc = MLX5_ADDR_OF(create_qp_in, in, qpc); MLX5_SET(qpc, qpc, ulp_stateless_offload_mode, 1); qp->flags |= MLX5_IB_QP_LSO; } @@ -1861,7 +1869,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) { struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_ib_qp_base *base = &qp->trans_qp.base; - struct mlx5_modify_qp_mbox_in *in; unsigned long flags; int err; @@ -1874,16 +1881,12 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) &qp->raw_packet_qp.rq.base : &qp->trans_qp.base; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return; - if (qp->state != IB_QPS_RESET) { if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET) { mlx5_ib_qp_disable_pagefaults(qp); err = mlx5_core_qp_modify(dev->mdev, - MLX5_CMD_OP_2RST_QP, in, 0, - &base->mqp); + MLX5_CMD_OP_2RST_QP, 0, + NULL, &base->mqp); } else { err = modify_raw_packet_qp(dev, qp, MLX5_CMD_OP_2RST_QP); @@ -1925,8 +1928,6 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp) base->mqp.qpn); } - kfree(in); - if (qp->create_type == MLX5_QP_KERNEL) destroy_qp_kernel(dev, qp); else if (qp->create_type == MLX5_QP_USER) @@ -2512,7 +2513,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_qp_base *base = &qp->trans_qp.base; struct mlx5_ib_cq *send_cq, *recv_cq; struct mlx5_qp_context *context; - struct mlx5_modify_qp_mbox_in *in; struct mlx5_ib_pd *pd; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; @@ -2521,11 +2521,10 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, int err; u16 op; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) + context = kzalloc(sizeof(*context), GFP_KERNEL); + if (!context) return -ENOMEM; - context = &in->ctx; err = to_mlx5_st(ibqp->qp_type); if (err < 0) { mlx5_ib_dbg(dev, "unsupported qp type %d\n", ibqp->qp_type); @@ -2690,12 +2689,11 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, op = optab[mlx5_cur][mlx5_new]; optpar = ib_mask_to_mlx5_opt(attr_mask); optpar &= opt_mask[mlx5_cur][mlx5_new][mlx5_st]; - in->optparam = cpu_to_be32(optpar); if (qp->ibqp.qp_type == IB_QPT_RAW_PACKET) err = modify_raw_packet_qp(dev, qp, op); else - err = mlx5_core_qp_modify(dev->mdev, op, in, sqd_event, + err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, &base->mqp); if (err) goto out; @@ -2736,7 +2734,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, } out: - kfree(in); + kfree(context); return err; } @@ -2969,7 +2967,7 @@ static void set_reg_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr, memset(umr, 0, sizeof(*umr)); - if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) /* KLMs take twice the size of MTTs */ ndescs *= 2; @@ -3112,9 +3110,9 @@ static void set_reg_mkey_seg(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); - if (mr->access_mode == MLX5_ACCESS_MODE_MTT) + if (mr->access_mode == MLX5_MKC_ACCESS_MODE_MTT) seg->log2_page_size = ilog2(mr->ibmr.page_size); - else if (mr->access_mode == MLX5_ACCESS_MODE_KLM) + else if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS) /* KLMs take twice the size of MTTs */ ndescs *= 2; @@ -3455,7 +3453,7 @@ static void set_sig_mkey_segment(struct mlx5_mkey_seg *seg, memset(seg, 0, sizeof(*seg)); seg->flags = get_umr_flags(wr->access_flags) | - MLX5_ACCESS_MODE_KLM; + MLX5_MKC_ACCESS_MODE_KLMS; seg->qpn_mkey7_0 = cpu_to_be32((sig_key & 0xff) | 0xffffff00); seg->flags_pd = cpu_to_be32(MLX5_MKEY_REMOTE_INVAL | sigerr << 26 | MLX5_MKEY_BSF_EN | pdn); @@ -4317,21 +4315,24 @@ static int query_raw_packet_qp_state(struct mlx5_ib_dev *dev, static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct ib_qp_attr *qp_attr) { - struct mlx5_query_qp_mbox_out *outb; + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); struct mlx5_qp_context *context; int mlx5_state; + u32 *outb; int err = 0; - outb = kzalloc(sizeof(*outb), GFP_KERNEL); + outb = kzalloc(outlen, GFP_KERNEL); if (!outb) return -ENOMEM; - context = &outb->ctx; err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb, - sizeof(*outb)); + outlen); if (err) goto out; + /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */ + context = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, outb, qpc); + mlx5_state = be32_to_cpu(context->flags) >> 28; qp->state = to_ib_qp_state(mlx5_state); diff --git a/drivers/isdn/hardware/mISDN/avmfritz.c b/drivers/isdn/hardware/mISDN/avmfritz.c index 292991c90c02..e3fa1cd64470 100644 --- a/drivers/isdn/hardware/mISDN/avmfritz.c +++ b/drivers/isdn/hardware/mISDN/avmfritz.c @@ -284,7 +284,7 @@ __write_ctrl_pciv2(struct fritzcard *fc, struct hdlc_hw *hdlc, u32 channel) { AVM_HDLC_STATUS_1)); } -void +static void write_ctrl(struct bchannel *bch, int which) { struct fritzcard *fc = bch->hw; struct hdlc_hw *hdlc; @@ -741,7 +741,7 @@ inithdlc(struct fritzcard *fc) modehdlc(&fc->bch[1], -1); } -void +static void clear_pending_hdlc_ints(struct fritzcard *fc) { u32 val; @@ -962,7 +962,7 @@ avm_dctrl(struct mISDNchannel *ch, u32 cmd, void *arg) return err; } -int +static int setup_fritz(struct fritzcard *fc) { u32 val, ver; diff --git a/drivers/isdn/hardware/mISDN/hfcmulti.c b/drivers/isdn/hardware/mISDN/hfcmulti.c index 28543d795188..480c2d7794eb 100644 --- a/drivers/isdn/hardware/mISDN/hfcmulti.c +++ b/drivers/isdn/hardware/mISDN/hfcmulti.c @@ -564,19 +564,19 @@ disable_hwirq(struct hfc_multi *hc) #define MAX_TDM_CHAN 32 -inline void +static inline void enablepcibridge(struct hfc_multi *c) { HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x3); /* was _io before */ } -inline void +static inline void disablepcibridge(struct hfc_multi *c) { HFC_outb(c, R_BRG_PCM_CFG, (0x0 << 6) | 0x2); /* was _io before */ } -inline unsigned char +static inline unsigned char readpcibridge(struct hfc_multi *hc, unsigned char address) { unsigned short cipv; @@ -604,7 +604,7 @@ readpcibridge(struct hfc_multi *hc, unsigned char address) return data; } -inline void +static inline void writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data) { unsigned short cipv; @@ -634,14 +634,14 @@ writepcibridge(struct hfc_multi *hc, unsigned char address, unsigned char data) outl(datav, hc->pci_iobase); } -inline void +static inline void cpld_set_reg(struct hfc_multi *hc, unsigned char reg) { /* Do data pin read low byte */ HFC_outb(hc, R_GPIO_OUT1, reg); } -inline void +static inline void cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val) { cpld_set_reg(hc, reg); @@ -653,7 +653,7 @@ cpld_write_reg(struct hfc_multi *hc, unsigned char reg, unsigned char val) return; } -inline unsigned char +static inline unsigned char cpld_read_reg(struct hfc_multi *hc, unsigned char reg) { unsigned char bytein; @@ -670,14 +670,14 @@ cpld_read_reg(struct hfc_multi *hc, unsigned char reg) return bytein; } -inline void +static inline void vpm_write_address(struct hfc_multi *hc, unsigned short addr) { cpld_write_reg(hc, 0, 0xff & addr); cpld_write_reg(hc, 1, 0x01 & (addr >> 8)); } -inline unsigned short +static inline unsigned short vpm_read_address(struct hfc_multi *c) { unsigned short addr; @@ -691,7 +691,7 @@ vpm_read_address(struct hfc_multi *c) return addr & 0x1ff; } -inline unsigned char +static inline unsigned char vpm_in(struct hfc_multi *c, int which, unsigned short addr) { unsigned char res; @@ -712,7 +712,7 @@ vpm_in(struct hfc_multi *c, int which, unsigned short addr) return res; } -inline void +static inline void vpm_out(struct hfc_multi *c, int which, unsigned short addr, unsigned char data) { @@ -1024,7 +1024,7 @@ hfcmulti_resync(struct hfc_multi *locked, struct hfc_multi *newmaster, int rm) } /* This must be called AND hc must be locked irqsave!!! */ -inline void +static inline void plxsd_checksync(struct hfc_multi *hc, int rm) { if (hc->syncronized) { diff --git a/drivers/isdn/hardware/mISDN/mISDNipac.c b/drivers/isdn/hardware/mISDN/mISDNipac.c index aa9b6c3cadc1..8d338ba366d0 100644 --- a/drivers/isdn/hardware/mISDN/mISDNipac.c +++ b/drivers/isdn/hardware/mISDN/mISDNipac.c @@ -113,7 +113,7 @@ isac_ph_state_bh(struct dchannel *dch) pr_debug("%s: TE newstate %x\n", isac->name, dch->state); } -void +static void isac_empty_fifo(struct isac_hw *isac, int count) { u8 *ptr; diff --git a/drivers/isdn/hardware/mISDN/w6692.c b/drivers/isdn/hardware/mISDN/w6692.c index 741675525b53..3b067ea656bd 100644 --- a/drivers/isdn/hardware/mISDN/w6692.c +++ b/drivers/isdn/hardware/mISDN/w6692.c @@ -848,7 +848,7 @@ dbusy_timer_handler(struct dchannel *dch) } } -void initW6692(struct w6692_hw *card) +static void initW6692(struct w6692_hw *card) { u8 val; diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0c5415b05ea9..95c32f2d7601 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -149,6 +149,8 @@ config IPVLAN tristate "IP-VLAN support" depends on INET depends on IPV6 + depends on NETFILTER + depends on NET_L3_MASTER_DEV ---help--- This allows one to create virtual devices off of a main interface and packets will be delivered based on the dest L3 (IPv6/IPv4 addr) diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 9599ed6f1213..3f31ca32f52b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4628,7 +4628,7 @@ static int bond_init(struct net_device *bond_dev) netdev_dbg(bond_dev, "Begin bond_init\n"); - bond->wq = create_singlethread_workqueue(bond_dev->name); + bond->wq = alloc_ordered_workqueue(bond_dev->name, WQ_MEM_RECLAIM); if (!bond->wq) return -ENOMEM; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index 71f0e791355b..b3d02759c226 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -600,7 +600,6 @@ static int ems_usb_start(struct ems_usb *dev) /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -752,10 +751,8 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); + if (!urb) goto nomem; - } buf = usb_alloc_coherent(dev->udev, size, GFP_ATOMIC, &urb->transfer_dma); if (!buf) { @@ -1007,10 +1004,8 @@ static int ems_usb_probe(struct usb_interface *intf, dev->tx_contexts[i].echo_index = MAX_TX_URBS; dev->intr_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!dev->intr_urb) { - dev_err(&intf->dev, "Couldn't alloc intr URB\n"); + if (!dev->intr_urb) goto cleanup_candev; - } dev->intr_in_buffer = kzalloc(INTR_IN_BUFFER_SIZE, GFP_KERNEL); if (!dev->intr_in_buffer) diff --git a/drivers/net/can/usb/esd_usb2.c b/drivers/net/can/usb/esd_usb2.c index 784a9002fbb9..be928ce62d32 100644 --- a/drivers/net/can/usb/esd_usb2.c +++ b/drivers/net/can/usb/esd_usb2.c @@ -558,8 +558,6 @@ static int esd_usb2_setup_rx_urbs(struct esd_usb2 *dev) /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - dev_warn(dev->udev->dev.parent, - "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -730,7 +728,6 @@ static netdev_tx_t esd_usb2_start_xmit(struct sk_buff *skb, /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); stats->tx_dropped++; dev_kfree_skb(skb); goto nourbmem; diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 6f0cbc38782e..77e3cc06a30c 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -493,10 +493,8 @@ static netdev_tx_t gs_can_start_xmit(struct sk_buff *skb, /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URB\n"); + if (!urb) goto nomem_urb; - } hf = usb_alloc_coherent(dev->udev, sizeof(*hf), GFP_ATOMIC, &urb->transfer_dma); @@ -600,11 +598,8 @@ static int gs_can_open(struct net_device *netdev) /* alloc rx urb */ urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - netdev_err(netdev, - "No memory left for URB\n"); + if (!urb) return -ENOMEM; - } /* alloc rx buffer */ buf = usb_alloc_coherent(dev->udev, diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 6f1f3b675ff5..d51e0c401b48 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -787,10 +787,8 @@ static int kvaser_usb_simple_msg_async(struct kvaser_usb_net_priv *priv, int err; urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); + if (!urb) return -ENOMEM; - } buf = kmalloc(sizeof(struct kvaser_msg), GFP_ATOMIC); if (!buf) { @@ -1393,8 +1391,6 @@ static int kvaser_usb_setup_rx_urbs(struct kvaser_usb *dev) urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - dev_warn(dev->udev->dev.parent, - "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -1670,7 +1666,6 @@ static netdev_tx_t kvaser_usb_start_xmit(struct sk_buff *skb, urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); stats->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; diff --git a/drivers/net/can/usb/peak_usb/pcan_usb_core.c b/drivers/net/can/usb/peak_usb/pcan_usb_core.c index bfb91d8fa460..c06382cdfdfe 100644 --- a/drivers/net/can/usb/peak_usb/pcan_usb_core.c +++ b/drivers/net/can/usb/peak_usb/pcan_usb_core.c @@ -399,7 +399,6 @@ static int peak_usb_start(struct peak_usb_device *dev) /* create a URB, and a buffer for it, to receive usb messages */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -454,7 +453,6 @@ static int peak_usb_start(struct peak_usb_device *dev) /* create a URB and a buffer for it, to transmit usb messages */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } @@ -651,10 +649,8 @@ static int peak_usb_restart(struct peak_usb_device *dev) /* first allocate a urb to handle the asynchronous steps */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(dev->netdev, "no memory left for urb\n"); + if (!urb) return -ENOMEM; - } /* also allocate enough space for the commands to send */ buf = kmalloc(PCAN_USB_MAX_CMD_LEN, GFP_ATOMIC); diff --git a/drivers/net/can/usb/usb_8dev.c b/drivers/net/can/usb/usb_8dev.c index a731720f1d13..108a30e15097 100644 --- a/drivers/net/can/usb/usb_8dev.c +++ b/drivers/net/can/usb/usb_8dev.c @@ -623,10 +623,8 @@ static netdev_tx_t usb_8dev_start_xmit(struct sk_buff *skb, /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); + if (!urb) goto nomem; - } buf = usb_alloc_coherent(priv->udev, size, GFP_ATOMIC, &urb->transfer_dma); @@ -748,7 +746,6 @@ static int usb_8dev_start(struct usb_8dev_priv *priv) /* create a URB, and a buffer for it */ urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - netdev_err(netdev, "No memory left for URBs\n"); err = -ENOMEM; break; } diff --git a/drivers/net/dsa/Kconfig b/drivers/net/dsa/Kconfig index 8f4544394f44..065984670ff1 100644 --- a/drivers/net/dsa/Kconfig +++ b/drivers/net/dsa/Kconfig @@ -16,6 +16,7 @@ config NET_DSA_BCM_SF2 select FIXED_PHY select BCM7XXX_PHY select MDIO_BCM_UNIMAC + select B53 ---help--- This enables support for the Broadcom Starfighter 2 Ethernet switch chips. @@ -24,4 +25,13 @@ source "drivers/net/dsa/b53/Kconfig" source "drivers/net/dsa/mv88e6xxx/Kconfig" +config NET_DSA_QCA8K + tristate "Qualcomm Atheros QCA8K Ethernet switch family support" + depends on NET_DSA + select NET_DSA_TAG_QCA + select REGMAP + ---help--- + This enables support for the Qualcomm Atheros QCA8K Ethernet + switch chips. + endmenu diff --git a/drivers/net/dsa/Makefile b/drivers/net/dsa/Makefile index ca1e71b853a6..8346e4f9737a 100644 --- a/drivers/net/dsa/Makefile +++ b/drivers/net/dsa/Makefile @@ -1,5 +1,6 @@ obj-$(CONFIG_NET_DSA_MV88E6060) += mv88e6060.o obj-$(CONFIG_NET_DSA_BCM_SF2) += bcm_sf2.o +obj-$(CONFIG_NET_DSA_QCA8K) += qca8k.o obj-y += b53/ obj-y += mv88e6xxx/ diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c index bda37d336736..7717b19dc806 100644 --- a/drivers/net/dsa/b53/b53_common.c +++ b/drivers/net/dsa/b53/b53_common.c @@ -167,6 +167,65 @@ static const struct b53_mib_desc b53_mibs[] = { #define B53_MIBS_SIZE ARRAY_SIZE(b53_mibs) +static const struct b53_mib_desc b53_mibs_58xx[] = { + { 8, 0x00, "TxOctets" }, + { 4, 0x08, "TxDropPkts" }, + { 4, 0x0c, "TxQPKTQ0" }, + { 4, 0x10, "TxBroadcastPkts" }, + { 4, 0x14, "TxMulticastPkts" }, + { 4, 0x18, "TxUnicastPKts" }, + { 4, 0x1c, "TxCollisions" }, + { 4, 0x20, "TxSingleCollision" }, + { 4, 0x24, "TxMultipleCollision" }, + { 4, 0x28, "TxDeferredCollision" }, + { 4, 0x2c, "TxLateCollision" }, + { 4, 0x30, "TxExcessiveCollision" }, + { 4, 0x34, "TxFrameInDisc" }, + { 4, 0x38, "TxPausePkts" }, + { 4, 0x3c, "TxQPKTQ1" }, + { 4, 0x40, "TxQPKTQ2" }, + { 4, 0x44, "TxQPKTQ3" }, + { 4, 0x48, "TxQPKTQ4" }, + { 4, 0x4c, "TxQPKTQ5" }, + { 8, 0x50, "RxOctets" }, + { 4, 0x58, "RxUndersizePkts" }, + { 4, 0x5c, "RxPausePkts" }, + { 4, 0x60, "RxPkts64Octets" }, + { 4, 0x64, "RxPkts65to127Octets" }, + { 4, 0x68, "RxPkts128to255Octets" }, + { 4, 0x6c, "RxPkts256to511Octets" }, + { 4, 0x70, "RxPkts512to1023Octets" }, + { 4, 0x74, "RxPkts1024toMaxPktsOctets" }, + { 4, 0x78, "RxOversizePkts" }, + { 4, 0x7c, "RxJabbers" }, + { 4, 0x80, "RxAlignmentErrors" }, + { 4, 0x84, "RxFCSErrors" }, + { 8, 0x88, "RxGoodOctets" }, + { 4, 0x90, "RxDropPkts" }, + { 4, 0x94, "RxUnicastPkts" }, + { 4, 0x98, "RxMulticastPkts" }, + { 4, 0x9c, "RxBroadcastPkts" }, + { 4, 0xa0, "RxSAChanges" }, + { 4, 0xa4, "RxFragments" }, + { 4, 0xa8, "RxJumboPkt" }, + { 4, 0xac, "RxSymblErr" }, + { 4, 0xb0, "InRangeErrCount" }, + { 4, 0xb4, "OutRangeErrCount" }, + { 4, 0xb8, "EEELpiEvent" }, + { 4, 0xbc, "EEELpiDuration" }, + { 4, 0xc0, "RxDiscard" }, + { 4, 0xc8, "TxQPKTQ6" }, + { 4, 0xcc, "TxQPKTQ7" }, + { 4, 0xd0, "TxPkts64Octets" }, + { 4, 0xd4, "TxPkts65to127Octets" }, + { 4, 0xd8, "TxPkts128to255Octets" }, + { 4, 0xdc, "TxPkts256to511Ocets" }, + { 4, 0xe0, "TxPkts512to1023Ocets" }, + { 4, 0xe4, "TxPkts1024toMaxPktOcets" }, +}; + +#define B53_MIBS_58XX_SIZE ARRAY_SIZE(b53_mibs_58xx) + static int b53_do_vlan_op(struct b53_device *dev, u8 op) { unsigned int i; @@ -418,7 +477,7 @@ static int b53_fast_age_vlan(struct b53_device *dev, u16 vid) static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int i; u16 pvlan; @@ -436,7 +495,7 @@ static void b53_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) static int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int cpu_port = dev->cpu_port; u16 pvlan; @@ -461,7 +520,7 @@ static int b53_enable_port(struct dsa_switch *ds, int port, static void b53_disable_port(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 reg; /* Disable Tx/Rx for the port */ @@ -570,7 +629,7 @@ static int b53_switch_reset(struct b53_device *dev) static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; u16 value = 0; int ret; @@ -585,7 +644,7 @@ static int b53_phy_read16(struct dsa_switch *ds, int addr, int reg) static int b53_phy_write16(struct dsa_switch *ds, int addr, int reg, u16 val) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; if (priv->ops->phy_write16) return priv->ops->phy_write16(priv, addr, reg, val); @@ -635,6 +694,8 @@ static const struct b53_mib_desc *b53_get_mib(struct b53_device *dev) return b53_mibs_65; else if (is63xx(dev)) return b53_mibs_63xx; + else if (is58xx(dev)) + return b53_mibs_58xx; else return b53_mibs; } @@ -645,13 +706,15 @@ static unsigned int b53_get_mib_size(struct b53_device *dev) return B53_MIBS_65_SIZE; else if (is63xx(dev)) return B53_MIBS_63XX_SIZE; + else if (is58xx(dev)) + return B53_MIBS_58XX_SIZE; else return B53_MIBS_SIZE; } static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; const struct b53_mib_desc *mibs = b53_get_mib(dev); unsigned int mib_size = b53_get_mib_size(dev); unsigned int i; @@ -664,7 +727,7 @@ static void b53_get_strings(struct dsa_switch *ds, int port, uint8_t *data) static void b53_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; const struct b53_mib_desc *mibs = b53_get_mib(dev); unsigned int mib_size = b53_get_mib_size(dev); const struct b53_mib_desc *s; @@ -696,19 +759,14 @@ static void b53_get_ethtool_stats(struct dsa_switch *ds, int port, static int b53_get_sset_count(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; return b53_get_mib_size(dev); } -static int b53_set_addr(struct dsa_switch *ds, u8 *addr) -{ - return 0; -} - static int b53_setup(struct dsa_switch *ds) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; unsigned int port; int ret; @@ -739,7 +797,7 @@ static int b53_setup(struct dsa_switch *ds) static void b53_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u8 rgmii_ctrl = 0, reg = 0, off; if (!phy_is_pseudo_fixed_link(phydev)) @@ -873,7 +931,7 @@ static int b53_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; if ((is5325(dev) || is5365(dev)) && vlan->vid_begin == 0) return -EOPNOTSUPP; @@ -890,7 +948,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; unsigned int cpu_port = dev->cpu_port; @@ -924,7 +982,7 @@ static void b53_vlan_add(struct dsa_switch *ds, int port, static int b53_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; unsigned int cpu_port = dev->cpu_port; struct b53_vlan *vl; @@ -970,7 +1028,7 @@ static int b53_vlan_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; u16 vid, vid_start = 0, pvid; struct b53_vlan *vl; int err = 0; @@ -1129,7 +1187,7 @@ static int b53_fdb_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; /* 5325 and 5365 require some more massaging, but could * be supported eventually @@ -1144,7 +1202,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; if (b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, true)) pr_err("%s: failed to add MAC address\n", __func__); @@ -1153,7 +1211,7 @@ static void b53_fdb_add(struct dsa_switch *ds, int port, static int b53_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; return b53_arl_op(priv, 0, port, fdb->addr, fdb->vid, false); } @@ -1212,7 +1270,7 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { - struct b53_device *priv = ds_to_priv(ds); + struct b53_device *priv = ds->priv; struct net_device *dev = ds->ports[port].netdev; struct b53_arl_entry results[2]; unsigned int count = 0; @@ -1251,10 +1309,22 @@ static int b53_fdb_dump(struct dsa_switch *ds, int port, static int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; + s8 cpu_port = ds->dst->cpu_port; u16 pvlan, reg; unsigned int i; + /* Make this port leave the all VLANs join since we will have proper + * VLAN entries from now on + */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg &= ~BIT(port); + if ((reg & BIT(cpu_port)) == BIT(cpu_port)) + reg &= ~BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } + dev->ports[port].bridge_dev = bridge; b53_read16(dev, B53_PVLAN_PAGE, B53_PVLAN_PORT_MASK(port), &pvlan); @@ -1284,9 +1354,10 @@ static int b53_br_join(struct dsa_switch *ds, int port, static void b53_br_leave(struct dsa_switch *ds, int port) { - struct b53_device *dev = ds_to_priv(ds); + struct b53_device *dev = ds->priv; struct net_device *bridge = dev->ports[port].bridge_dev; struct b53_vlan *vl = &dev->vlans[0]; + s8 cpu_port = ds->dst->cpu_port; unsigned int i; u16 pvlan, reg, pvid; @@ -1316,22 +1387,27 @@ static void b53_br_leave(struct dsa_switch *ds, int port) else pvid = 0; - b53_get_vlan_entry(dev, pvid, vl); - vl->members |= BIT(port) | BIT(dev->cpu_port); - vl->untag |= BIT(port) | BIT(dev->cpu_port); - b53_set_vlan_entry(dev, pvid, vl); + /* Make this port join all VLANs without VLAN entries */ + if (is58xx(dev)) { + b53_read16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, ®); + reg |= BIT(port); + if (!(reg & BIT(cpu_port))) + reg |= BIT(cpu_port); + b53_write16(dev, B53_VLAN_PAGE, B53_JOIN_ALL_VLAN_EN, reg); + } else { + b53_get_vlan_entry(dev, pvid, vl); + vl->members |= BIT(port) | BIT(dev->cpu_port); + vl->untag |= BIT(port) | BIT(dev->cpu_port); + b53_set_vlan_entry(dev, pvid, vl); + } } -static void b53_br_set_stp_state(struct dsa_switch *ds, int port, - u8 state) +static void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state) { - struct b53_device *dev = ds_to_priv(ds); - u8 hw_state, cur_hw_state; + struct b53_device *dev = ds->priv; + u8 hw_state; u8 reg; - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), ®); - cur_hw_state = reg & PORT_CTRL_STP_STATE_MASK; - switch (state) { case BR_STATE_DISABLED: hw_state = PORT_CTRL_DIS_STATE; @@ -1353,30 +1429,28 @@ static void b53_br_set_stp_state(struct dsa_switch *ds, int port, return; } - /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening - * state (hw_state) - */ - if (cur_hw_state != hw_state) { - if (cur_hw_state >= PORT_CTRL_LEARN_STATE && - hw_state <= PORT_CTRL_LISTEN_STATE) { - if (b53_fast_age_port(dev, port)) { - dev_err(ds->dev, "fast ageing failed\n"); - return; - } - } - } - b53_read8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), ®); reg &= ~PORT_CTRL_STP_STATE_MASK; reg |= hw_state; b53_write8(dev, B53_CTRL_PAGE, B53_PORT_CTRL(port), reg); } -static struct dsa_switch_driver b53_switch_ops = { - .tag_protocol = DSA_TAG_PROTO_NONE, +static void b53_br_fast_age(struct dsa_switch *ds, int port) +{ + struct b53_device *dev = ds->priv; + + if (b53_fast_age_port(dev, port)) + dev_err(ds->dev, "fast ageing failed\n"); +} + +static enum dsa_tag_protocol b53_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_NONE; +} + +static struct dsa_switch_ops b53_switch_ops = { + .get_tag_protocol = b53_get_tag_protocol, .setup = b53_setup, - .set_addr = b53_set_addr, .get_strings = b53_get_strings, .get_ethtool_stats = b53_get_ethtool_stats, .get_sset_count = b53_get_sset_count, @@ -1388,6 +1462,7 @@ static struct dsa_switch_driver b53_switch_ops = { .port_bridge_join = b53_br_join, .port_bridge_leave = b53_br_leave, .port_stp_state_set = b53_br_set_stp_state, + .port_fast_age = b53_br_fast_age, .port_vlan_filtering = b53_vlan_filtering, .port_vlan_prepare = b53_vlan_prepare, .port_vlan_add = b53_vlan_add, @@ -1593,11 +1668,22 @@ static const struct b53_chip_data b53_switch_chips[] = { .jumbo_pm_reg = B53_JUMBO_PORT_MASK, .jumbo_size_reg = B53_JUMBO_MAX_SIZE, }, + { + .chip_id = BCM7445_DEVICE_ID, + .dev_name = "BCM7445", + .vlans = 4096, + .enabled_ports = 0x1ff, + .arl_entries = 4, + .cpu_port = B53_CPU_PORT, + .vta_regs = B53_VTA_REGS, + .duplex_reg = B53_DUPLEX_STAT_GE, + .jumbo_pm_reg = B53_JUMBO_PORT_MASK, + .jumbo_size_reg = B53_JUMBO_MAX_SIZE, + }, }; static int b53_switch_init(struct b53_device *dev) { - struct dsa_switch *ds = dev->ds; unsigned int i; int ret; @@ -1613,7 +1699,6 @@ static int b53_switch_init(struct b53_device *dev) dev->vta_regs[1] = chip->vta_regs[1]; dev->vta_regs[2] = chip->vta_regs[2]; dev->jumbo_pm_reg = chip->jumbo_pm_reg; - ds->drv = &b53_switch_ops; dev->cpu_port = chip->cpu_port; dev->num_vlans = chip->vlans; dev->num_arl_entries = chip->arl_entries; @@ -1681,7 +1766,8 @@ static int b53_switch_init(struct b53_device *dev) return 0; } -struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops, +struct b53_device *b53_switch_alloc(struct device *base, + const struct b53_io_ops *ops, void *priv) { struct dsa_switch *ds; @@ -1700,6 +1786,7 @@ struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops, dev->ds = ds; dev->priv = priv; dev->ops = ops; + ds->ops = &b53_switch_ops; mutex_init(&dev->reg_mutex); mutex_init(&dev->stats_mutex); diff --git a/drivers/net/dsa/b53/b53_mdio.c b/drivers/net/dsa/b53/b53_mdio.c index aa87c3fffdac..477a16b5660a 100644 --- a/drivers/net/dsa/b53/b53_mdio.c +++ b/drivers/net/dsa/b53/b53_mdio.c @@ -267,7 +267,7 @@ static int b53_mdio_phy_write16(struct b53_device *dev, int addr, int reg, return mdiobus_write_nested(bus, addr, reg, value); } -static struct b53_io_ops b53_mdio_ops = { +static const struct b53_io_ops b53_mdio_ops = { .read8 = b53_mdio_read8, .read16 = b53_mdio_read16, .read32 = b53_mdio_read32, diff --git a/drivers/net/dsa/b53/b53_mmap.c b/drivers/net/dsa/b53/b53_mmap.c index 77ffc4312808..cc9e6bd83e0e 100644 --- a/drivers/net/dsa/b53/b53_mmap.c +++ b/drivers/net/dsa/b53/b53_mmap.c @@ -208,7 +208,7 @@ static int b53_mmap_write64(struct b53_device *dev, u8 page, u8 reg, return 0; } -static struct b53_io_ops b53_mmap_ops = { +static const struct b53_io_ops b53_mmap_ops = { .read8 = b53_mmap_read8, .read16 = b53_mmap_read16, .read32 = b53_mmap_read32, diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h index 835a744f206e..f192a673caba 100644 --- a/drivers/net/dsa/b53/b53_priv.h +++ b/drivers/net/dsa/b53/b53_priv.h @@ -60,6 +60,7 @@ enum { BCM53018_DEVICE_ID = 0x53018, BCM53019_DEVICE_ID = 0x53019, BCM58XX_DEVICE_ID = 0x5800, + BCM7445_DEVICE_ID = 0x7445, }; #define B53_N_PORTS 9 @@ -174,6 +175,12 @@ static inline int is5301x(struct b53_device *dev) dev->chip_id == BCM53019_DEVICE_ID; } +static inline int is58xx(struct b53_device *dev) +{ + return dev->chip_id == BCM58XX_DEVICE_ID || + dev->chip_id == BCM7445_DEVICE_ID; +} + #define B53_CPU_PORT_25 5 #define B53_CPU_PORT 8 @@ -182,7 +189,8 @@ static inline int is_cpu_port(struct b53_device *dev, int port) return dev->cpu_port; } -struct b53_device *b53_switch_alloc(struct device *base, struct b53_io_ops *ops, +struct b53_device *b53_switch_alloc(struct device *base, + const struct b53_io_ops *ops, void *priv); int b53_switch_detect(struct b53_device *dev); @@ -364,7 +372,6 @@ static inline void b53_arl_from_entry(u64 *mac_vid, u32 *fwd_entry, #ifdef CONFIG_BCM47XX -#include #include #include static inline int b53_switch_get_reset_gpio(struct b53_device *dev) diff --git a/drivers/net/dsa/b53/b53_regs.h b/drivers/net/dsa/b53/b53_regs.h index a0b453ea34c9..dac0af4e2cd0 100644 --- a/drivers/net/dsa/b53/b53_regs.h +++ b/drivers/net/dsa/b53/b53_regs.h @@ -309,6 +309,9 @@ /* Port VLAN mask (16 bit) IMP port is always 8, also on 5325 & co */ #define B53_PVLAN_PORT_MASK(i) ((i) * 2) +/* Join all VLANs register (16 bit) */ +#define B53_JOIN_ALL_VLAN_EN 0x50 + /************************************************************************* * 802.1Q Page Registers *************************************************************************/ diff --git a/drivers/net/dsa/b53/b53_spi.c b/drivers/net/dsa/b53/b53_spi.c index 2bda0b5f1578..f89f5308a99b 100644 --- a/drivers/net/dsa/b53/b53_spi.c +++ b/drivers/net/dsa/b53/b53_spi.c @@ -270,7 +270,7 @@ static int b53_spi_write64(struct b53_device *dev, u8 page, u8 reg, u64 value) return spi_write(spi, txbuf, sizeof(txbuf)); } -static struct b53_io_ops b53_spi_ops = { +static const struct b53_io_ops b53_spi_ops = { .read8 = b53_spi_read8, .read16 = b53_spi_read16, .read32 = b53_spi_read32, @@ -317,8 +317,6 @@ static int b53_spi_remove(struct spi_device *spi) static struct spi_driver b53_spi_driver = { .driver = { .name = "b53-switch", - .bus = &spi_bus_type, - .owner = THIS_MODULE, }, .probe = b53_spi_probe, .remove = b53_spi_remove, diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c index 3e2d4a5fcd5a..8a62b6a69703 100644 --- a/drivers/net/dsa/b53/b53_srab.c +++ b/drivers/net/dsa/b53/b53_srab.c @@ -344,7 +344,7 @@ err: return ret; } -static struct b53_io_ops b53_srab_ops = { +static const struct b53_io_ops b53_srab_ops = { .read8 = b53_srab_read8, .read16 = b53_srab_read16, .read32 = b53_srab_read32, diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index b2b838724a9b..e218887f18b7 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -29,130 +29,21 @@ #include #include #include +#include #include "bcm_sf2.h" #include "bcm_sf2_regs.h" +#include "b53/b53_priv.h" +#include "b53/b53_regs.h" -/* String, offset, and register size in bytes if different from 4 bytes */ -static const struct bcm_sf2_hw_stats bcm_sf2_mib[] = { - { "TxOctets", 0x000, 8 }, - { "TxDropPkts", 0x020 }, - { "TxQPKTQ0", 0x030 }, - { "TxBroadcastPkts", 0x040 }, - { "TxMulticastPkts", 0x050 }, - { "TxUnicastPKts", 0x060 }, - { "TxCollisions", 0x070 }, - { "TxSingleCollision", 0x080 }, - { "TxMultipleCollision", 0x090 }, - { "TxDeferredCollision", 0x0a0 }, - { "TxLateCollision", 0x0b0 }, - { "TxExcessiveCollision", 0x0c0 }, - { "TxFrameInDisc", 0x0d0 }, - { "TxPausePkts", 0x0e0 }, - { "TxQPKTQ1", 0x0f0 }, - { "TxQPKTQ2", 0x100 }, - { "TxQPKTQ3", 0x110 }, - { "TxQPKTQ4", 0x120 }, - { "TxQPKTQ5", 0x130 }, - { "RxOctets", 0x140, 8 }, - { "RxUndersizePkts", 0x160 }, - { "RxPausePkts", 0x170 }, - { "RxPkts64Octets", 0x180 }, - { "RxPkts65to127Octets", 0x190 }, - { "RxPkts128to255Octets", 0x1a0 }, - { "RxPkts256to511Octets", 0x1b0 }, - { "RxPkts512to1023Octets", 0x1c0 }, - { "RxPkts1024toMaxPktsOctets", 0x1d0 }, - { "RxOversizePkts", 0x1e0 }, - { "RxJabbers", 0x1f0 }, - { "RxAlignmentErrors", 0x200 }, - { "RxFCSErrors", 0x210 }, - { "RxGoodOctets", 0x220, 8 }, - { "RxDropPkts", 0x240 }, - { "RxUnicastPkts", 0x250 }, - { "RxMulticastPkts", 0x260 }, - { "RxBroadcastPkts", 0x270 }, - { "RxSAChanges", 0x280 }, - { "RxFragments", 0x290 }, - { "RxJumboPkt", 0x2a0 }, - { "RxSymblErr", 0x2b0 }, - { "InRangeErrCount", 0x2c0 }, - { "OutRangeErrCount", 0x2d0 }, - { "EEELpiEvent", 0x2e0 }, - { "EEELpiDuration", 0x2f0 }, - { "RxDiscard", 0x300, 8 }, - { "TxQPKTQ6", 0x320 }, - { "TxQPKTQ7", 0x330 }, - { "TxPkts64Octets", 0x340 }, - { "TxPkts65to127Octets", 0x350 }, - { "TxPkts128to255Octets", 0x360 }, - { "TxPkts256to511Ocets", 0x370 }, - { "TxPkts512to1023Ocets", 0x380 }, - { "TxPkts1024toMaxPktOcets", 0x390 }, -}; - -#define BCM_SF2_STATS_SIZE ARRAY_SIZE(bcm_sf2_mib) - -static void bcm_sf2_sw_get_strings(struct dsa_switch *ds, - int port, uint8_t *data) +static enum dsa_tag_protocol bcm_sf2_sw_get_tag_protocol(struct dsa_switch *ds) { - unsigned int i; - - for (i = 0; i < BCM_SF2_STATS_SIZE; i++) - memcpy(data + i * ETH_GSTRING_LEN, - bcm_sf2_mib[i].string, ETH_GSTRING_LEN); -} - -static void bcm_sf2_sw_get_ethtool_stats(struct dsa_switch *ds, - int port, uint64_t *data) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - const struct bcm_sf2_hw_stats *s; - unsigned int i; - u64 val = 0; - u32 offset; - - mutex_lock(&priv->stats_mutex); - - /* Now fetch the per-port counters */ - for (i = 0; i < BCM_SF2_STATS_SIZE; i++) { - s = &bcm_sf2_mib[i]; - - /* Do a latched 64-bit read if needed */ - offset = s->reg + CORE_P_MIB_OFFSET(port); - if (s->sizeof_stat == 8) - val = core_readq(priv, offset); - else - val = core_readl(priv, offset); - - data[i] = (u64)val; - } - - mutex_unlock(&priv->stats_mutex); -} - -static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds) -{ - return BCM_SF2_STATS_SIZE; -} - -static const char *bcm_sf2_sw_drv_probe(struct device *dsa_dev, - struct device *host_dev, int sw_addr, - void **_priv) -{ - struct bcm_sf2_priv *priv; - - priv = devm_kzalloc(dsa_dev, sizeof(*priv), GFP_KERNEL); - if (!priv) - return NULL; - *_priv = priv; - - return "Broadcom Starfighter 2"; + return DSA_TAG_PROTO_BRCM; } static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int i; u32 reg; @@ -172,7 +63,7 @@ static void bcm_sf2_imp_vlan_setup(struct dsa_switch *ds, int cpu_port) static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg, val; /* Enable the port memories */ @@ -237,7 +128,7 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port) static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg; reg = core_readl(priv, CORE_EEE_EN_CTRL); @@ -250,7 +141,7 @@ static void bcm_sf2_eee_enable_set(struct dsa_switch *ds, int port, bool enable) static void bcm_sf2_gphy_enable_set(struct dsa_switch *ds, bool enable) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 reg; reg = reg_readl(priv, REG_SPHY_CNTRL); @@ -324,7 +215,7 @@ static inline void bcm_sf2_port_intr_disable(struct bcm_sf2_priv *priv, static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = ds->dst[ds->index].cpu_port; u32 reg; @@ -365,7 +256,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); reg &= ~PORT_VLAN_CTRL_MASK; reg |= (1 << port); - reg |= priv->port_sts[port].vlan_ctl_mask; + reg |= priv->dev->ports[port].vlan_ctl_mask; core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(port)); bcm_sf2_imp_vlan_setup(ds, cpu_port); @@ -380,7 +271,7 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port, static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 off, reg; if (priv->wol_ports_mask & (1 << port)) @@ -412,7 +303,7 @@ static void bcm_sf2_port_disable(struct dsa_switch *ds, int port, static int bcm_sf2_eee_init(struct dsa_switch *ds, int port, struct phy_device *phy) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->port_sts[port].eee; int ret; @@ -430,7 +321,7 @@ static int bcm_sf2_eee_init(struct dsa_switch *ds, int port, static int bcm_sf2_sw_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->port_sts[port].eee; u32 reg; @@ -445,7 +336,7 @@ static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_eee *p = &priv->port_sts[port].eee; p->eee_enabled = e->eee_enabled; @@ -461,469 +352,6 @@ static int bcm_sf2_sw_set_eee(struct dsa_switch *ds, int port, return 0; } -static int bcm_sf2_fast_age_op(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 1000; - u32 reg; - - reg = core_readl(priv, CORE_FAST_AGE_CTRL); - reg |= EN_AGE_PORT | EN_AGE_VLAN | EN_AGE_DYNAMIC | FAST_AGE_STR_DONE; - core_writel(priv, reg, CORE_FAST_AGE_CTRL); - - do { - reg = core_readl(priv, CORE_FAST_AGE_CTRL); - if (!(reg & FAST_AGE_STR_DONE)) - break; - - cpu_relax(); - } while (timeout--); - - if (!timeout) - return -ETIMEDOUT; - - core_writel(priv, 0, CORE_FAST_AGE_CTRL); - - return 0; -} - -/* Fast-ageing of ARL entries for a given port, equivalent to an ARL - * flush for that port. - */ -static int bcm_sf2_sw_fast_age_port(struct dsa_switch *ds, int port) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - - core_writel(priv, port, CORE_FAST_AGE_PORT); - - return bcm_sf2_fast_age_op(priv); -} - -static int bcm_sf2_sw_fast_age_vlan(struct bcm_sf2_priv *priv, u16 vid) -{ - core_writel(priv, vid, CORE_FAST_AGE_VID); - - return bcm_sf2_fast_age_op(priv); -} - -static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 10; - u32 reg; - - do { - reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL); - if (!(reg & ARLA_VTBL_STDN)) - return 0; - - usleep_range(1000, 2000); - } while (timeout--); - - return -ETIMEDOUT; -} - -static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op) -{ - core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL); - - return bcm_sf2_vlan_op_wait(priv); -} - -static void bcm_sf2_set_vlan_entry(struct bcm_sf2_priv *priv, u16 vid, - struct bcm_sf2_vlan *vlan) -{ - int ret; - - core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR); - core_writel(priv, vlan->untag << UNTAG_MAP_SHIFT | vlan->members, - CORE_ARLA_VTBL_ENTRY); - - ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_WRITE); - if (ret) - pr_err("failed to write VLAN entry\n"); -} - -static int bcm_sf2_get_vlan_entry(struct bcm_sf2_priv *priv, u16 vid, - struct bcm_sf2_vlan *vlan) -{ - u32 entry; - int ret; - - core_writel(priv, vid & VTBL_ADDR_INDEX_MASK, CORE_ARLA_VTBL_ADDR); - - ret = bcm_sf2_vlan_op(priv, ARLA_VTBL_CMD_READ); - if (ret) - return ret; - - entry = core_readl(priv, CORE_ARLA_VTBL_ENTRY); - vlan->members = entry & FWD_MAP_MASK; - vlan->untag = (entry >> UNTAG_MAP_SHIFT) & UNTAG_MAP_MASK; - - return 0; -} - -static int bcm_sf2_sw_br_join(struct dsa_switch *ds, int port, - struct net_device *bridge) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - s8 cpu_port = ds->dst->cpu_port; - unsigned int i; - u32 reg, p_ctl; - - /* Make this port leave the all VLANs join since we will have proper - * VLAN entries from now on - */ - reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN); - reg &= ~BIT(port); - if ((reg & BIT(cpu_port)) == BIT(cpu_port)) - reg &= ~BIT(cpu_port); - core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN); - - priv->port_sts[port].bridge_dev = bridge; - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); - - for (i = 0; i < priv->hw_params.num_ports; i++) { - if (priv->port_sts[i].bridge_dev != bridge) - continue; - - /* Add this local port to the remote port VLAN control - * membership and update the remote port bitmask - */ - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg |= 1 << port; - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - priv->port_sts[i].vlan_ctl_mask = reg; - - p_ctl |= 1 << i; - } - - /* Configure the local port VLAN control membership to include - * remote ports and update the local port bitmask - */ - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); - priv->port_sts[port].vlan_ctl_mask = p_ctl; - - return 0; -} - -static void bcm_sf2_sw_br_leave(struct dsa_switch *ds, int port) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - struct net_device *bridge = priv->port_sts[port].bridge_dev; - s8 cpu_port = ds->dst->cpu_port; - unsigned int i; - u32 reg, p_ctl; - - p_ctl = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(port)); - - for (i = 0; i < priv->hw_params.num_ports; i++) { - /* Don't touch the remaining ports */ - if (priv->port_sts[i].bridge_dev != bridge) - continue; - - reg = core_readl(priv, CORE_PORT_VLAN_CTL_PORT(i)); - reg &= ~(1 << port); - core_writel(priv, reg, CORE_PORT_VLAN_CTL_PORT(i)); - priv->port_sts[port].vlan_ctl_mask = reg; - - /* Prevent self removal to preserve isolation */ - if (port != i) - p_ctl &= ~(1 << i); - } - - core_writel(priv, p_ctl, CORE_PORT_VLAN_CTL_PORT(port)); - priv->port_sts[port].vlan_ctl_mask = p_ctl; - priv->port_sts[port].bridge_dev = NULL; - - /* Make this port join all VLANs without VLAN entries */ - reg = core_readl(priv, CORE_JOIN_ALL_VLAN_EN); - reg |= BIT(port); - if (!(reg & BIT(cpu_port))) - reg |= BIT(cpu_port); - core_writel(priv, reg, CORE_JOIN_ALL_VLAN_EN); -} - -static void bcm_sf2_sw_br_set_stp_state(struct dsa_switch *ds, int port, - u8 state) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - u8 hw_state, cur_hw_state; - u32 reg; - - reg = core_readl(priv, CORE_G_PCTL_PORT(port)); - cur_hw_state = reg & (G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT); - - switch (state) { - case BR_STATE_DISABLED: - hw_state = G_MISTP_DIS_STATE; - break; - case BR_STATE_LISTENING: - hw_state = G_MISTP_LISTEN_STATE; - break; - case BR_STATE_LEARNING: - hw_state = G_MISTP_LEARN_STATE; - break; - case BR_STATE_FORWARDING: - hw_state = G_MISTP_FWD_STATE; - break; - case BR_STATE_BLOCKING: - hw_state = G_MISTP_BLOCK_STATE; - break; - default: - pr_err("%s: invalid STP state: %d\n", __func__, state); - return; - } - - /* Fast-age ARL entries if we are moving a port from Learning or - * Forwarding (cur_hw_state) state to Disabled, Blocking or Listening - * state (hw_state) - */ - if (cur_hw_state != hw_state) { - if (cur_hw_state >= G_MISTP_LEARN_STATE && - hw_state <= G_MISTP_LISTEN_STATE) { - if (bcm_sf2_sw_fast_age_port(ds, port)) { - pr_err("%s: fast-ageing failed\n", __func__); - return; - } - } - } - - reg = core_readl(priv, CORE_G_PCTL_PORT(port)); - reg &= ~(G_MISTP_STATE_MASK << G_MISTP_STATE_SHIFT); - reg |= hw_state; - core_writel(priv, reg, CORE_G_PCTL_PORT(port)); -} - -/* Address Resolution Logic routines */ -static int bcm_sf2_arl_op_wait(struct bcm_sf2_priv *priv) -{ - unsigned int timeout = 10; - u32 reg; - - do { - reg = core_readl(priv, CORE_ARLA_RWCTL); - if (!(reg & ARL_STRTDN)) - return 0; - - usleep_range(1000, 2000); - } while (timeout--); - - return -ETIMEDOUT; -} - -static int bcm_sf2_arl_rw_op(struct bcm_sf2_priv *priv, unsigned int op) -{ - u32 cmd; - - if (op > ARL_RW) - return -EINVAL; - - cmd = core_readl(priv, CORE_ARLA_RWCTL); - cmd &= ~IVL_SVL_SELECT; - cmd |= ARL_STRTDN; - if (op) - cmd |= ARL_RW; - else - cmd &= ~ARL_RW; - core_writel(priv, cmd, CORE_ARLA_RWCTL); - - return bcm_sf2_arl_op_wait(priv); -} - -static int bcm_sf2_arl_read(struct bcm_sf2_priv *priv, u64 mac, - u16 vid, struct bcm_sf2_arl_entry *ent, u8 *idx, - bool is_valid) -{ - unsigned int i; - int ret; - - ret = bcm_sf2_arl_op_wait(priv); - if (ret) - return ret; - - /* Read the 4 bins */ - for (i = 0; i < 4; i++) { - u64 mac_vid; - u32 fwd_entry; - - mac_vid = core_readq(priv, CORE_ARLA_MACVID_ENTRY(i)); - fwd_entry = core_readl(priv, CORE_ARLA_FWD_ENTRY(i)); - bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry); - - if (ent->is_valid && is_valid) { - *idx = i; - return 0; - } - - /* This is the MAC we just deleted */ - if (!is_valid && (mac_vid & mac)) - return 0; - } - - return -ENOENT; -} - -static int bcm_sf2_arl_op(struct bcm_sf2_priv *priv, int op, int port, - const unsigned char *addr, u16 vid, bool is_valid) -{ - struct bcm_sf2_arl_entry ent; - u32 fwd_entry; - u64 mac, mac_vid = 0; - u8 idx = 0; - int ret; - - /* Convert the array into a 64-bit MAC */ - mac = bcm_sf2_mac_to_u64(addr); - - /* Perform a read for the given MAC and VID */ - core_writeq(priv, mac, CORE_ARLA_MAC); - core_writel(priv, vid, CORE_ARLA_VID); - - /* Issue a read operation for this MAC */ - ret = bcm_sf2_arl_rw_op(priv, 1); - if (ret) - return ret; - - ret = bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid); - /* If this is a read, just finish now */ - if (op) - return ret; - - /* We could not find a matching MAC, so reset to a new entry */ - if (ret) { - fwd_entry = 0; - idx = 0; - } - - memset(&ent, 0, sizeof(ent)); - ent.port = port; - ent.is_valid = is_valid; - ent.vid = vid; - ent.is_static = true; - memcpy(ent.mac, addr, ETH_ALEN); - bcm_sf2_arl_from_entry(&mac_vid, &fwd_entry, &ent); - - core_writeq(priv, mac_vid, CORE_ARLA_MACVID_ENTRY(idx)); - core_writel(priv, fwd_entry, CORE_ARLA_FWD_ENTRY(idx)); - - ret = bcm_sf2_arl_rw_op(priv, 0); - if (ret) - return ret; - - /* Re-read the entry to check */ - return bcm_sf2_arl_read(priv, mac, vid, &ent, &idx, is_valid); -} - -static int bcm_sf2_sw_fdb_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) -{ - /* We do not need to do anything specific here yet */ - return 0; -} - -static void bcm_sf2_sw_fdb_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - - if (bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, true)) - pr_err("%s: failed to add MAC address\n", __func__); -} - -static int bcm_sf2_sw_fdb_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_fdb *fdb) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - - return bcm_sf2_arl_op(priv, 0, port, fdb->addr, fdb->vid, false); -} - -static int bcm_sf2_arl_search_wait(struct bcm_sf2_priv *priv) -{ - unsigned timeout = 1000; - u32 reg; - - do { - reg = core_readl(priv, CORE_ARLA_SRCH_CTL); - if (!(reg & ARLA_SRCH_STDN)) - return 0; - - if (reg & ARLA_SRCH_VLID) - return 0; - - usleep_range(1000, 2000); - } while (timeout--); - - return -ETIMEDOUT; -} - -static void bcm_sf2_arl_search_rd(struct bcm_sf2_priv *priv, u8 idx, - struct bcm_sf2_arl_entry *ent) -{ - u64 mac_vid; - u32 fwd_entry; - - mac_vid = core_readq(priv, CORE_ARLA_SRCH_RSLT_MACVID(idx)); - fwd_entry = core_readl(priv, CORE_ARLA_SRCH_RSLT(idx)); - bcm_sf2_arl_to_entry(ent, mac_vid, fwd_entry); -} - -static int bcm_sf2_sw_fdb_copy(struct net_device *dev, int port, - const struct bcm_sf2_arl_entry *ent, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)) -{ - if (!ent->is_valid) - return 0; - - if (port != ent->port) - return 0; - - ether_addr_copy(fdb->addr, ent->mac); - fdb->vid = ent->vid; - fdb->ndm_state = ent->is_static ? NUD_NOARP : NUD_REACHABLE; - - return cb(&fdb->obj); -} - -static int bcm_sf2_sw_fdb_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - struct net_device *dev = ds->ports[port].netdev; - struct bcm_sf2_arl_entry results[2]; - unsigned int count = 0; - int ret; - - /* Start search operation */ - core_writel(priv, ARLA_SRCH_STDN, CORE_ARLA_SRCH_CTL); - - do { - ret = bcm_sf2_arl_search_wait(priv); - if (ret) - return ret; - - /* Read both entries, then return their values back */ - bcm_sf2_arl_search_rd(priv, 0, &results[0]); - ret = bcm_sf2_sw_fdb_copy(dev, port, &results[0], fdb, cb); - if (ret) - return ret; - - bcm_sf2_arl_search_rd(priv, 1, &results[1]); - ret = bcm_sf2_sw_fdb_copy(dev, port, &results[1], fdb, cb); - if (ret) - return ret; - - if (!results[0].is_valid && !results[1].is_valid) - break; - - } while (count++ < CORE_ARLA_NUM_ENTRIES); - - return 0; -} - static int bcm_sf2_sw_indir_rw(struct bcm_sf2_priv *priv, int op, int addr, int regnum, u16 val) { @@ -1036,12 +464,10 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv) static void bcm_sf2_intr_disable(struct bcm_sf2_priv *priv) { - intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); + intrl2_0_mask_set(priv, 0xffffffff); intrl2_0_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_0_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); - intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_MASK_SET); + intrl2_1_mask_set(priv, 0xffffffff); intrl2_1_writel(priv, 0xffffffff, INTRL2_CPU_CLEAR); - intrl2_1_writel(priv, 0, INTRL2_CPU_MASK_CLEAR); } static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv, @@ -1082,7 +508,7 @@ static void bcm_sf2_identify_ports(struct bcm_sf2_priv *priv, static int bcm_sf2_mdio_register(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct device_node *dn; static int index; int err; @@ -1146,14 +572,9 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) of_node_put(priv->master_mii_dn); } -static int bcm_sf2_sw_set_addr(struct dsa_switch *ds, u8 *addr) -{ - return 0; -} - static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); /* The BCM7xxx PHY driver expects to find the integrated PHY revision * in bits 15:8 and the patch level in bits 7:0 which is exactly what @@ -1166,7 +587,7 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port) static void bcm_sf2_sw_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 id_mode_dis = 0, port_mode; const char *str = NULL; u32 reg; @@ -1246,7 +667,7 @@ force_link: static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, struct fixed_phy_status *status) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); u32 duplex, pause; u32 reg; @@ -1298,7 +719,7 @@ static void bcm_sf2_sw_fixed_link_update(struct dsa_switch *ds, int port, static int bcm_sf2_sw_suspend(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; bcm_sf2_intr_disable(priv); @@ -1318,7 +739,7 @@ static int bcm_sf2_sw_suspend(struct dsa_switch *ds) static int bcm_sf2_sw_resume(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; int ret; @@ -1345,7 +766,7 @@ static void bcm_sf2_sw_get_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { struct net_device *p = ds->dst[ds->index].master_netdev; - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); struct ethtool_wolinfo pwol; /* Get the parent device WoL settings */ @@ -1368,7 +789,7 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, struct ethtool_wolinfo *wol) { struct net_device *p = ds->dst[ds->index].master_netdev; - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); s8 cpu_port = ds->dst[ds->index].cpu_port; struct ethtool_wolinfo pwol; @@ -1393,43 +814,32 @@ static int bcm_sf2_sw_set_wol(struct dsa_switch *ds, int port, return p->ethtool_ops->set_wol(p, wol); } -static void bcm_sf2_enable_vlan(struct bcm_sf2_priv *priv, bool enable) +static int bcm_sf2_vlan_op_wait(struct bcm_sf2_priv *priv) { - u32 mgmt, vc0, vc1, vc4, vc5; + unsigned int timeout = 10; + u32 reg; - mgmt = core_readl(priv, CORE_SWMODE); - vc0 = core_readl(priv, CORE_VLAN_CTRL0); - vc1 = core_readl(priv, CORE_VLAN_CTRL1); - vc4 = core_readl(priv, CORE_VLAN_CTRL4); - vc5 = core_readl(priv, CORE_VLAN_CTRL5); + do { + reg = core_readl(priv, CORE_ARLA_VTBL_RWCTRL); + if (!(reg & ARLA_VTBL_STDN)) + return 0; - mgmt &= ~SW_FWDG_MODE; + usleep_range(1000, 2000); + } while (timeout--); - if (enable) { - vc0 |= VLAN_EN | VLAN_LEARN_MODE_IVL; - vc1 |= EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP; - vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT); - vc4 |= INGR_VID_CHK_DROP; - vc5 |= DROP_VTABLE_MISS | EN_VID_FFF_FWD; - } else { - vc0 &= ~(VLAN_EN | VLAN_LEARN_MODE_IVL); - vc1 &= ~(EN_RSV_MCAST_UNTAG | EN_RSV_MCAST_FWDMAP); - vc4 &= ~(INGR_VID_CHK_MASK << INGR_VID_CHK_SHIFT); - vc5 &= ~(DROP_VTABLE_MISS | EN_VID_FFF_FWD); - vc4 |= INGR_VID_CHK_VID_VIOL_IMP; - } + return -ETIMEDOUT; +} - core_writel(priv, vc0, CORE_VLAN_CTRL0); - core_writel(priv, vc1, CORE_VLAN_CTRL1); - core_writel(priv, 0, CORE_VLAN_CTRL3); - core_writel(priv, vc4, CORE_VLAN_CTRL4); - core_writel(priv, vc5, CORE_VLAN_CTRL5); - core_writel(priv, mgmt, CORE_SWMODE); +static int bcm_sf2_vlan_op(struct bcm_sf2_priv *priv, u8 op) +{ + core_writel(priv, ARLA_VTBL_STDN | op, CORE_ARLA_VTBL_RWCTRL); + + return bcm_sf2_vlan_op_wait(priv); } static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds) { - struct bcm_sf2_priv *priv = ds_to_priv(ds); + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; /* Clear all VLANs */ @@ -1443,162 +853,199 @@ static void bcm_sf2_sw_configure_vlan(struct dsa_switch *ds) } } -static int bcm_sf2_sw_vlan_filtering(struct dsa_switch *ds, int port, - bool vlan_filtering) -{ - return 0; -} - -static int bcm_sf2_sw_vlan_prepare(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - - bcm_sf2_enable_vlan(priv, true); - - return 0; -} - -static void bcm_sf2_sw_vlan_add(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan, - struct switchdev_trans *trans) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; - s8 cpu_port = ds->dst->cpu_port; - struct bcm_sf2_vlan *vl; - u16 vid; - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &priv->vlans[vid]; - - bcm_sf2_get_vlan_entry(priv, vid, vl); - - vl->members |= BIT(port) | BIT(cpu_port); - if (untagged) - vl->untag |= BIT(port) | BIT(cpu_port); - else - vl->untag &= ~(BIT(port) | BIT(cpu_port)); - - bcm_sf2_set_vlan_entry(priv, vid, vl); - bcm_sf2_sw_fast_age_vlan(priv, vid); - } - - if (pvid) { - core_writel(priv, vlan->vid_end, CORE_DEFAULT_1Q_TAG_P(port)); - core_writel(priv, vlan->vid_end, - CORE_DEFAULT_1Q_TAG_P(cpu_port)); - bcm_sf2_sw_fast_age_vlan(priv, vid); - } -} - -static int bcm_sf2_sw_vlan_del(struct dsa_switch *ds, int port, - const struct switchdev_obj_port_vlan *vlan) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; - s8 cpu_port = ds->dst->cpu_port; - struct bcm_sf2_vlan *vl; - u16 vid, pvid; - int ret; - - pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port)); - - for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) { - vl = &priv->vlans[vid]; - - ret = bcm_sf2_get_vlan_entry(priv, vid, vl); - if (ret) - return ret; - - vl->members &= ~BIT(port); - if ((vl->members & BIT(cpu_port)) == BIT(cpu_port)) - vl->members = 0; - if (pvid == vid) - pvid = 0; - if (untagged) { - vl->untag &= ~BIT(port); - if ((vl->untag & BIT(port)) == BIT(cpu_port)) - vl->untag = 0; - } - - bcm_sf2_set_vlan_entry(priv, vid, vl); - bcm_sf2_sw_fast_age_vlan(priv, vid); - } - - core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(port)); - core_writel(priv, pvid, CORE_DEFAULT_1Q_TAG_P(cpu_port)); - bcm_sf2_sw_fast_age_vlan(priv, vid); - - return 0; -} - -static int bcm_sf2_sw_vlan_dump(struct dsa_switch *ds, int port, - struct switchdev_obj_port_vlan *vlan, - int (*cb)(struct switchdev_obj *obj)) -{ - struct bcm_sf2_priv *priv = ds_to_priv(ds); - struct bcm_sf2_port_status *p = &priv->port_sts[port]; - struct bcm_sf2_vlan *vl; - u16 vid, pvid; - int err = 0; - - pvid = core_readl(priv, CORE_DEFAULT_1Q_TAG_P(port)); - - for (vid = 0; vid < VLAN_N_VID; vid++) { - vl = &priv->vlans[vid]; - - if (!(vl->members & BIT(port))) - continue; - - vlan->vid_begin = vlan->vid_end = vid; - vlan->flags = 0; - - if (vl->untag & BIT(port)) - vlan->flags |= BRIDGE_VLAN_INFO_UNTAGGED; - if (p->pvid == vid) - vlan->flags |= BRIDGE_VLAN_INFO_PVID; - - err = cb(&vlan->obj); - if (err) - break; - } - - return err; -} - static int bcm_sf2_sw_setup(struct dsa_switch *ds) { - const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; - struct bcm_sf2_priv *priv = ds_to_priv(ds); - struct device_node *dn; - void __iomem **base; + struct bcm_sf2_priv *priv = bcm_sf2_to_priv(ds); unsigned int port; + + /* Enable all valid ports and disable those unused */ + for (port = 0; port < priv->hw_params.num_ports; port++) { + /* IMP port receives special treatment */ + if ((1 << port) & ds->enabled_port_mask) + bcm_sf2_port_setup(ds, port, NULL); + else if (dsa_is_cpu_port(ds, port)) + bcm_sf2_imp_setup(ds, port); + else + bcm_sf2_port_disable(ds, port, NULL); + } + + bcm_sf2_sw_configure_vlan(ds); + + return 0; +} + +/* The SWITCH_CORE register space is managed by b53 but operates on a page + + * register basis so we need to translate that into an address that the + * bus-glue understands. + */ +#define SF2_PAGE_REG_MKADDR(page, reg) ((page) << 10 | (reg) << 2) + +static int bcm_sf2_core_read8(struct b53_device *dev, u8 page, u8 reg, + u8 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_read16(struct b53_device *dev, u8 page, u8 reg, + u16 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_read32(struct b53_device *dev, u8 page, u8 reg, + u32 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readl(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_read64(struct b53_device *dev, u8 page, u8 reg, + u64 *val) +{ + struct bcm_sf2_priv *priv = dev->priv; + + *val = core_readq(priv, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write8(struct b53_device *dev, u8 page, u8 reg, + u8 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write16(struct b53_device *dev, u8 page, u8 reg, + u16 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write32(struct b53_device *dev, u8 page, u8 reg, + u32 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writel(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static int bcm_sf2_core_write64(struct b53_device *dev, u8 page, u8 reg, + u64 value) +{ + struct bcm_sf2_priv *priv = dev->priv; + + core_writeq(priv, value, SF2_PAGE_REG_MKADDR(page, reg)); + + return 0; +} + +static struct b53_io_ops bcm_sf2_io_ops = { + .read8 = bcm_sf2_core_read8, + .read16 = bcm_sf2_core_read16, + .read32 = bcm_sf2_core_read32, + .read48 = bcm_sf2_core_read64, + .read64 = bcm_sf2_core_read64, + .write8 = bcm_sf2_core_write8, + .write16 = bcm_sf2_core_write16, + .write32 = bcm_sf2_core_write32, + .write48 = bcm_sf2_core_write64, + .write64 = bcm_sf2_core_write64, +}; + +static int bcm_sf2_sw_probe(struct platform_device *pdev) +{ + const char *reg_names[BCM_SF2_REGS_NUM] = BCM_SF2_REGS_NAME; + struct device_node *dn = pdev->dev.of_node; + struct b53_platform_data *pdata; + struct bcm_sf2_priv *priv; + struct b53_device *dev; + struct dsa_switch *ds; + void __iomem **base; + struct resource *r; unsigned int i; u32 reg, rev; int ret; + priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + dev = b53_switch_alloc(&pdev->dev, &bcm_sf2_io_ops, priv); + if (!dev) + return -ENOMEM; + + pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return -ENOMEM; + + /* Auto-detection using standard registers will not work, so + * provide an indication of what kind of device we are for + * b53_common to work with + */ + pdata->chip_id = BCM7445_DEVICE_ID; + dev->pdata = pdata; + + priv->dev = dev; + ds = dev->ds; + + /* Override the parts that are non-standard wrt. normal b53 devices */ + ds->ops->get_tag_protocol = bcm_sf2_sw_get_tag_protocol; + ds->ops->setup = bcm_sf2_sw_setup; + ds->ops->get_phy_flags = bcm_sf2_sw_get_phy_flags; + ds->ops->adjust_link = bcm_sf2_sw_adjust_link; + ds->ops->fixed_link_update = bcm_sf2_sw_fixed_link_update; + ds->ops->suspend = bcm_sf2_sw_suspend; + ds->ops->resume = bcm_sf2_sw_resume; + ds->ops->get_wol = bcm_sf2_sw_get_wol; + ds->ops->set_wol = bcm_sf2_sw_set_wol; + ds->ops->port_enable = bcm_sf2_port_setup; + ds->ops->port_disable = bcm_sf2_port_disable; + ds->ops->get_eee = bcm_sf2_sw_get_eee; + ds->ops->set_eee = bcm_sf2_sw_set_eee; + + /* Avoid having DSA free our slave MDIO bus (checking for + * ds->slave_mii_bus and ds->ops->phy_read being non-NULL) + */ + ds->ops->phy_read = NULL; + + dev_set_drvdata(&pdev->dev, priv); + spin_lock_init(&priv->indir_lock); mutex_init(&priv->stats_mutex); - /* All the interesting properties are at the parent device_node - * level - */ - dn = ds->cd->of_node->parent; - bcm_sf2_identify_ports(priv, ds->cd->of_node); + bcm_sf2_identify_ports(priv, dn->child); priv->irq0 = irq_of_parse_and_map(dn, 0); priv->irq1 = irq_of_parse_and_map(dn, 1); base = &priv->core; for (i = 0; i < BCM_SF2_REGS_NUM; i++) { - *base = of_iomap(dn, i); - if (*base == NULL) { + r = platform_get_resource(pdev, IORESOURCE_MEM, i); + *base = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(*base)) { pr_err("unable to find register: %s\n", reg_names[i]); - ret = -ENOMEM; - goto out_unmap; + return PTR_ERR(*base); } base++; } @@ -1606,30 +1053,30 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) ret = bcm_sf2_sw_rst(priv); if (ret) { pr_err("unable to software reset switch: %d\n", ret); - goto out_unmap; + return ret; } ret = bcm_sf2_mdio_register(ds); if (ret) { pr_err("failed to register MDIO bus\n"); - goto out_unmap; + return ret; } /* Disable all interrupts and request them */ bcm_sf2_intr_disable(priv); - ret = request_irq(priv->irq0, bcm_sf2_switch_0_isr, 0, - "switch_0", priv); + ret = devm_request_irq(&pdev->dev, priv->irq0, bcm_sf2_switch_0_isr, 0, + "switch_0", priv); if (ret < 0) { pr_err("failed to request switch_0 IRQ\n"); goto out_mdio; } - ret = request_irq(priv->irq1, bcm_sf2_switch_1_isr, 0, - "switch_1", priv); + ret = devm_request_irq(&pdev->dev, priv->irq1, bcm_sf2_switch_1_isr, 0, + "switch_1", priv); if (ret < 0) { pr_err("failed to request switch_1 IRQ\n"); - goto out_free_irq0; + goto out_mdio; } /* Reset the MIB counters */ @@ -1649,19 +1096,6 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) &priv->hw_params.num_gphy)) priv->hw_params.num_gphy = 1; - /* Enable all valid ports and disable those unused */ - for (port = 0; port < priv->hw_params.num_ports; port++) { - /* IMP port receives special treatment */ - if ((1 << port) & ds->enabled_port_mask) - bcm_sf2_port_setup(ds, port, NULL); - else if (dsa_is_cpu_port(ds, port)) - bcm_sf2_imp_setup(ds, port); - else - bcm_sf2_port_disable(ds, port, NULL); - } - - bcm_sf2_sw_configure_vlan(ds); - rev = reg_readl(priv, REG_SWITCH_REVISION); priv->hw_params.top_rev = (rev >> SWITCH_TOP_REV_SHIFT) & SWITCH_TOP_REV_MASK; @@ -1670,6 +1104,10 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) rev = reg_readl(priv, REG_PHY_REVISION); priv->hw_params.gphy_rev = rev & PHY_REVISION_MASK; + ret = b53_switch_register(dev); + if (ret) + goto out_mdio; + pr_info("Starfighter 2 top: %x.%02x, core: %x.%02x base: 0x%p, IRQs: %d, %d\n", priv->hw_params.top_rev >> 8, priv->hw_params.top_rev & 0xff, priv->hw_params.core_rev >> 8, priv->hw_params.core_rev & 0xff, @@ -1677,66 +1115,60 @@ static int bcm_sf2_sw_setup(struct dsa_switch *ds) return 0; -out_free_irq0: - free_irq(priv->irq0, priv); out_mdio: bcm_sf2_mdio_unregister(priv); -out_unmap: - base = &priv->core; - for (i = 0; i < BCM_SF2_REGS_NUM; i++) { - if (*base) - iounmap(*base); - base++; - } return ret; } -static struct dsa_switch_driver bcm_sf2_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_BRCM, - .probe = bcm_sf2_sw_drv_probe, - .setup = bcm_sf2_sw_setup, - .set_addr = bcm_sf2_sw_set_addr, - .get_phy_flags = bcm_sf2_sw_get_phy_flags, - .get_strings = bcm_sf2_sw_get_strings, - .get_ethtool_stats = bcm_sf2_sw_get_ethtool_stats, - .get_sset_count = bcm_sf2_sw_get_sset_count, - .adjust_link = bcm_sf2_sw_adjust_link, - .fixed_link_update = bcm_sf2_sw_fixed_link_update, - .suspend = bcm_sf2_sw_suspend, - .resume = bcm_sf2_sw_resume, - .get_wol = bcm_sf2_sw_get_wol, - .set_wol = bcm_sf2_sw_set_wol, - .port_enable = bcm_sf2_port_setup, - .port_disable = bcm_sf2_port_disable, - .get_eee = bcm_sf2_sw_get_eee, - .set_eee = bcm_sf2_sw_set_eee, - .port_bridge_join = bcm_sf2_sw_br_join, - .port_bridge_leave = bcm_sf2_sw_br_leave, - .port_stp_state_set = bcm_sf2_sw_br_set_stp_state, - .port_fdb_prepare = bcm_sf2_sw_fdb_prepare, - .port_fdb_add = bcm_sf2_sw_fdb_add, - .port_fdb_del = bcm_sf2_sw_fdb_del, - .port_fdb_dump = bcm_sf2_sw_fdb_dump, - .port_vlan_filtering = bcm_sf2_sw_vlan_filtering, - .port_vlan_prepare = bcm_sf2_sw_vlan_prepare, - .port_vlan_add = bcm_sf2_sw_vlan_add, - .port_vlan_del = bcm_sf2_sw_vlan_del, - .port_vlan_dump = bcm_sf2_sw_vlan_dump, -}; - -static int __init bcm_sf2_init(void) +static int bcm_sf2_sw_remove(struct platform_device *pdev) { - register_switch_driver(&bcm_sf2_switch_driver); + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + + /* Disable all ports and interrupts */ + priv->wol_ports_mask = 0; + bcm_sf2_sw_suspend(priv->dev->ds); + dsa_unregister_switch(priv->dev->ds); + bcm_sf2_mdio_unregister(priv); return 0; } -module_init(bcm_sf2_init); -static void __exit bcm_sf2_exit(void) +#ifdef CONFIG_PM_SLEEP +static int bcm_sf2_suspend(struct device *dev) { - unregister_switch_driver(&bcm_sf2_switch_driver); + struct platform_device *pdev = to_platform_device(dev); + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + + return dsa_switch_suspend(priv->dev->ds); } -module_exit(bcm_sf2_exit); + +static int bcm_sf2_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct bcm_sf2_priv *priv = platform_get_drvdata(pdev); + + return dsa_switch_resume(priv->dev->ds); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(bcm_sf2_pm_ops, + bcm_sf2_suspend, bcm_sf2_resume); + +static const struct of_device_id bcm_sf2_of_match[] = { + { .compatible = "brcm,bcm7445-switch-v4.0" }, + { /* sentinel */ }, +}; + +static struct platform_driver bcm_sf2_driver = { + .probe = bcm_sf2_sw_probe, + .remove = bcm_sf2_sw_remove, + .driver = { + .name = "brcm-sf2", + .of_match_table = bcm_sf2_of_match, + .pm = &bcm_sf2_pm_ops, + }, +}; +module_platform_driver(bcm_sf2_driver); MODULE_AUTHOR("Broadcom Corporation"); MODULE_DESCRIPTION("Driver for Broadcom Starfighter 2 ethernet switch chip"); diff --git a/drivers/net/dsa/bcm_sf2.h b/drivers/net/dsa/bcm_sf2.h index dd446e466699..44692673e1d5 100644 --- a/drivers/net/dsa/bcm_sf2.h +++ b/drivers/net/dsa/bcm_sf2.h @@ -26,6 +26,7 @@ #include #include "bcm_sf2_regs.h" +#include "b53/b53_priv.h" struct bcm_sf2_hw_params { u16 top_rev; @@ -49,72 +50,8 @@ struct bcm_sf2_port_status { unsigned int link; struct ethtool_eee eee; - - u32 vlan_ctl_mask; - u16 pvid; - - struct net_device *bridge_dev; }; -struct bcm_sf2_arl_entry { - u8 port; - u8 mac[ETH_ALEN]; - u16 vid; - u8 is_valid:1; - u8 is_age:1; - u8 is_static:1; -}; - -struct bcm_sf2_vlan { - u16 members; - u16 untag; -}; - -static inline void bcm_sf2_mac_from_u64(u64 src, u8 *dst) -{ - unsigned int i; - - for (i = 0; i < ETH_ALEN; i++) - dst[ETH_ALEN - 1 - i] = (src >> (8 * i)) & 0xff; -} - -static inline u64 bcm_sf2_mac_to_u64(const u8 *src) -{ - unsigned int i; - u64 dst = 0; - - for (i = 0; i < ETH_ALEN; i++) - dst |= (u64)src[ETH_ALEN - 1 - i] << (8 * i); - - return dst; -} - -static inline void bcm_sf2_arl_to_entry(struct bcm_sf2_arl_entry *ent, - u64 mac_vid, u32 fwd_entry) -{ - memset(ent, 0, sizeof(*ent)); - ent->port = fwd_entry & PORTID_MASK; - ent->is_valid = !!(fwd_entry & ARL_VALID); - ent->is_age = !!(fwd_entry & ARL_AGE); - ent->is_static = !!(fwd_entry & ARL_STATIC); - bcm_sf2_mac_from_u64(mac_vid, ent->mac); - ent->vid = mac_vid >> VID_SHIFT; -} - -static inline void bcm_sf2_arl_from_entry(u64 *mac_vid, u32 *fwd_entry, - const struct bcm_sf2_arl_entry *ent) -{ - *mac_vid = bcm_sf2_mac_to_u64(ent->mac); - *mac_vid |= (u64)(ent->vid & VID_MASK) << VID_SHIFT; - *fwd_entry = ent->port & PORTID_MASK; - if (ent->is_valid) - *fwd_entry |= ARL_VALID; - if (ent->is_static) - *fwd_entry |= ARL_STATIC; - if (ent->is_age) - *fwd_entry |= ARL_AGE; -} - struct bcm_sf2_priv { /* Base registers, keep those in order with BCM_SF2_REGS_NAME */ void __iomem *core; @@ -134,6 +71,9 @@ struct bcm_sf2_priv { u32 irq1_stat; u32 irq1_mask; + /* Backing b53_device */ + struct b53_device *dev; + /* Mutex protecting access to the MIB counters */ struct mutex stats_mutex; @@ -155,16 +95,14 @@ struct bcm_sf2_priv { struct device_node *master_mii_dn; struct mii_bus *slave_mii_bus; struct mii_bus *master_mii_bus; - - /* Cache of programmed VLANs */ - struct bcm_sf2_vlan vlans[VLAN_N_VID]; }; -struct bcm_sf2_hw_stats { - const char *string; - u16 reg; - u8 sizeof_stat; -}; +static inline struct bcm_sf2_priv *bcm_sf2_to_priv(struct dsa_switch *ds) +{ + struct b53_device *dev = ds->priv; + + return dev->priv; +} #define SF2_IO_MACRO(name) \ static inline u32 name##_readl(struct bcm_sf2_priv *priv, u32 off) \ diff --git a/drivers/net/dsa/bcm_sf2_regs.h b/drivers/net/dsa/bcm_sf2_regs.h index 9f2a9cb42074..838fe373cd6f 100644 --- a/drivers/net/dsa/bcm_sf2_regs.h +++ b/drivers/net/dsa/bcm_sf2_regs.h @@ -115,14 +115,6 @@ #define RX_BCST_EN (1 << 2) #define RX_MCST_EN (1 << 3) #define RX_UCST_EN (1 << 4) -#define G_MISTP_STATE_SHIFT 5 -#define G_MISTP_NO_STP (0 << G_MISTP_STATE_SHIFT) -#define G_MISTP_DIS_STATE (1 << G_MISTP_STATE_SHIFT) -#define G_MISTP_BLOCK_STATE (2 << G_MISTP_STATE_SHIFT) -#define G_MISTP_LISTEN_STATE (3 << G_MISTP_STATE_SHIFT) -#define G_MISTP_LEARN_STATE (4 << G_MISTP_STATE_SHIFT) -#define G_MISTP_FWD_STATE (5 << G_MISTP_STATE_SHIFT) -#define G_MISTP_STATE_MASK 0x7 #define CORE_SWMODE 0x0002c #define SW_FWDG_MODE (1 << 0) @@ -205,75 +197,11 @@ #define BRCM_HDR_EN_P5 (1 << 1) #define BRCM_HDR_EN_P7 (1 << 2) -#define CORE_BRCM_HDR_CTRL2 0x0828 - -#define CORE_HL_PRTC_CTRL 0x0940 -#define ARP_EN (1 << 0) -#define RARP_EN (1 << 1) -#define DHCP_EN (1 << 2) -#define ICMPV4_EN (1 << 3) -#define ICMPV6_EN (1 << 4) -#define ICMPV6_FWD_MODE (1 << 5) -#define IGMP_DIP_EN (1 << 8) -#define IGMP_RPTLVE_EN (1 << 9) -#define IGMP_RTPLVE_FWD_MODE (1 << 10) -#define IGMP_QRY_EN (1 << 11) -#define IGMP_QRY_FWD_MODE (1 << 12) -#define IGMP_UKN_EN (1 << 13) -#define IGMP_UKN_FWD_MODE (1 << 14) -#define MLD_RPTDONE_EN (1 << 15) -#define MLD_RPTDONE_FWD_MODE (1 << 16) -#define MLD_QRY_EN (1 << 17) -#define MLD_QRY_FWD_MODE (1 << 18) - #define CORE_RST_MIB_CNT_EN 0x0950 #define CORE_BRCM_HDR_RX_DIS 0x0980 #define CORE_BRCM_HDR_TX_DIS 0x0988 -#define CORE_ARLA_NUM_ENTRIES 1024 - -#define CORE_ARLA_RWCTL 0x1400 -#define ARL_RW (1 << 0) -#define IVL_SVL_SELECT (1 << 6) -#define ARL_STRTDN (1 << 7) - -#define CORE_ARLA_MAC 0x1408 -#define CORE_ARLA_VID 0x1420 -#define ARLA_VIDTAB_INDX_MASK 0x1fff - -#define CORE_ARLA_MACVID0 0x1440 -#define MAC_MASK 0xffffffffff -#define VID_SHIFT 48 -#define VID_MASK 0xfff - -#define CORE_ARLA_FWD_ENTRY0 0x1460 -#define PORTID_MASK 0x1ff -#define ARL_CON_SHIFT 9 -#define ARL_CON_MASK 0x3 -#define ARL_PRI_SHIFT 11 -#define ARL_PRI_MASK 0x7 -#define ARL_AGE (1 << 14) -#define ARL_STATIC (1 << 15) -#define ARL_VALID (1 << 16) - -#define CORE_ARLA_MACVID_ENTRY(x) (CORE_ARLA_MACVID0 + ((x) * 0x40)) -#define CORE_ARLA_FWD_ENTRY(x) (CORE_ARLA_FWD_ENTRY0 + ((x) * 0x40)) - -#define CORE_ARLA_SRCH_CTL 0x1540 -#define ARLA_SRCH_VLID (1 << 0) -#define IVL_SVL_SELECT (1 << 6) -#define ARLA_SRCH_STDN (1 << 7) - -#define CORE_ARLA_SRCH_ADR 0x1544 -#define ARLA_SRCH_ADR_VALID (1 << 15) - -#define CORE_ARLA_SRCH_RSLT_0_MACVID 0x1580 -#define CORE_ARLA_SRCH_RSLT_0 0x15a0 - -#define CORE_ARLA_SRCH_RSLT_MACVID(x) (CORE_ARLA_SRCH_RSLT_0_MACVID + ((x) * 0x40)) -#define CORE_ARLA_SRCH_RSLT(x) (CORE_ARLA_SRCH_RSLT_0 + ((x) * 0x40)) - #define CORE_ARLA_VTBL_RWCTRL 0x1600 #define ARLA_VTBL_CMD_WRITE 0 #define ARLA_VTBL_CMD_READ 1 @@ -297,59 +225,9 @@ #define P_TXQ_PSM_VDD(x) (P_TXQ_PSM_VDD_MASK << \ ((x) * P_TXQ_PSM_VDD_SHIFT)) -#define CORE_P0_MIB_OFFSET 0x8000 -#define P_MIB_SIZE 0x400 -#define CORE_P_MIB_OFFSET(x) (CORE_P0_MIB_OFFSET + (x) * P_MIB_SIZE) - #define CORE_PORT_VLAN_CTL_PORT(x) (0xc400 + ((x) * 0x8)) #define PORT_VLAN_CTRL_MASK 0x1ff -#define CORE_VLAN_CTRL0 0xd000 -#define CHANGE_1P_VID_INNER (1 << 0) -#define CHANGE_1P_VID_OUTER (1 << 1) -#define CHANGE_1Q_VID (1 << 3) -#define VLAN_LEARN_MODE_SVL (0 << 5) -#define VLAN_LEARN_MODE_IVL (3 << 5) -#define VLAN_EN (1 << 7) - -#define CORE_VLAN_CTRL1 0xd004 -#define EN_RSV_MCAST_FWDMAP (1 << 2) -#define EN_RSV_MCAST_UNTAG (1 << 3) -#define EN_IPMC_BYPASS_FWDMAP (1 << 5) -#define EN_IPMC_BYPASS_UNTAG (1 << 6) - -#define CORE_VLAN_CTRL2 0xd008 -#define EN_MIIM_BYPASS_V_FWDMAP (1 << 2) -#define EN_GMRP_GVRP_V_FWDMAP (1 << 5) -#define EN_GMRP_GVRP_UNTAG_MAP (1 << 6) - -#define CORE_VLAN_CTRL3 0xd00c -#define EN_DROP_NON1Q_MASK 0x1ff - -#define CORE_VLAN_CTRL4 0xd014 -#define RESV_MCAST_FLOOD (1 << 1) -#define EN_DOUBLE_TAG_MASK 0x3 -#define EN_DOUBLE_TAG_SHIFT 2 -#define EN_MGE_REV_GMRP (1 << 4) -#define EN_MGE_REV_GVRP (1 << 5) -#define INGR_VID_CHK_SHIFT 6 -#define INGR_VID_CHK_MASK 0x3 -#define INGR_VID_CHK_FWD (0 << INGR_VID_CHK_SHIFT) -#define INGR_VID_CHK_DROP (1 << INGR_VID_CHK_SHIFT) -#define INGR_VID_CHK_NO_CHK (2 << INGR_VID_CHK_SHIFT) -#define INGR_VID_CHK_VID_VIOL_IMP (3 << INGR_VID_CHK_SHIFT) - -#define CORE_VLAN_CTRL5 0xd018 -#define EN_CPU_RX_BYP_INNER_CRCCHCK (1 << 0) -#define EN_VID_FFF_FWD (1 << 2) -#define DROP_VTABLE_MISS (1 << 3) -#define EGRESS_DIR_FRM_BYP_TRUNK_EN (1 << 4) -#define PRESV_NON1Q (1 << 6) - -#define CORE_VLAN_CTRL6 0xd01c -#define STRICT_SFD_DETECT (1 << 0) -#define DIS_ARL_BUST_LMIT (1 << 4) - #define CORE_DEFAULT_1Q_TAG_P(x) (0xd040 + ((x) * 8)) #define CFI_SHIFT 12 #define PRI_SHIFT 13 diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index e36b40886bd8..7ce36dbd9b62 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -19,7 +19,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - struct mv88e6060_priv *priv = ds_to_priv(ds); + struct mv88e6060_priv *priv = ds->priv; return mdiobus_read_nested(priv->bus, priv->sw_addr + addr, reg); } @@ -37,7 +37,7 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - struct mv88e6060_priv *priv = ds_to_priv(ds); + struct mv88e6060_priv *priv = ds->priv; return mdiobus_write_nested(priv->bus, priv->sw_addr + addr, reg, val); } @@ -69,6 +69,11 @@ static const char *mv88e6060_get_name(struct mii_bus *bus, int sw_addr) return NULL; } +static enum dsa_tag_protocol mv88e6060_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_TRAILER; +} + static const char *mv88e6060_drv_probe(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **_priv) @@ -247,8 +252,8 @@ mv88e6060_phy_write(struct dsa_switch *ds, int port, int regnum, u16 val) return reg_write(ds, addr, regnum, val); } -static struct dsa_switch_driver mv88e6060_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_TRAILER, +static struct dsa_switch_ops mv88e6060_switch_ops = { + .get_tag_protocol = mv88e6060_get_tag_protocol, .probe = mv88e6060_drv_probe, .setup = mv88e6060_setup, .set_addr = mv88e6060_set_addr, @@ -258,14 +263,14 @@ static struct dsa_switch_driver mv88e6060_switch_driver = { static int __init mv88e6060_init(void) { - register_switch_driver(&mv88e6060_switch_driver); + register_switch_driver(&mv88e6060_switch_ops); return 0; } module_init(mv88e6060_init); static void __exit mv88e6060_cleanup(void) { - unregister_switch_driver(&mv88e6060_switch_driver); + unregister_switch_driver(&mv88e6060_switch_ops); } module_exit(mv88e6060_cleanup); diff --git a/drivers/net/dsa/mv88e6xxx/Kconfig b/drivers/net/dsa/mv88e6xxx/Kconfig index 490bc06f993e..486668813e15 100644 --- a/drivers/net/dsa/mv88e6xxx/Kconfig +++ b/drivers/net/dsa/mv88e6xxx/Kconfig @@ -2,6 +2,18 @@ config NET_DSA_MV88E6XXX tristate "Marvell 88E6xxx Ethernet switch fabric support" depends on NET_DSA select NET_DSA_TAG_EDSA + select NET_DSA_TAG_DSA help This driver adds support for most of the Marvell 88E6xxx models of Ethernet switch chips, except 88E6060. + +config NET_DSA_MV88E6XXX_GLOBAL2 + bool "Switch Global 2 Registers support" + default y + depends on NET_DSA_MV88E6XXX + help + This registers set at internal SMI address 0x1C provides extended + features like EEPROM interface, trunking, cross-chip setup, etc. + + It is required on most chips. If the chip you compile the support for + doesn't have such registers set, say N here. In doubt, say Y. diff --git a/drivers/net/dsa/mv88e6xxx/Makefile b/drivers/net/dsa/mv88e6xxx/Makefile index 6e29a75ee2f7..10ce820daa48 100644 --- a/drivers/net/dsa/mv88e6xxx/Makefile +++ b/drivers/net/dsa/mv88e6xxx/Makefile @@ -1 +1,4 @@ -obj-$(CONFIG_NET_DSA_MV88E6XXX) += chip.o +obj-$(CONFIG_NET_DSA_MV88E6XXX) += mv88e6xxx.o +mv88e6xxx-objs := chip.o +mv88e6xxx-objs += global1.o +mv88e6xxx-$(CONFIG_NET_DSA_MV88E6XXX_GLOBAL2) += global2.o diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 710679067594..883fd9809dd2 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -29,7 +29,10 @@ #include #include #include + #include "mv88e6xxx.h" +#include "global1.h" +#include "global2.h" static void assert_reg_lock(struct mv88e6xxx_chip *chip) { @@ -95,7 +98,7 @@ static int mv88e6xxx_smi_single_chip_write(struct mv88e6xxx_chip *chip, return 0; } -static const struct mv88e6xxx_ops mv88e6xxx_smi_single_chip_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_single_chip_ops = { .read = mv88e6xxx_smi_single_chip_read, .write = mv88e6xxx_smi_single_chip_write, }; @@ -177,13 +180,12 @@ static int mv88e6xxx_smi_multi_chip_write(struct mv88e6xxx_chip *chip, return 0; } -static const struct mv88e6xxx_ops mv88e6xxx_smi_multi_chip_ops = { +static const struct mv88e6xxx_bus_ops mv88e6xxx_smi_multi_chip_ops = { .read = mv88e6xxx_smi_multi_chip_read, .write = mv88e6xxx_smi_multi_chip_write, }; -static int mv88e6xxx_read(struct mv88e6xxx_chip *chip, - int addr, int reg, u16 *val) +int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val) { int err; @@ -199,8 +201,7 @@ static int mv88e6xxx_read(struct mv88e6xxx_chip *chip, return 0; } -static int mv88e6xxx_write(struct mv88e6xxx_chip *chip, - int addr, int reg, u16 val) +int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val) { int err; @@ -216,25 +217,144 @@ static int mv88e6xxx_write(struct mv88e6xxx_chip *chip, return 0; } -/* Indirect write to single pointer-data register with an Update bit */ -static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, - u16 update) +static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, int reg, + u16 *val) { - u16 val; - int i, err; + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +static int mv88e6xxx_port_write(struct mv88e6xxx_chip *chip, int port, int reg, + u16 val) +{ + int addr = chip->info->port_base_addr + port; + + return mv88e6xxx_write(chip, addr, reg, val); +} + +static int mv88e6xxx_phy_read(struct mv88e6xxx_chip *chip, int phy, + int reg, u16 *val) +{ + int addr = phy; /* PHY devices addresses start at 0x0 */ + + if (!chip->info->ops->phy_read) + return -EOPNOTSUPP; + + return chip->info->ops->phy_read(chip, addr, reg, val); +} + +static int mv88e6xxx_phy_write(struct mv88e6xxx_chip *chip, int phy, + int reg, u16 val) +{ + int addr = phy; /* PHY devices addresses start at 0x0 */ + + if (!chip->info->ops->phy_write) + return -EOPNOTSUPP; + + return chip->info->ops->phy_write(chip, addr, reg, val); +} + +static int mv88e6xxx_phy_page_get(struct mv88e6xxx_chip *chip, int phy, u8 page) +{ + if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_PHY_PAGE)) + return -EOPNOTSUPP; + + return mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page); +} + +static void mv88e6xxx_phy_page_put(struct mv88e6xxx_chip *chip, int phy) +{ + int err; + + /* Restore PHY page Copper 0x0 for access via the registered MDIO bus */ + err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, PHY_PAGE_COPPER); + if (unlikely(err)) { + dev_err(chip->dev, "failed to restore PHY %d page Copper (%d)\n", + phy, err); + } +} + +static int mv88e6xxx_phy_page_read(struct mv88e6xxx_chip *chip, int phy, + u8 page, int reg, u16 *val) +{ + int err; + + /* There is no paging for registers 22 */ + if (reg == PHY_PAGE) + return -EINVAL; + + err = mv88e6xxx_phy_page_get(chip, phy, page); + if (!err) { + err = mv88e6xxx_phy_read(chip, phy, reg, val); + mv88e6xxx_phy_page_put(chip, phy); + } + + return err; +} + +static int mv88e6xxx_phy_page_write(struct mv88e6xxx_chip *chip, int phy, + u8 page, int reg, u16 val) +{ + int err; + + /* There is no paging for registers 22 */ + if (reg == PHY_PAGE) + return -EINVAL; + + err = mv88e6xxx_phy_page_get(chip, phy, page); + if (!err) { + err = mv88e6xxx_phy_write(chip, phy, PHY_PAGE, page); + mv88e6xxx_phy_page_put(chip, phy); + } + + return err; +} + +static int mv88e6xxx_serdes_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + return mv88e6xxx_phy_page_read(chip, ADDR_SERDES, SERDES_PAGE_FIBER, + reg, val); +} + +static int mv88e6xxx_serdes_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return mv88e6xxx_phy_page_write(chip, ADDR_SERDES, SERDES_PAGE_FIBER, + reg, val); +} + +int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask) +{ + int i; + + for (i = 0; i < 16; i++) { + u16 val; + int err; - /* Wait until the previous operation is completed */ - for (i = 0; i < 16; ++i) { err = mv88e6xxx_read(chip, addr, reg, &val); if (err) return err; - if (!(val & BIT(15))) - break; + if (!(val & mask)) + return 0; + + usleep_range(1000, 2000); } - if (i == 16) - return -ETIMEDOUT; + dev_err(chip->dev, "Timeout while waiting for switch\n"); + return -ETIMEDOUT; +} + +/* Indirect write to single pointer-data register with an Update bit */ +int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, u16 update) +{ + u16 val; + int err; + + /* Wait until the previous operation is completed */ + err = mv88e6xxx_wait(chip, addr, reg, BIT(15)); + if (err) + return err; /* Set the Update bit to trigger a write operation */ val = BIT(15) | update; @@ -242,63 +362,27 @@ static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, return mv88e6xxx_write(chip, addr, reg, val); } -static int _mv88e6xxx_reg_read(struct mv88e6xxx_chip *chip, int addr, int reg) +static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) { u16 val; - int err; + int i, err; - err = mv88e6xxx_read(chip, addr, reg, &val); + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val); if (err) return err; - return val; -} + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, + val & ~GLOBAL_CONTROL_PPU_ENABLE); + if (err) + return err; -static int _mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr, - int reg, u16 val) -{ - return mv88e6xxx_write(chip, addr, reg, val); -} - -static int mv88e6xxx_mdio_read_direct(struct mv88e6xxx_chip *chip, - int addr, int regnum) -{ - if (addr >= 0) - return _mv88e6xxx_reg_read(chip, addr, regnum); - return 0xffff; -} - -static int mv88e6xxx_mdio_write_direct(struct mv88e6xxx_chip *chip, - int addr, int regnum, u16 val) -{ - if (addr >= 0) - return _mv88e6xxx_reg_write(chip, addr, regnum, val); - return 0; -} - -static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) -{ - int ret; - unsigned long timeout; - - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, - ret & ~GLOBAL_CONTROL_PPU_ENABLE); - if (ret) - return ret; - - timeout = jiffies + 1 * HZ; - while (time_before(jiffies, timeout)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); - if (ret < 0) - return ret; + for (i = 0; i < 16; i++) { + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val); + if (err) + return err; usleep_range(1000, 2000); - if ((ret & GLOBAL_STATUS_PPU_MASK) != - GLOBAL_STATUS_PPU_POLLING) + if ((val & GLOBAL_STATUS_PPU_MASK) != GLOBAL_STATUS_PPU_POLLING) return 0; } @@ -307,27 +391,25 @@ static int mv88e6xxx_ppu_disable(struct mv88e6xxx_chip *chip) static int mv88e6xxx_ppu_enable(struct mv88e6xxx_chip *chip) { - int ret, err; - unsigned long timeout; + u16 val; + int i, err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_CONTROL); - if (ret < 0) - return ret; - - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, - ret | GLOBAL_CONTROL_PPU_ENABLE); + err = mv88e6xxx_g1_read(chip, GLOBAL_CONTROL, &val); if (err) return err; - timeout = jiffies + 1 * HZ; - while (time_before(jiffies, timeout)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATUS); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, + val | GLOBAL_CONTROL_PPU_ENABLE); + if (err) + return err; + + for (i = 0; i < 16; i++) { + err = mv88e6xxx_g1_read(chip, GLOBAL_STATUS, &val); + if (err) + return err; usleep_range(1000, 2000); - if ((ret & GLOBAL_STATUS_PPU_MASK) == - GLOBAL_STATUS_PPU_POLLING) + if ((val & GLOBAL_STATUS_PPU_MASK) == GLOBAL_STATUS_PPU_POLLING) return 0; } @@ -400,32 +482,37 @@ static void mv88e6xxx_ppu_state_init(struct mv88e6xxx_chip *chip) chip->ppu_timer.function = mv88e6xxx_ppu_reenable_timer; } -static int mv88e6xxx_mdio_read_ppu(struct mv88e6xxx_chip *chip, int addr, - int regnum) +static void mv88e6xxx_ppu_state_destroy(struct mv88e6xxx_chip *chip) { - int ret; - - ret = mv88e6xxx_ppu_access_get(chip); - if (ret >= 0) { - ret = _mv88e6xxx_reg_read(chip, addr, regnum); - mv88e6xxx_ppu_access_put(chip); - } - - return ret; + del_timer_sync(&chip->ppu_timer); } -static int mv88e6xxx_mdio_write_ppu(struct mv88e6xxx_chip *chip, int addr, - int regnum, u16 val) +static int mv88e6xxx_phy_ppu_read(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 *val) { - int ret; + int err; - ret = mv88e6xxx_ppu_access_get(chip); - if (ret >= 0) { - ret = _mv88e6xxx_reg_write(chip, addr, regnum, val); + err = mv88e6xxx_ppu_access_get(chip); + if (!err) { + err = mv88e6xxx_read(chip, addr, reg, val); mv88e6xxx_ppu_access_put(chip); } - return ret; + return err; +} + +static int mv88e6xxx_phy_ppu_write(struct mv88e6xxx_chip *chip, int addr, + int reg, u16 val) +{ + int err; + + err = mv88e6xxx_ppu_access_get(chip); + if (!err) { + err = mv88e6xxx_write(chip, addr, reg, val); + mv88e6xxx_ppu_access_put(chip); + } + + return err; } static bool mv88e6xxx_6065_family(struct mv88e6xxx_chip *chip) @@ -468,21 +555,6 @@ static bool mv88e6xxx_6352_family(struct mv88e6xxx_chip *chip) return chip->info->family == MV88E6XXX_FAMILY_6352; } -static unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) -{ - return chip->info->num_databases; -} - -static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip) -{ - /* Does the device have dedicated FID registers for ATU and VTU ops? */ - if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) || - mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip)) - return true; - - return false; -} - /* We expect the switch to perform auto negotiation if there is a real * phy. However, in the case of a fixed link phy, we force the port * settings from the fixed link settings. @@ -490,24 +562,24 @@ static bool mv88e6xxx_has_fid_reg(struct mv88e6xxx_chip *chip) static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, struct phy_device *phydev) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - u32 reg; - int ret; + struct mv88e6xxx_chip *chip = ds->priv; + u16 reg; + int err; if (!phy_is_pseudo_fixed_link(phydev)) return; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); + if (err) goto out; - reg = ret & ~(PORT_PCS_CTRL_LINK_UP | - PORT_PCS_CTRL_FORCE_LINK | - PORT_PCS_CTRL_DUPLEX_FULL | - PORT_PCS_CTRL_FORCE_DUPLEX | - PORT_PCS_CTRL_UNFORCED); + reg &= ~(PORT_PCS_CTRL_LINK_UP | + PORT_PCS_CTRL_FORCE_LINK | + PORT_PCS_CTRL_DUPLEX_FULL | + PORT_PCS_CTRL_FORCE_DUPLEX | + PORT_PCS_CTRL_UNFORCED); reg |= PORT_PCS_CTRL_FORCE_LINK; if (phydev->link) @@ -536,7 +608,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= PORT_PCS_CTRL_DUPLEX_FULL; if ((mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip)) && - (port >= chip->info->num_ports - 2)) { + (port >= mv88e6xxx_num_ports(chip) - 2)) { if (phydev->interface == PHY_INTERFACE_MODE_RGMII_RXID) reg |= PORT_PCS_CTRL_RGMII_DELAY_RXCLK; if (phydev->interface == PHY_INTERFACE_MODE_RGMII_TXID) @@ -545,7 +617,7 @@ static void mv88e6xxx_adjust_link(struct dsa_switch *ds, int port, reg |= (PORT_PCS_CTRL_RGMII_DELAY_RXCLK | PORT_PCS_CTRL_RGMII_DELAY_TXCLK); } - _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_PCS_CTRL, reg); + mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); out: mutex_unlock(&chip->reg_lock); @@ -553,12 +625,12 @@ out: static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) { - int ret; - int i; + u16 val; + int i, err; for (i = 0; i < 10; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_OP); - if ((ret & GLOBAL_STATS_OP_BUSY) == 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_OP, &val); + if ((val & GLOBAL_STATS_OP_BUSY) == 0) return 0; } @@ -567,55 +639,52 @@ static int _mv88e6xxx_stats_wait(struct mv88e6xxx_chip *chip) static int _mv88e6xxx_stats_snapshot(struct mv88e6xxx_chip *chip, int port) { - int ret; + int err; if (mv88e6xxx_6320_family(chip) || mv88e6xxx_6352_family(chip)) port = (port + 1) << 5; /* Snapshot the hardware statistics counters for this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_CAPTURE_PORT | - GLOBAL_STATS_OP_HIST_RX_TX | port); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_CAPTURE_PORT | + GLOBAL_STATS_OP_HIST_RX_TX | port); + if (err) + return err; /* Wait for the snapshotting to complete. */ - ret = _mv88e6xxx_stats_wait(chip); - if (ret < 0) - return ret; - - return 0; + return _mv88e6xxx_stats_wait(chip); } static void _mv88e6xxx_stats_read(struct mv88e6xxx_chip *chip, int stat, u32 *val) { - u32 _val; - int ret; + u32 value; + u16 reg; + int err; *val = 0; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_READ_CAPTURED | - GLOBAL_STATS_OP_HIST_RX_TX | stat); - if (ret < 0) + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_READ_CAPTURED | + GLOBAL_STATS_OP_HIST_RX_TX | stat); + if (err) return; - ret = _mv88e6xxx_stats_wait(chip); - if (ret < 0) + err = _mv88e6xxx_stats_wait(chip); + if (err) return; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_32); - if (ret < 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_32, ®); + if (err) return; - _val = ret << 16; + value = reg << 16; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_STATS_COUNTER_01); - if (ret < 0) + err = mv88e6xxx_g1_read(chip, GLOBAL_STATS_COUNTER_01, ®); + if (err) return; - *val = _val | ret; + *val = value | reg; } static struct mv88e6xxx_hw_stat mv88e6xxx_hw_stats[] = { @@ -705,22 +774,22 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, { u32 low; u32 high = 0; - int ret; + int err; + u16 reg; u64 value; switch (s->type) { case PORT: - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), s->reg); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg, ®); + if (err) return UINT64_MAX; - low = ret; + low = reg; if (s->sizeof_stat == 4) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), - s->reg + 1); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, s->reg + 1, ®); + if (err) return UINT64_MAX; - high = ret; + high = reg; } break; case BANK0: @@ -736,7 +805,7 @@ static uint64_t _mv88e6xxx_get_ethtool_stat(struct mv88e6xxx_chip *chip, static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, uint8_t *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int i, j; @@ -752,7 +821,7 @@ static void mv88e6xxx_get_strings(struct dsa_switch *ds, int port, static int mv88e6xxx_get_sset_count(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int i, j; @@ -767,7 +836,7 @@ static int mv88e6xxx_get_sset_count(struct dsa_switch *ds) static void mv88e6xxx_get_ethtool_stats(struct dsa_switch *ds, int port, uint64_t *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct mv88e6xxx_hw_stat *stat; int ret; int i, j; @@ -798,7 +867,9 @@ static int mv88e6xxx_get_regs_len(struct dsa_switch *ds, int port) static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, struct ethtool_regs *regs, void *_p) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; + int err; + u16 reg; u16 *p = _p; int i; @@ -809,170 +880,106 @@ static void mv88e6xxx_get_regs(struct dsa_switch *ds, int port, mutex_lock(&chip->reg_lock); for (i = 0; i < 32; i++) { - int ret; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), i); - if (ret >= 0) - p[i] = ret; + err = mv88e6xxx_port_read(chip, port, i, ®); + if (!err) + p[i] = reg; } mutex_unlock(&chip->reg_lock); } -static int _mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int reg, int offset, - u16 mask) -{ - unsigned long timeout = jiffies + HZ / 10; - - while (time_before(jiffies, timeout)) { - int ret; - - ret = _mv88e6xxx_reg_read(chip, reg, offset); - if (ret < 0) - return ret; - if (!(ret & mask)) - return 0; - - usleep_range(1000, 2000); - } - return -ETIMEDOUT; -} - -static int mv88e6xxx_mdio_wait(struct mv88e6xxx_chip *chip) -{ - return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_BUSY); -} - static int _mv88e6xxx_atu_wait(struct mv88e6xxx_chip *chip) { - return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_ATU_OP, - GLOBAL_ATU_OP_BUSY); -} - -static int mv88e6xxx_mdio_read_indirect(struct mv88e6xxx_chip *chip, - int addr, int regnum) -{ - int ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_READ | (addr << 5) | - regnum); - if (ret < 0) - return ret; - - ret = mv88e6xxx_mdio_wait(chip); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA); - - return ret; -} - -static int mv88e6xxx_mdio_write_indirect(struct mv88e6xxx_chip *chip, - int addr, int regnum, u16 val) -{ - int ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_DATA, val); - if (ret < 0) - return ret; - - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SMI_OP, - GLOBAL2_SMI_OP_22_WRITE | (addr << 5) | - regnum); - - return mv88e6xxx_mdio_wait(chip); + return mv88e6xxx_g1_wait(chip, GLOBAL_ATU_OP, GLOBAL_ATU_OP_BUSY); } static int mv88e6xxx_get_eee(struct dsa_switch *ds, int port, struct ethtool_eee *e) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int reg; + struct mv88e6xxx_chip *chip = ds->priv; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - reg = mv88e6xxx_mdio_read_indirect(chip, port, 16); - if (reg < 0) + err = mv88e6xxx_phy_read(chip, port, 16, ®); + if (err) goto out; e->eee_enabled = !!(reg & 0x0200); e->tx_lpi_enabled = !!(reg & 0x0100); - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); - if (reg < 0) + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); + if (err) goto out; e->eee_active = !!(reg & PORT_STATUS_EEE); - reg = 0; - out: mutex_unlock(&chip->reg_lock); - return reg; + + return err; } static int mv88e6xxx_set_eee(struct dsa_switch *ds, int port, struct phy_device *phydev, struct ethtool_eee *e) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int reg; - int ret; + struct mv88e6xxx_chip *chip = ds->priv; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_EEE)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - ret = mv88e6xxx_mdio_read_indirect(chip, port, 16); - if (ret < 0) + err = mv88e6xxx_phy_read(chip, port, 16, ®); + if (err) goto out; - reg = ret & ~0x0300; + reg &= ~0x0300; if (e->eee_enabled) reg |= 0x0200; if (e->tx_lpi_enabled) reg |= 0x0100; - ret = mv88e6xxx_mdio_write_indirect(chip, port, 16, reg); + err = mv88e6xxx_phy_write(chip, port, 16, reg); out: mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int _mv88e6xxx_atu_cmd(struct mv88e6xxx_chip *chip, u16 fid, u16 cmd) { - int ret; + u16 val; + int err; - if (mv88e6xxx_has_fid_reg(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_FID, - fid); - if (ret < 0) - return ret; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_ATU_FID)) { + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_FID, fid); + if (err) + return err; } else if (mv88e6xxx_num_databases(chip) == 256) { /* ATU DBNum[7:4] are located in ATU Control 15:12 */ - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, - (ret & 0xfff) | - ((fid << 8) & 0xf000)); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, + (val & 0xfff) | ((fid << 8) & 0xf000)); + if (err) + return err; /* ATU DBNum[3:0] are located in ATU Operation 3:0 */ cmd |= fid & 0xf; } - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_OP, cmd); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_OP, cmd); + if (err) + return err; return _mv88e6xxx_atu_wait(chip); } @@ -997,7 +1004,7 @@ static int _mv88e6xxx_atu_data_write(struct mv88e6xxx_chip *chip, data |= (entry->portv_trunkid << shift) & mask; } - return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_DATA, data); + return mv88e6xxx_g1_write(chip, GLOBAL_ATU_DATA, data); } static int _mv88e6xxx_atu_flush_move(struct mv88e6xxx_chip *chip, @@ -1073,57 +1080,45 @@ static int _mv88e6xxx_port_state(struct mv88e6xxx_chip *chip, int port, u8 state) { struct dsa_switch *ds = chip->ds; - int reg, ret = 0; + u16 reg; + int err; u8 oldstate; - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL, ®); + if (err) + return err; oldstate = reg & PORT_CONTROL_STATE_MASK; - if (oldstate != state) { - /* Flush forwarding database if we're moving a port - * from Learning or Forwarding state to Disabled or - * Blocking or Listening state. - */ - if ((oldstate == PORT_CONTROL_STATE_LEARNING || - oldstate == PORT_CONTROL_STATE_FORWARDING) && - (state == PORT_CONTROL_STATE_DISABLED || - state == PORT_CONTROL_STATE_BLOCKING)) { - ret = _mv88e6xxx_atu_remove(chip, 0, port, false); - if (ret) - return ret; - } + reg &= ~PORT_CONTROL_STATE_MASK; + reg |= state; - reg = (reg & ~PORT_CONTROL_STATE_MASK) | state; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; - netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", - mv88e6xxx_port_state_names[state], - mv88e6xxx_port_state_names[oldstate]); - } + netdev_dbg(ds->ports[port].netdev, "PortState %s (was %s)\n", + mv88e6xxx_port_state_names[state], + mv88e6xxx_port_state_names[oldstate]); - return ret; + return 0; } static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) { struct net_device *bridge = chip->ports[port].bridge_dev; - const u16 mask = (1 << chip->info->num_ports) - 1; + const u16 mask = (1 << mv88e6xxx_num_ports(chip)) - 1; struct dsa_switch *ds = chip->ds; u16 output_ports = 0; - int reg; + u16 reg; + int err; int i; /* allow CPU port or DSA link(s) to send frames to every port */ if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { output_ports = mask; } else { - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { /* allow sending frames to every group member */ if (bridge && chip->ports[i].bridge_dev == bridge) output_ports |= BIT(i); @@ -1137,20 +1132,20 @@ static int _mv88e6xxx_port_based_vlan_map(struct mv88e6xxx_chip *chip, int port) /* prevent frames from going back out of the port they came in on */ output_ports &= ~BIT(port); - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (reg < 0) - return reg; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; reg &= ~mask; reg |= output_ports & mask; - return _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, reg); + return mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); } static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int stp_state; int err; @@ -1181,27 +1176,39 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, mv88e6xxx_port_state_names[stp_state]); } +static void mv88e6xxx_port_fast_age(struct dsa_switch *ds, int port) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = _mv88e6xxx_atu_remove(chip, 0, port, false); + mutex_unlock(&chip->reg_lock); + + if (err) + netdev_err(ds->ports[port].netdev, "failed to flush ATU\n"); +} + static int _mv88e6xxx_port_pvid(struct mv88e6xxx_chip *chip, int port, u16 *new, u16 *old) { struct dsa_switch *ds = chip->ds; - u16 pvid; - int ret; + u16 pvid, reg; + int err; - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_DEFAULT_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_DEFAULT_VLAN, ®); + if (err) + return err; - pvid = ret & PORT_DEFAULT_VLAN_MASK; + pvid = reg & PORT_DEFAULT_VLAN_MASK; if (new) { - ret &= ~PORT_DEFAULT_VLAN_MASK; - ret |= *new & PORT_DEFAULT_VLAN_MASK; + reg &= ~PORT_DEFAULT_VLAN_MASK; + reg |= *new & PORT_DEFAULT_VLAN_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_DEFAULT_VLAN, ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "DefaultVID %d (was %d)\n", *new, pvid); @@ -1227,17 +1234,16 @@ static int _mv88e6xxx_port_pvid_set(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_vtu_wait(struct mv88e6xxx_chip *chip) { - return _mv88e6xxx_wait(chip, REG_GLOBAL, GLOBAL_VTU_OP, - GLOBAL_VTU_OP_BUSY); + return mv88e6xxx_g1_wait(chip, GLOBAL_VTU_OP, GLOBAL_VTU_OP_BUSY); } static int _mv88e6xxx_vtu_cmd(struct mv88e6xxx_chip *chip, u16 op) { - int ret; + int err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_OP, op); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_OP, op); + if (err) + return err; return _mv88e6xxx_vtu_wait(chip); } @@ -1254,23 +1260,21 @@ static int _mv88e6xxx_vtu_stu_flush(struct mv88e6xxx_chip *chip) } static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry, + struct mv88e6xxx_vtu_entry *entry, unsigned int nibble_offset) { u16 regs[3]; - int i; - int ret; + int i, err; for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_DATA_0_3 + i); - if (ret < 0) - return ret; + u16 *reg = ®s[i]; - regs[i] = ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_DATA_0_3 + i, reg); + if (err) + return err; } - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u16 reg = regs[i / 4]; @@ -1281,26 +1285,25 @@ static int _mv88e6xxx_vtu_stu_data_read(struct mv88e6xxx_chip *chip, } static int mv88e6xxx_vtu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_read(chip, entry, 0); } static int mv88e6xxx_stu_data_read(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_read(chip, entry, 2); } static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry, + struct mv88e6xxx_vtu_entry *entry, unsigned int nibble_offset) { u16 regs[3] = { 0 }; - int i; - int ret; + int i, err; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { unsigned int shift = (i % 4) * 4 + nibble_offset; u8 data = entry->data[i]; @@ -1308,86 +1311,85 @@ static int _mv88e6xxx_vtu_stu_data_write(struct mv88e6xxx_chip *chip, } for (i = 0; i < 3; ++i) { - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, - GLOBAL_VTU_DATA_0_3 + i, regs[i]); - if (ret < 0) - return ret; + u16 reg = regs[i]; + + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_DATA_0_3 + i, reg); + if (err) + return err; } return 0; } static int mv88e6xxx_vtu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_write(chip, entry, 0); } static int mv88e6xxx_stu_data_write(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { return _mv88e6xxx_vtu_stu_data_write(chip, entry, 2); } static int _mv88e6xxx_vtu_vid_write(struct mv88e6xxx_chip *chip, u16 vid) { - return _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, - vid & GLOBAL_VTU_VID_MASK); + return mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, + vid & GLOBAL_VTU_VID_MASK); } static int _mv88e6xxx_vtu_getnext(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { - struct mv88e6xxx_vtu_stu_entry next = { 0 }; - int ret; + struct mv88e6xxx_vtu_entry next = { 0 }; + u16 val; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_VTU_GET_NEXT); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val); + if (err) + return err; - next.vid = ret & GLOBAL_VTU_VID_MASK; - next.valid = !!(ret & GLOBAL_VTU_VID_VALID); + next.vid = val & GLOBAL_VTU_VID_MASK; + next.valid = !!(val & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = mv88e6xxx_vtu_data_read(chip, &next); - if (ret < 0) - return ret; + err = mv88e6xxx_vtu_data_read(chip, &next); + if (err) + return err; - if (mv88e6xxx_has_fid_reg(chip)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_FID); - if (ret < 0) - return ret; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) { + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_FID, &val); + if (err) + return err; - next.fid = ret & GLOBAL_VTU_FID_MASK; + next.fid = val & GLOBAL_VTU_FID_MASK; } else if (mv88e6xxx_num_databases(chip) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 */ - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_OP); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_OP, &val); + if (err) + return err; - next.fid = (ret & 0xf00) >> 4; - next.fid |= ret & 0xf; + next.fid = (val & 0xf00) >> 4; + next.fid |= val & 0xf; } if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_VTU_SID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val); + if (err) + return err; - next.sid = ret & GLOBAL_VTU_SID_MASK; + next.sid = val & GLOBAL_VTU_SID_MASK; } } @@ -1399,8 +1401,8 @@ static int mv88e6xxx_port_vlan_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_vlan *vlan, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - struct mv88e6xxx_vtu_stu_entry next; + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_vtu_entry next; u16 pvid; int err; @@ -1451,38 +1453,36 @@ unlock: } static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { u16 op = GLOBAL_VTU_OP_VTU_LOAD_PURGE; u16 reg = 0; - int ret; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; if (!entry->valid) goto loadpurge; /* Write port member tags */ - ret = mv88e6xxx_vtu_data_write(chip, entry); - if (ret < 0) - return ret; + err = mv88e6xxx_vtu_data_write(chip, entry); + if (err) + return err; if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_STU)) { reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, - reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg); + if (err) + return err; } - if (mv88e6xxx_has_fid_reg(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G1_VTU_FID)) { reg = entry->fid & GLOBAL_VTU_FID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_FID, - reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_FID, reg); + if (err) + return err; } else if (mv88e6xxx_num_databases(chip) == 256) { /* VTU DBNum[7:4] are located in VTU Operation 11:8, and * VTU DBNum[3:0] are located in VTU Operation 3:0 @@ -1494,48 +1494,49 @@ static int _mv88e6xxx_vtu_loadpurge(struct mv88e6xxx_chip *chip, reg = GLOBAL_VTU_VID_VALID; loadpurge: reg |= entry->vid & GLOBAL_VTU_VID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg); + if (err) + return err; return _mv88e6xxx_vtu_cmd(chip, op); } static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { - struct mv88e6xxx_vtu_stu_entry next = { 0 }; - int ret; + struct mv88e6xxx_vtu_entry next = { 0 }; + u16 val; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, - sid & GLOBAL_VTU_SID_MASK); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, + sid & GLOBAL_VTU_SID_MASK); + if (err) + return err; - ret = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_GET_NEXT); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_SID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_SID, &val); + if (err) + return err; - next.sid = ret & GLOBAL_VTU_SID_MASK; + next.sid = val & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_VTU_VID); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_VTU_VID, &val); + if (err) + return err; - next.valid = !!(ret & GLOBAL_VTU_VID_VALID); + next.valid = !!(val & GLOBAL_VTU_VID_VALID); if (next.valid) { - ret = mv88e6xxx_stu_data_read(chip, &next); - if (ret < 0) - return ret; + err = mv88e6xxx_stu_data_read(chip, &next); + if (err) + return err; } *entry = next; @@ -1543,33 +1544,33 @@ static int _mv88e6xxx_stu_getnext(struct mv88e6xxx_chip *chip, u8 sid, } static int _mv88e6xxx_stu_loadpurge(struct mv88e6xxx_chip *chip, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { u16 reg = 0; - int ret; + int err; - ret = _mv88e6xxx_vtu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_vtu_wait(chip); + if (err) + return err; if (!entry->valid) goto loadpurge; /* Write port states */ - ret = mv88e6xxx_stu_data_write(chip, entry); - if (ret < 0) - return ret; + err = mv88e6xxx_stu_data_write(chip, entry); + if (err) + return err; reg = GLOBAL_VTU_VID_VALID; loadpurge: - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_VID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_VID, reg); + if (err) + return err; reg = entry->sid & GLOBAL_VTU_SID_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_VTU_SID, reg); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_VTU_SID, reg); + if (err) + return err; return _mv88e6xxx_vtu_cmd(chip, GLOBAL_VTU_OP_STU_LOAD_PURGE); } @@ -1580,7 +1581,8 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, struct dsa_switch *ds = chip->ds; u16 upper_mask; u16 fid; - int ret; + u16 reg; + int err; if (mv88e6xxx_num_databases(chip) == 4096) upper_mask = 0xff; @@ -1590,37 +1592,35 @@ static int _mv88e6xxx_port_fid(struct mv88e6xxx_chip *chip, int port, return -EOPNOTSUPP; /* Port's default FID bits 3:0 are located in reg 0x06, offset 12 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_BASE_VLAN); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_BASE_VLAN, ®); + if (err) + return err; - fid = (ret & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; + fid = (reg & PORT_BASE_VLAN_FID_3_0_MASK) >> 12; if (new) { - ret &= ~PORT_BASE_VLAN_FID_3_0_MASK; - ret |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; + reg &= ~PORT_BASE_VLAN_FID_3_0_MASK; + reg |= (*new << 12) & PORT_BASE_VLAN_FID_3_0_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_BASE_VLAN, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_BASE_VLAN, reg); + if (err) + return err; } /* Port's default FID bits 11:4 are located in reg 0x05, offset 0 */ - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_1); - if (ret < 0) - return ret; + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_1, ®); + if (err) + return err; - fid |= (ret & upper_mask) << 4; + fid |= (reg & upper_mask) << 4; if (new) { - ret &= ~upper_mask; - ret |= (*new >> 4) & upper_mask; + reg &= ~upper_mask; + reg |= (*new >> 4) & upper_mask; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - ret); - if (ret < 0) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, reg); + if (err) + return err; netdev_dbg(ds->ports[port].netdev, "FID %d (was %d)\n", *new, fid); @@ -1647,13 +1647,13 @@ static int _mv88e6xxx_port_fid_set(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) { DECLARE_BITMAP(fid_bitmap, MV88E6XXX_N_FID); - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; bitmap_zero(fid_bitmap, MV88E6XXX_N_FID); /* Set every FID bit used by the (un)bridged ports */ - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { err = _mv88e6xxx_port_fid_get(chip, i, fid); if (err) return err; @@ -1689,10 +1689,10 @@ static int _mv88e6xxx_fid_new(struct mv88e6xxx_chip *chip, u16 *fid) } static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, - struct mv88e6xxx_vtu_stu_entry *entry) + struct mv88e6xxx_vtu_entry *entry) { struct dsa_switch *ds = chip->ds; - struct mv88e6xxx_vtu_stu_entry vlan = { + struct mv88e6xxx_vtu_entry vlan = { .valid = true, .vid = vid, }; @@ -1703,14 +1703,14 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, return err; /* exclude all ports except the CPU and DSA ports */ - for (i = 0; i < chip->info->num_ports; ++i) + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) vlan.data[i] = dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i) ? GLOBAL_VTU_DATA_MEMBER_TAG_UNMODIFIED : GLOBAL_VTU_DATA_MEMBER_TAG_NON_MEMBER; if (mv88e6xxx_6097_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6352_family(chip)) { - struct mv88e6xxx_vtu_stu_entry vstp; + struct mv88e6xxx_vtu_entry vstp; /* Adding a VTU entry requires a valid STU entry. As VSTP is not * implemented, only one STU entry is needed to cover all VTU @@ -1737,7 +1737,7 @@ static int _mv88e6xxx_vtu_new(struct mv88e6xxx_chip *chip, u16 vid, } static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, - struct mv88e6xxx_vtu_stu_entry *entry, bool creat) + struct mv88e6xxx_vtu_entry *entry, bool creat) { int err; @@ -1768,8 +1768,8 @@ static int _mv88e6xxx_vtu_get(struct mv88e6xxx_chip *chip, u16 vid, static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, u16 vid_begin, u16 vid_end) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_chip *chip = ds->priv; + struct mv88e6xxx_vtu_entry vlan; int i, err; if (!vid_begin) @@ -1792,7 +1792,7 @@ static int mv88e6xxx_port_check_hw_vlan(struct dsa_switch *ds, int port, if (vlan.vid > vid_end) break; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (dsa_is_dsa_port(ds, i) || dsa_is_cpu_port(ds, i)) continue; @@ -1829,29 +1829,29 @@ static const char * const mv88e6xxx_port_8021q_mode_names[] = { static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 old, new = vlan_filtering ? PORT_CONTROL_2_8021Q_SECURE : PORT_CONTROL_2_8021Q_DISABLED; - int ret; + u16 reg; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) return -EOPNOTSUPP; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_CONTROL_2); - if (ret < 0) + err = mv88e6xxx_port_read(chip, port, PORT_CONTROL_2, ®); + if (err) goto unlock; - old = ret & PORT_CONTROL_2_8021Q_MASK; + old = reg & PORT_CONTROL_2_8021Q_MASK; if (new != old) { - ret &= ~PORT_CONTROL_2_8021Q_MASK; - ret |= new & PORT_CONTROL_2_8021Q_MASK; + reg &= ~PORT_CONTROL_2_8021Q_MASK; + reg |= new & PORT_CONTROL_2_8021Q_MASK; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_2, - ret); - if (ret < 0) + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) goto unlock; netdev_dbg(ds->ports[port].netdev, "802.1Q Mode %s (was %s)\n", @@ -1859,11 +1859,11 @@ static int mv88e6xxx_port_vlan_filtering(struct dsa_switch *ds, int port, mv88e6xxx_port_8021q_mode_names[old]); } - ret = 0; + err = 0; unlock: mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int @@ -1871,7 +1871,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VTU)) @@ -1894,7 +1894,7 @@ mv88e6xxx_port_vlan_prepare(struct dsa_switch *ds, int port, static int _mv88e6xxx_port_vlan_add(struct mv88e6xxx_chip *chip, int port, u16 vid, bool untagged) { - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int err; err = _mv88e6xxx_vtu_get(chip, vid, &vlan, true); @@ -1912,7 +1912,7 @@ static void mv88e6xxx_port_vlan_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED; bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID; u16 vid; @@ -1939,7 +1939,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, int port, u16 vid) { struct dsa_switch *ds = chip->ds; - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_vtu_entry vlan; int i, err; err = _mv88e6xxx_vtu_get(chip, vid, &vlan, false); @@ -1954,7 +1954,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, /* keep the VLAN unless all ports are excluded */ vlan.valid = false; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (dsa_is_cpu_port(ds, i) || dsa_is_dsa_port(ds, i)) continue; @@ -1974,7 +1974,7 @@ static int _mv88e6xxx_port_vlan_del(struct mv88e6xxx_chip *chip, static int mv88e6xxx_port_vlan_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_vlan *vlan) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; u16 pvid, vid; int err = 0; @@ -2008,14 +2008,13 @@ unlock: static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip, const unsigned char *addr) { - int i, ret; + int i, err; for (i = 0; i < 3; i++) { - ret = _mv88e6xxx_reg_write( - chip, REG_GLOBAL, GLOBAL_ATU_MAC_01 + i, - (addr[i * 2] << 8) | addr[i * 2 + 1]); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_MAC_01 + i, + (addr[i * 2] << 8) | addr[i * 2 + 1]); + if (err) + return err; } return 0; @@ -2024,15 +2023,16 @@ static int _mv88e6xxx_atu_mac_write(struct mv88e6xxx_chip *chip, static int _mv88e6xxx_atu_mac_read(struct mv88e6xxx_chip *chip, unsigned char *addr) { - int i, ret; + u16 val; + int i, err; for (i = 0; i < 3; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, - GLOBAL_ATU_MAC_01 + i); - if (ret < 0) - return ret; - addr[i * 2] = ret >> 8; - addr[i * 2 + 1] = ret & 0xff; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_MAC_01 + i, &val); + if (err) + return err; + + addr[i * 2] = val >> 8; + addr[i * 2 + 1] = val & 0xff; } return 0; @@ -2058,12 +2058,48 @@ static int _mv88e6xxx_atu_load(struct mv88e6xxx_chip *chip, return _mv88e6xxx_atu_cmd(chip, entry->fid, GLOBAL_ATU_OP_LOAD_DB); } -static int _mv88e6xxx_port_fdb_load(struct mv88e6xxx_chip *chip, int port, - const unsigned char *addr, u16 vid, - u8 state) +static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, + struct mv88e6xxx_atu_entry *entry); + +static int mv88e6xxx_atu_get(struct mv88e6xxx_chip *chip, int fid, + const u8 *addr, struct mv88e6xxx_atu_entry *entry) { - struct mv88e6xxx_atu_entry entry = { 0 }; - struct mv88e6xxx_vtu_stu_entry vlan; + struct mv88e6xxx_atu_entry next; + int err; + + eth_broadcast_addr(next.mac); + + err = _mv88e6xxx_atu_mac_write(chip, next.mac); + if (err) + return err; + + do { + err = _mv88e6xxx_atu_getnext(chip, fid, &next); + if (err) + return err; + + if (next.state == GLOBAL_ATU_DATA_STATE_UNUSED) + break; + + if (ether_addr_equal(next.mac, addr)) { + *entry = next; + return 0; + } + } while (!is_broadcast_ether_addr(next.mac)); + + memset(entry, 0, sizeof(*entry)); + entry->fid = fid; + ether_addr_copy(entry->mac, addr); + + return 0; +} + +static int mv88e6xxx_port_db_load_purge(struct mv88e6xxx_chip *chip, int port, + const unsigned char *addr, u16 vid, + u8 state) +{ + struct mv88e6xxx_vtu_entry vlan; + struct mv88e6xxx_atu_entry entry; int err; /* Null VLAN ID corresponds to the port private database */ @@ -2074,12 +2110,18 @@ static int _mv88e6xxx_port_fdb_load(struct mv88e6xxx_chip *chip, int port, if (err) return err; - entry.fid = vlan.fid; - entry.state = state; - ether_addr_copy(entry.mac, addr); - if (state != GLOBAL_ATU_DATA_STATE_UNUSED) { - entry.trunk = false; - entry.portv_trunkid = BIT(port); + err = mv88e6xxx_atu_get(chip, vlan.fid, addr, &entry); + if (err) + return err; + + /* Purge the ATU entry only if no port is using it anymore */ + if (state == GLOBAL_ATU_DATA_STATE_UNUSED) { + entry.portv_trunkid &= ~BIT(port); + if (!entry.portv_trunkid) + entry.state = GLOBAL_ATU_DATA_STATE_UNUSED; + } else { + entry.portv_trunkid |= BIT(port); + entry.state = state; } return _mv88e6xxx_atu_load(chip, &entry); @@ -2099,61 +2141,59 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - int state = is_multicast_ether_addr(fdb->addr) ? - GLOBAL_ATU_DATA_STATE_MC_STATIC : - GLOBAL_ATU_DATA_STATE_UC_STATIC; - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; mutex_lock(&chip->reg_lock); - if (_mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, state)) - netdev_err(ds->ports[port].netdev, - "failed to load MAC address\n"); + if (mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid, + GLOBAL_ATU_DATA_STATE_UC_STATIC)) + netdev_err(ds->ports[port].netdev, "failed to load unicast MAC address\n"); mutex_unlock(&chip->reg_lock); } static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int ret; + struct mv88e6xxx_chip *chip = ds->priv; + int err; mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, - GLOBAL_ATU_DATA_STATE_UNUSED); + err = mv88e6xxx_port_db_load_purge(chip, port, fdb->addr, fdb->vid, + GLOBAL_ATU_DATA_STATE_UNUSED); mutex_unlock(&chip->reg_lock); - return ret; + return err; } static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, struct mv88e6xxx_atu_entry *entry) { struct mv88e6xxx_atu_entry next = { 0 }; - int ret; + u16 val; + int err; next.fid = fid; - ret = _mv88e6xxx_atu_wait(chip); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_wait(chip); + if (err) + return err; - ret = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_cmd(chip, fid, GLOBAL_ATU_OP_GET_NEXT_DB); + if (err) + return err; - ret = _mv88e6xxx_atu_mac_read(chip, next.mac); - if (ret < 0) - return ret; + err = _mv88e6xxx_atu_mac_read(chip, next.mac); + if (err) + return err; - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, GLOBAL_ATU_DATA); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_DATA, &val); + if (err) + return err; - next.state = ret & GLOBAL_ATU_DATA_STATE_MASK; + next.state = val & GLOBAL_ATU_DATA_STATE_MASK; if (next.state != GLOBAL_ATU_DATA_STATE_UNUSED) { unsigned int mask, shift; - if (ret & GLOBAL_ATU_DATA_TRUNK) { + if (val & GLOBAL_ATU_DATA_TRUNK) { next.trunk = true; mask = GLOBAL_ATU_DATA_TRUNK_ID_MASK; shift = GLOBAL_ATU_DATA_TRUNK_ID_SHIFT; @@ -2163,17 +2203,17 @@ static int _mv88e6xxx_atu_getnext(struct mv88e6xxx_chip *chip, u16 fid, shift = GLOBAL_ATU_DATA_PORT_VECTOR_SHIFT; } - next.portv_trunkid = (ret & mask) >> shift; + next.portv_trunkid = (val & mask) >> shift; } *entry = next; return 0; } -static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_chip *chip, - u16 fid, u16 vid, int port, - struct switchdev_obj_port_fdb *fdb, - int (*cb)(struct switchdev_obj *obj)) +static int mv88e6xxx_port_db_dump_fid(struct mv88e6xxx_chip *chip, + u16 fid, u16 vid, int port, + struct switchdev_obj *obj, + int (*cb)(struct switchdev_obj *obj)) { struct mv88e6xxx_atu_entry addr = { .mac = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }, @@ -2187,72 +2227,98 @@ static int _mv88e6xxx_port_fdb_dump_one(struct mv88e6xxx_chip *chip, do { err = _mv88e6xxx_atu_getnext(chip, fid, &addr); if (err) - break; + return err; if (addr.state == GLOBAL_ATU_DATA_STATE_UNUSED) break; - if (!addr.trunk && addr.portv_trunkid & BIT(port)) { - bool is_static = addr.state == - (is_multicast_ether_addr(addr.mac) ? - GLOBAL_ATU_DATA_STATE_MC_STATIC : - GLOBAL_ATU_DATA_STATE_UC_STATIC); + if (addr.trunk || (addr.portv_trunkid & BIT(port)) == 0) + continue; + if (obj->id == SWITCHDEV_OBJ_ID_PORT_FDB) { + struct switchdev_obj_port_fdb *fdb; + + if (!is_unicast_ether_addr(addr.mac)) + continue; + + fdb = SWITCHDEV_OBJ_PORT_FDB(obj); fdb->vid = vid; ether_addr_copy(fdb->addr, addr.mac); - fdb->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + if (addr.state == GLOBAL_ATU_DATA_STATE_UC_STATIC) + fdb->ndm_state = NUD_NOARP; + else + fdb->ndm_state = NUD_REACHABLE; + } else if (obj->id == SWITCHDEV_OBJ_ID_PORT_MDB) { + struct switchdev_obj_port_mdb *mdb; - err = cb(&fdb->obj); - if (err) - break; + if (!is_multicast_ether_addr(addr.mac)) + continue; + + mdb = SWITCHDEV_OBJ_PORT_MDB(obj); + mdb->vid = vid; + ether_addr_copy(mdb->addr, addr.mac); + } else { + return -EOPNOTSUPP; } + + err = cb(obj); + if (err) + return err; } while (!is_broadcast_ether_addr(addr.mac)); return err; } +static int mv88e6xxx_port_db_dump(struct mv88e6xxx_chip *chip, int port, + struct switchdev_obj *obj, + int (*cb)(struct switchdev_obj *obj)) +{ + struct mv88e6xxx_vtu_entry vlan = { + .vid = GLOBAL_VTU_VID_MASK, /* all ones */ + }; + u16 fid; + int err; + + /* Dump port's default Filtering Information Database (VLAN ID 0) */ + err = _mv88e6xxx_port_fid_get(chip, port, &fid); + if (err) + return err; + + err = mv88e6xxx_port_db_dump_fid(chip, fid, 0, port, obj, cb); + if (err) + return err; + + /* Dump VLANs' Filtering Information Databases */ + err = _mv88e6xxx_vtu_vid_write(chip, vlan.vid); + if (err) + return err; + + do { + err = _mv88e6xxx_vtu_getnext(chip, &vlan); + if (err) + return err; + + if (!vlan.valid) + break; + + err = mv88e6xxx_port_db_dump_fid(chip, vlan.fid, vlan.vid, port, + obj, cb); + if (err) + return err; + } while (vlan.vid < GLOBAL_VTU_VID_MASK); + + return err; +} + static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - struct mv88e6xxx_vtu_stu_entry vlan = { - .vid = GLOBAL_VTU_VID_MASK, /* all ones */ - }; - u16 fid; + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); - - /* Dump port's default Filtering Information Database (VLAN ID 0) */ - err = _mv88e6xxx_port_fid_get(chip, port, &fid); - if (err) - goto unlock; - - err = _mv88e6xxx_port_fdb_dump_one(chip, fid, 0, port, fdb, cb); - if (err) - goto unlock; - - /* Dump VLANs' Filtering Information Databases */ - err = _mv88e6xxx_vtu_vid_write(chip, vlan.vid); - if (err) - goto unlock; - - do { - err = _mv88e6xxx_vtu_getnext(chip, &vlan); - if (err) - break; - - if (!vlan.valid) - break; - - err = _mv88e6xxx_port_fdb_dump_one(chip, vlan.fid, vlan.vid, - port, fdb, cb); - if (err) - break; - } while (vlan.vid < GLOBAL_VTU_VID_MASK); - -unlock: + err = mv88e6xxx_port_db_dump(chip, port, &fdb->obj, cb); mutex_unlock(&chip->reg_lock); return err; @@ -2261,7 +2327,7 @@ unlock: static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct net_device *bridge) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int i, err = 0; mutex_lock(&chip->reg_lock); @@ -2269,7 +2335,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, /* Assign the bridge and remap each port's VLANTable */ chip->ports[port].bridge_dev = bridge; - for (i = 0; i < chip->info->num_ports; ++i) { + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) { if (chip->ports[i].bridge_dev == bridge) { err = _mv88e6xxx_port_based_vlan_map(chip, i); if (err) @@ -2284,7 +2350,7 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; struct net_device *bridge = chip->ports[port].bridge_dev; int i; @@ -2293,7 +2359,7 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) /* Unassign the bridge and remap each port's VLANTable */ chip->ports[port].bridge_dev = NULL; - for (i = 0; i < chip->info->num_ports; ++i) + for (i = 0; i < mv88e6xxx_num_ports(chip); ++i) if (i == port || chip->ports[i].bridge_dev == bridge) if (_mv88e6xxx_port_based_vlan_map(chip, i)) netdev_warn(ds->ports[i].netdev, @@ -2302,57 +2368,26 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) mutex_unlock(&chip->reg_lock); } -static int _mv88e6xxx_mdio_page_write(struct mv88e6xxx_chip *chip, - int port, int page, int reg, int val) -{ - int ret; - - ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page); - if (ret < 0) - goto restore_page_0; - - ret = mv88e6xxx_mdio_write_indirect(chip, port, reg, val); -restore_page_0: - mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0); - - return ret; -} - -static int _mv88e6xxx_mdio_page_read(struct mv88e6xxx_chip *chip, - int port, int page, int reg) -{ - int ret; - - ret = mv88e6xxx_mdio_write_indirect(chip, port, 0x16, page); - if (ret < 0) - goto restore_page_0; - - ret = mv88e6xxx_mdio_read_indirect(chip, port, reg); -restore_page_0: - mv88e6xxx_mdio_write_indirect(chip, port, 0x16, 0x0); - - return ret; -} - static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) { bool ppu_active = mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE); u16 is_reset = (ppu_active ? 0x8800 : 0xc800); struct gpio_desc *gpiod = chip->reset; unsigned long timeout; - int ret; + u16 reg; + int err; int i; /* Set all ports to the disabled state. */ - for (i = 0; i < chip->info->num_ports; i++) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(i), PORT_CONTROL); - if (ret < 0) - return ret; + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { + err = mv88e6xxx_port_read(chip, i, PORT_CONTROL, ®); + if (err) + return err; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(i), PORT_CONTROL, - ret & 0xfffc); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, i, PORT_CONTROL, + reg & 0xfffc); + if (err) + return err; } /* Wait for transmit queues to drain. */ @@ -2371,65 +2406,53 @@ static int mv88e6xxx_switch_reset(struct mv88e6xxx_chip *chip) * through global registers 0x18 and 0x19. */ if (ppu_active) - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc000); + err = mv88e6xxx_g1_write(chip, 0x04, 0xc000); else - ret = _mv88e6xxx_reg_write(chip, REG_GLOBAL, 0x04, 0xc400); - if (ret) - return ret; + err = mv88e6xxx_g1_write(chip, 0x04, 0xc400); + if (err) + return err; /* Wait up to one second for reset to complete. */ timeout = jiffies + 1 * HZ; while (time_before(jiffies, timeout)) { - ret = _mv88e6xxx_reg_read(chip, REG_GLOBAL, 0x00); - if (ret < 0) - return ret; + err = mv88e6xxx_g1_read(chip, 0x00, ®); + if (err) + return err; - if ((ret & is_reset) == is_reset) + if ((reg & is_reset) == is_reset) break; usleep_range(1000, 2000); } if (time_after(jiffies, timeout)) - ret = -ETIMEDOUT; + err = -ETIMEDOUT; else - ret = 0; + err = 0; - return ret; + return err; } -static int mv88e6xxx_power_on_serdes(struct mv88e6xxx_chip *chip) +static int mv88e6xxx_serdes_power_on(struct mv88e6xxx_chip *chip) { - int ret; + u16 val; + int err; - ret = _mv88e6xxx_mdio_page_read(chip, REG_FIBER_SERDES, - PAGE_FIBER_SERDES, MII_BMCR); - if (ret < 0) - return ret; + /* Clear Power Down bit */ + err = mv88e6xxx_serdes_read(chip, MII_BMCR, &val); + if (err) + return err; - if (ret & BMCR_PDOWN) { - ret &= ~BMCR_PDOWN; - ret = _mv88e6xxx_mdio_page_write(chip, REG_FIBER_SERDES, - PAGE_FIBER_SERDES, MII_BMCR, - ret); + if (val & BMCR_PDOWN) { + val &= ~BMCR_PDOWN; + err = mv88e6xxx_serdes_write(chip, MII_BMCR, val); } - return ret; -} - -static int mv88e6xxx_port_read(struct mv88e6xxx_chip *chip, int port, - int reg, u16 *val) -{ - int addr = chip->info->port_base_addr + port; - - if (port >= chip->info->num_ports) - return -EINVAL; - - return mv88e6xxx_read(chip, addr, reg, val); + return err; } static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) { struct dsa_switch *ds = chip->ds; - int ret; + int err; u16 reg; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || @@ -2442,7 +2465,7 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * and all DSA ports to their maximum bandwidth and * full duplex. */ - reg = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_PCS_CTRL); + err = mv88e6xxx_port_read(chip, port, PORT_PCS_CTRL, ®); if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) { reg &= ~PORT_PCS_CTRL_UNFORCED; reg |= PORT_PCS_CTRL_FORCE_LINK | @@ -2457,10 +2480,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_PCS_CTRL_UNFORCED; } - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PCS_CTRL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PCS_CTRL, reg); + if (err) + return err; } /* Port Control: disable Drop-on-Unlock, disable Drop-on-Lock, @@ -2486,28 +2508,13 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) PORT_CONTROL_USE_TAG | PORT_CONTROL_USE_IP | PORT_CONTROL_STATE_FORWARDING; if (dsa_is_cpu_port(ds, port)) { - if (mv88e6xxx_6095_family(chip) || mv88e6xxx_6185_family(chip)) - reg |= PORT_CONTROL_DSA_TAG; - if (mv88e6xxx_6352_family(chip) || - mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || - mv88e6xxx_6097_family(chip) || - mv88e6xxx_6320_family(chip)) { + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) reg |= PORT_CONTROL_FRAME_ETHER_TYPE_DSA | - PORT_CONTROL_FORWARD_UNKNOWN | PORT_CONTROL_FORWARD_UNKNOWN_MC; - } - - if (mv88e6xxx_6352_family(chip) || - mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || - mv88e6xxx_6097_family(chip) || - mv88e6xxx_6095_family(chip) || - mv88e6xxx_6065_family(chip) || - mv88e6xxx_6185_family(chip) || - mv88e6xxx_6320_family(chip)) { - reg |= PORT_CONTROL_EGRESS_ADD_TAG; - } + else + reg |= PORT_CONTROL_DSA_TAG; + reg |= PORT_CONTROL_EGRESS_ADD_TAG | + PORT_CONTROL_FORWARD_UNKNOWN; } if (dsa_is_dsa_port(ds, port)) { if (mv88e6xxx_6095_family(chip) || @@ -2526,26 +2533,25 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) PORT_CONTROL_FORWARD_UNKNOWN_MC; } if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL, reg); + if (err) + return err; } /* If this port is connected to a SerDes, make sure the SerDes is not * powered down. */ - if (mv88e6xxx_6352_family(chip)) { - ret = _mv88e6xxx_reg_read(chip, REG_PORT(port), PORT_STATUS); - if (ret < 0) - return ret; - ret &= PORT_STATUS_CMODE_MASK; - if ((ret == PORT_STATUS_CMODE_100BASE_X) || - (ret == PORT_STATUS_CMODE_1000BASE_X) || - (ret == PORT_STATUS_CMODE_SGMII)) { - ret = mv88e6xxx_power_on_serdes(chip); - if (ret < 0) - return ret; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_SERDES)) { + err = mv88e6xxx_port_read(chip, port, PORT_STATUS, ®); + if (err) + return err; + reg &= PORT_STATUS_CMODE_MASK; + if ((reg == PORT_STATUS_CMODE_100BASE_X) || + (reg == PORT_STATUS_CMODE_1000BASE_X) || + (reg == PORT_STATUS_CMODE_SGMII)) { + err = mv88e6xxx_serdes_power_on(chip); + if (err < 0) + return err; } } @@ -2579,10 +2585,9 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) reg |= PORT_CONTROL_2_8021Q_DISABLED; if (reg) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_CONTROL_2, reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_2, reg); + if (err) + return err; } /* Port Association Vector: when learning source addresses @@ -2595,16 +2600,14 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) if (dsa_is_cpu_port(ds, port)) reg = 0; - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_ASSOC_VECTOR, - reg); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_ASSOC_VECTOR, reg); + if (err) + return err; /* Egress rate control 2: disable egress rate control. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_RATE_CONTROL_2, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL_2, 0x0000); + if (err) + return err; if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || @@ -2613,111 +2616,108 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) * be paused for by the remote end or the period of * time that this port can pause the remote end. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PAUSE_CTRL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PAUSE_CTRL, 0x0000); + if (err) + return err; /* Port ATU control: disable limiting the number of * address database entries that this port is allowed * to use. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ATU_CONTROL, 0x0000); + err = mv88e6xxx_port_write(chip, port, PORT_ATU_CONTROL, + 0x0000); /* Priority Override: disable DA, SA and VTU priority * override. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_PRI_OVERRIDE, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_PRI_OVERRIDE, + 0x0000); + if (err) + return err; /* Port Ethertype: use the Ethertype DSA Ethertype * value. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_ETH_TYPE, ETH_P_EDSA); - if (ret) - return ret; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) { + err = mv88e6xxx_port_write(chip, port, PORT_ETH_TYPE, + ETH_P_EDSA); + if (err) + return err; + } + /* Tag Remap: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_0123, 0x3210); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_0123, + 0x3210); + if (err) + return err; /* Tag Remap 2: use an identity 802.1p prio -> switch * prio mapping. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_TAG_REGMAP_4567, 0x7654); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_TAG_REGMAP_4567, + 0x7654); + if (err) + return err; } /* Rate Control: disable ingress rate limiting. */ if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || mv88e6xxx_6320_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0001); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0001); + if (err) + return err; } else if (mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip)) { - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), - PORT_RATE_CONTROL, 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_RATE_CONTROL, + 0x0000); + if (err) + return err; } /* Port Control 1: disable trunking, disable sending * learning messages to this port. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_CONTROL_1, - 0x0000); - if (ret) - return ret; + err = mv88e6xxx_port_write(chip, port, PORT_CONTROL_1, 0x0000); + if (err) + return err; /* Port based VLAN map: give each port the same default address * database, and allow bidirectional communication between the * CPU and DSA port(s), and the other ports. */ - ret = _mv88e6xxx_port_fid_set(chip, port, 0); - if (ret) - return ret; + err = _mv88e6xxx_port_fid_set(chip, port, 0); + if (err) + return err; - ret = _mv88e6xxx_port_based_vlan_map(chip, port); - if (ret) - return ret; + err = _mv88e6xxx_port_based_vlan_map(chip, port); + if (err) + return err; /* Default VLAN ID and priority: don't set a default VLAN * ID, and set the default packet priority to zero. */ - ret = _mv88e6xxx_reg_write(chip, REG_PORT(port), PORT_DEFAULT_VLAN, - 0x0000); - if (ret) - return ret; - - return 0; + return mv88e6xxx_port_write(chip, port, PORT_DEFAULT_VLAN, 0x0000); } -static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) { int err; - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_01, - (addr[0] << 8) | addr[1]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_01, (addr[0] << 8) | addr[1]); if (err) return err; - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_23, - (addr[2] << 8) | addr[3]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_23, (addr[2] << 8) | addr[3]); if (err) return err; - return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_45, - (addr[4] << 8) | addr[5]); + err = mv88e6xxx_g1_write(chip, GLOBAL_MAC_45, (addr[4] << 8) | addr[5]); + if (err) + return err; + + return 0; } static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, @@ -2736,7 +2736,7 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, /* Round to nearest multiple of coeff */ age_time = (msecs + coeff / 2) / coeff; - err = mv88e6xxx_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, &val); + err = mv88e6xxx_g1_read(chip, GLOBAL_ATU_CONTROL, &val); if (err) return err; @@ -2744,13 +2744,13 @@ static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, val &= ~0xff0; val |= age_time << 4; - return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, val); + return mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, val); } static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds, unsigned int ageing_time) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; mutex_lock(&chip->reg_lock); @@ -2775,7 +2775,7 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU_ACTIVE)) reg |= GLOBAL_CONTROL_PPU_ENABLE; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL, reg); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL, reg); if (err) return err; @@ -2785,15 +2785,14 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) reg = upstream_port << GLOBAL_MONITOR_CONTROL_INGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_EGRESS_SHIFT | upstream_port << GLOBAL_MONITOR_CONTROL_ARP_SHIFT; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_MONITOR_CONTROL, - reg); + err = mv88e6xxx_g1_write(chip, GLOBAL_MONITOR_CONTROL, reg); if (err) return err; /* Disable remote management, and set the switch's DSA device number. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_CONTROL_2, - GLOBAL_CONTROL_2_MULTIPLE_CASCADE | - (ds->index & 0x1f)); + err = mv88e6xxx_g1_write(chip, GLOBAL_CONTROL_2, + GLOBAL_CONTROL_2_MULTIPLE_CASCADE | + (ds->index & 0x1f)); if (err) return err; @@ -2806,8 +2805,8 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) * enable address learn messages to be sent to all message * ports. */ - err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, - GLOBAL_ATU_CONTROL_LEARN2ALL); + err = mv88e6xxx_g1_write(chip, GLOBAL_ATU_CONTROL, + GLOBAL_ATU_CONTROL_LEARN2ALL); if (err) return err; @@ -2821,39 +2820,39 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) return err; /* Configure the IP ToS mapping registers. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_0, 0x0000); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_0, 0x0000); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_1, 0x0000); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_1, 0x0000); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_2, 0x5555); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_2, 0x5555); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_3, 0x5555); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_3, 0x5555); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_4, 0xaaaa); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_4, 0xaaaa); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_5, 0xaaaa); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_5, 0xaaaa); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_6, 0xffff); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_6, 0xffff); if (err) return err; - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IP_PRI_7, 0xffff); + err = mv88e6xxx_g1_write(chip, GLOBAL_IP_PRI_7, 0xffff); if (err) return err; /* Configure the IEEE 802.1p priority mapping register. */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_IEEE_PRI, 0xfa41); + err = mv88e6xxx_g1_write(chip, GLOBAL_IEEE_PRI, 0xfa41); if (err) return err; /* Clear the statistics counters for all ports */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_FLUSH_ALL); + err = mv88e6xxx_g1_write(chip, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_FLUSH_ALL); if (err) return err; @@ -2865,277 +2864,9 @@ static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) return 0; } -static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, - int target, int port) -{ - u16 val = (target << 8) | (port & 0xf); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); -} - -static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) -{ - int target, port; - int err; - - /* Initialize the routing port to the 32 possible target devices */ - for (target = 0; target < 32; ++target) { - port = 0xf; - - if (target < DSA_MAX_SWITCHES) { - port = chip->ds->rtable[target]; - if (port == DSA_RTABLE_NONE) - port = 0xf; - } - - err = mv88e6xxx_g2_device_mapping_write(chip, target, port); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, - bool hask, u16 mask) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - u16 val = (num << 12) | (mask & port_mask); - - if (hask) - val |= GLOBAL2_TRUNK_MASK_HASK; - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); -} - -static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, - u16 map) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - u16 val = (id << 11) | (map & port_mask); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); -} - -static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) -{ - const u16 port_mask = BIT(chip->info->num_ports) - 1; - int i, err; - - /* Clear all eight possible Trunk Mask vectors */ - for (i = 0; i < 8; ++i) { - err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask); - if (err) - return err; - } - - /* Clear all sixteen possible Trunk ID routing vectors */ - for (i = 0; i < 16; ++i) { - err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0); - if (err) - return err; - } - - return 0; -} - -static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) -{ - int port, err; - - /* Init all Ingress Rate Limit resources of all ports */ - for (port = 0; port < chip->info->num_ports; ++port) { - /* XXX newer chips (like 88E6390) have different 2-bit ops */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_OP_INIT_ALL | - (port << 8)); - if (err) - break; - - /* Wait for the operation to complete */ - err = _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, - GLOBAL2_IRL_CMD_BUSY); - if (err) - break; - } - - return err; -} - -/* Indirect write to the Switch MAC/WoL/WoF register */ -static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, - unsigned int pointer, u8 data) -{ - u16 val = (pointer << 8) | data; - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); -} - -static int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) -{ - int i, err; - - for (i = 0; i < 6; i++) { - err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, - u8 data) -{ - u16 val = (pointer << 8) | (data & 0x7); - - return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); -} - -static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) -{ - int i, err; - - /* Clear all sixteen possible Priority Override entries */ - for (i = 0; i < 16; i++) { - err = mv88e6xxx_g2_pot_write(chip, i, 0); - if (err) - break; - } - - return err; -} - -static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) -{ - return _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, - GLOBAL2_EEPROM_CMD_BUSY | - GLOBAL2_EEPROM_CMD_RUNNING); -} - -static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) -{ - int err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, cmd); - if (err) - return err; - - return mv88e6xxx_g2_eeprom_wait(chip); -} - -static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, - u8 addr, u16 *data) -{ - u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr; - int err; - - err = mv88e6xxx_g2_eeprom_wait(chip); - if (err) - return err; - - err = mv88e6xxx_g2_eeprom_cmd(chip, cmd); - if (err) - return err; - - return mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); -} - -static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, - u8 addr, u16 data) -{ - u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr; - int err; - - err = mv88e6xxx_g2_eeprom_wait(chip); - if (err) - return err; - - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_EEPROM_DATA, data); - if (err) - return err; - - return mv88e6xxx_g2_eeprom_cmd(chip, cmd); -} - -static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) -{ - u16 reg; - int err; - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { - /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:2x as MGMT. - */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, - 0xffff); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) { - /* Consider the frames with reserved multicast destination - * addresses matching 01:80:c2:00:00:0x as MGMT. - */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, - 0xffff); - if (err) - return err; - } - - /* Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || - mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) - reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); - if (err) - return err; - - /* Program the DSA routing table. */ - err = mv88e6xxx_g2_set_device_mapping(chip); - if (err) - return err; - - /* Clear all trunk masks and mapping. */ - err = mv88e6xxx_g2_clear_trunk(chip); - if (err) - return err; - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) { - /* Disable ingress rate limiting by resetting all per port - * ingress rate limit resources to their initial state. - */ - err = mv88e6xxx_g2_clear_irl(chip); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { - /* Initialize Cross-chip Port VLAN Table to reset defaults */ - err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, - GLOBAL2_PVT_ADDR_OP_INIT_ONES); - if (err) - return err; - } - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { - /* Clear the priority override table. */ - err = mv88e6xxx_g2_clear_pot(chip); - if (err) - return err; - } - - return 0; -} - static int mv88e6xxx_setup(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; int i; @@ -3149,7 +2880,7 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) goto unlock; /* Setup Switch Port Registers */ - for (i = 0; i < chip->info->num_ports; i++) { + for (i = 0; i < mv88e6xxx_num_ports(chip); i++) { err = mv88e6xxx_setup_port(chip, i); if (err) goto unlock; @@ -3175,100 +2906,48 @@ unlock: static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->set_switch_mac) + return -EOPNOTSUPP; + mutex_lock(&chip->reg_lock); - - /* Has an indirect Switch MAC/WoL/WoF register in Global 2? */ - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_SWITCH_MAC)) - err = mv88e6xxx_g2_set_switch_mac(chip, addr); - else - err = mv88e6xxx_g1_set_switch_mac(chip, addr); - + err = chip->info->ops->set_switch_mac(chip, addr); mutex_unlock(&chip->reg_lock); return err; } -#ifdef CONFIG_NET_DSA_HWMON -static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page, - int reg) -{ - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int ret; - - mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_mdio_page_read(chip, port, page, reg); - mutex_unlock(&chip->reg_lock); - - return ret; -} - -static int mv88e6xxx_mdio_page_write(struct dsa_switch *ds, int port, int page, - int reg, int val) -{ - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int ret; - - mutex_lock(&chip->reg_lock); - ret = _mv88e6xxx_mdio_page_write(chip, port, page, reg, val); - mutex_unlock(&chip->reg_lock); - - return ret; -} -#endif - -static int mv88e6xxx_port_to_mdio_addr(struct mv88e6xxx_chip *chip, int port) -{ - if (port >= 0 && port < chip->info->num_ports) - return port; - return -EINVAL; -} - -static int mv88e6xxx_mdio_read(struct mii_bus *bus, int port, int regnum) +static int mv88e6xxx_mdio_read(struct mii_bus *bus, int phy, int reg) { struct mv88e6xxx_chip *chip = bus->priv; - int addr = mv88e6xxx_port_to_mdio_addr(chip, port); - int ret; + u16 val; + int err; - if (addr < 0) + if (phy >= mv88e6xxx_num_ports(chip)) return 0xffff; mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) - ret = mv88e6xxx_mdio_read_ppu(chip, addr, regnum); - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY)) - ret = mv88e6xxx_mdio_read_indirect(chip, addr, regnum); - else - ret = mv88e6xxx_mdio_read_direct(chip, addr, regnum); - + err = mv88e6xxx_phy_read(chip, phy, reg, &val); mutex_unlock(&chip->reg_lock); - return ret; + + return err ? err : val; } -static int mv88e6xxx_mdio_write(struct mii_bus *bus, int port, int regnum, - u16 val) +static int mv88e6xxx_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) { struct mv88e6xxx_chip *chip = bus->priv; - int addr = mv88e6xxx_port_to_mdio_addr(chip, port); - int ret; + int err; - if (addr < 0) + if (phy >= mv88e6xxx_num_ports(chip)) return 0xffff; mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) - ret = mv88e6xxx_mdio_write_ppu(chip, addr, regnum, val); - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SMI_PHY)) - ret = mv88e6xxx_mdio_write_indirect(chip, addr, regnum, val); - else - ret = mv88e6xxx_mdio_write_direct(chip, addr, regnum, val); - + err = mv88e6xxx_phy_write(chip, phy, reg, val); mutex_unlock(&chip->reg_lock); - return ret; + + return err; } static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, @@ -3278,9 +2957,6 @@ static int mv88e6xxx_mdio_register(struct mv88e6xxx_chip *chip, struct mii_bus *bus; int err; - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) - mv88e6xxx_ppu_state_init(chip); - if (np) chip->mdio_np = of_get_child_by_name(np, "mdio"); @@ -3335,69 +3011,70 @@ static void mv88e6xxx_mdio_unregister(struct mv88e6xxx_chip *chip) static int mv88e61xx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; + u16 val; int ret; - int val; *temp = 0; mutex_lock(&chip->reg_lock); - ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x6); + ret = mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x6); if (ret < 0) goto error; /* Enable temperature sensor */ - ret = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a); + ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val); if (ret < 0) goto error; - ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret | (1 << 5)); + ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val | (1 << 5)); if (ret < 0) goto error; /* Wait for temperature to stabilize */ usleep_range(10000, 12000); - val = mv88e6xxx_mdio_read_direct(chip, 0x0, 0x1a); - if (val < 0) { - ret = val; + ret = mv88e6xxx_phy_read(chip, 0x0, 0x1a, &val); + if (ret < 0) goto error; - } /* Disable temperature sensor */ - ret = mv88e6xxx_mdio_write_direct(chip, 0x0, 0x1a, ret & ~(1 << 5)); + ret = mv88e6xxx_phy_write(chip, 0x0, 0x1a, val & ~(1 << 5)); if (ret < 0) goto error; *temp = ((val & 0x1f) - 5) * 5; error: - mv88e6xxx_mdio_write_direct(chip, 0x0, 0x16, 0x0); + mv88e6xxx_phy_write(chip, 0x0, 0x16, 0x0); mutex_unlock(&chip->reg_lock); return ret; } static int mv88e63xx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; + u16 val; int ret; *temp = 0; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 27); + mutex_lock(&chip->reg_lock); + ret = mv88e6xxx_phy_page_read(chip, phy, 6, 27, &val); + mutex_unlock(&chip->reg_lock); if (ret < 0) return ret; - *temp = (ret & 0xff) - 25; + *temp = (val & 0xff) - 25; return 0; } static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP)) return -EOPNOTSUPP; @@ -3410,8 +3087,9 @@ static int mv88e6xxx_get_temp(struct dsa_switch *ds, int *temp) static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; + u16 val; int ret; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) @@ -3419,36 +3097,45 @@ static int mv88e6xxx_get_temp_limit(struct dsa_switch *ds, int *temp) *temp = 0; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26); + mutex_lock(&chip->reg_lock); + ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); + mutex_unlock(&chip->reg_lock); if (ret < 0) return ret; - *temp = (((ret >> 8) & 0x1f) * 5) - 25; + *temp = (((val >> 8) & 0x1f) * 5) - 25; return 0; } static int mv88e6xxx_set_temp_limit(struct dsa_switch *ds, int temp) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; - int ret; + u16 val; + int err; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) return -EOPNOTSUPP; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26); - if (ret < 0) - return ret; + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); + if (err) + goto unlock; temp = clamp_val(DIV_ROUND_CLOSEST(temp, 5) + 5, 0, 0x1f); - return mv88e6xxx_mdio_page_write(ds, phy, 6, 26, - (ret & 0xe0ff) | (temp << 8)); + err = mv88e6xxx_phy_page_write(chip, phy, 6, 26, + (val & 0xe0ff) | (temp << 8)); +unlock: + mutex_unlock(&chip->reg_lock); + + return err; } static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int phy = mv88e6xxx_6320_family(chip) ? 3 : 0; + u16 val; int ret; if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_TEMP_LIMIT)) @@ -3456,11 +3143,13 @@ static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) *alarm = false; - ret = mv88e6xxx_mdio_page_read(ds, phy, 6, 26); + mutex_lock(&chip->reg_lock); + ret = mv88e6xxx_phy_page_read(chip, phy, 6, 26, &val); + mutex_unlock(&chip->reg_lock); if (ret < 0) return ret; - *alarm = !!(ret & 0x40); + *alarm = !!(val & 0x40); return 0; } @@ -3468,74 +3157,22 @@ static int mv88e6xxx_get_temp_alarm(struct dsa_switch *ds, bool *alarm) static int mv88e6xxx_get_eeprom_len(struct dsa_switch *ds) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; return chip->eeprom_len; } -static int mv88e6xxx_get_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, u8 *data) -{ - unsigned int offset = eeprom->offset; - unsigned int len = eeprom->len; - u16 val; - int err; - - eeprom->len = 0; - - if (offset & 1) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = (val >> 8) & 0xff; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = val & 0xff; - *data++ = (val >> 8) & 0xff; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - *data++ = val & 0xff; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->get_eeprom) + return -EOPNOTSUPP; + mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_get_eeprom16(chip, eeprom, data); - else - err = -EOPNOTSUPP; - + err = chip->info->ops->get_eeprom(chip, eeprom, data); mutex_unlock(&chip->reg_lock); if (err) @@ -3546,93 +3183,139 @@ static int mv88e6xxx_get_eeprom(struct dsa_switch *ds, return 0; } -static int mv88e6xxx_set_eeprom16(struct mv88e6xxx_chip *chip, - struct ethtool_eeprom *eeprom, u8 *data) -{ - unsigned int offset = eeprom->offset; - unsigned int len = eeprom->len; - u16 val; - int err; - - /* Ensure the RO WriteEn bit is set */ - err = mv88e6xxx_read(chip, REG_GLOBAL2, GLOBAL2_EEPROM_CMD, &val); - if (err) - return err; - - if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN)) - return -EROFS; - - eeprom->len = 0; - - if (offset & 1) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - val = (*data++ << 8) | (val & 0xff); - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset++; - len--; - eeprom->len++; - } - - while (len >= 2) { - val = *data++; - val |= *data++ << 8; - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset += 2; - len -= 2; - eeprom->len += 2; - } - - if (len) { - err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); - if (err) - return err; - - val = (val & 0xff00) | *data++; - - err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); - if (err) - return err; - - offset++; - len--; - eeprom->len++; - } - - return 0; -} - static int mv88e6xxx_set_eeprom(struct dsa_switch *ds, struct ethtool_eeprom *eeprom, u8 *data) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; int err; + if (!chip->info->ops->set_eeprom) + return -EOPNOTSUPP; + if (eeprom->magic != 0xc3ec4951) return -EINVAL; mutex_lock(&chip->reg_lock); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16)) - err = mv88e6xxx_set_eeprom16(chip, eeprom, data); - else - err = -EOPNOTSUPP; - + err = chip->info->ops->set_eeprom(chip, eeprom, data); mutex_unlock(&chip->reg_lock); return err; } +static const struct mv88e6xxx_ops mv88e6085_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6095_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6123_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6131_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6161_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6165_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_read, + .phy_write = mv88e6xxx_write, +}; + +static const struct mv88e6xxx_ops mv88e6171_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6172_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6175_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6176_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6185_ops = { + .set_switch_mac = mv88e6xxx_g1_set_switch_mac, + .phy_read = mv88e6xxx_phy_ppu_read, + .phy_write = mv88e6xxx_phy_ppu_write, +}; + +static const struct mv88e6xxx_ops mv88e6240_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6320_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6321_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6350_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6351_ops = { + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + +static const struct mv88e6xxx_ops mv88e6352_ops = { + .get_eeprom = mv88e6xxx_g2_get_eeprom16, + .set_eeprom = mv88e6xxx_g2_set_eeprom16, + .set_switch_mac = mv88e6xxx_g2_set_switch_mac, + .phy_read = mv88e6xxx_g2_smi_phy_read, + .phy_write = mv88e6xxx_g2_smi_phy_write, +}; + static const struct mv88e6xxx_info mv88e6xxx_table[] = { [MV88E6085] = { .prod_num = PORT_SWITCH_ID_PROD_NUM_6085, @@ -3641,8 +3324,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 10, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6097, + .ops = &mv88e6085_ops, }, [MV88E6095] = { @@ -3652,8 +3337,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 11, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6095, + .ops = &mv88e6095_ops, }, [MV88E6123] = { @@ -3663,8 +3350,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 3, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6123_ops, }, [MV88E6131] = { @@ -3674,8 +3363,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 8, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, + .ops = &mv88e6131_ops, }, [MV88E6161] = { @@ -3685,8 +3376,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6161_ops, }, [MV88E6165] = { @@ -3696,8 +3389,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, + .ops = &mv88e6165_ops, }, [MV88E6171] = { @@ -3707,8 +3402,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6171_ops, }, [MV88E6172] = { @@ -3718,8 +3415,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6172_ops, }, [MV88E6175] = { @@ -3729,8 +3428,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6175_ops, }, [MV88E6176] = { @@ -3740,8 +3441,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6176_ops, }, [MV88E6185] = { @@ -3751,8 +3454,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 10, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, + .ops = &mv88e6185_ops, }, [MV88E6240] = { @@ -3762,8 +3467,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6240_ops, }, [MV88E6320] = { @@ -3773,8 +3480,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, + .ops = &mv88e6320_ops, }, [MV88E6321] = { @@ -3784,8 +3493,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, + .ops = &mv88e6321_ops, }, [MV88E6350] = { @@ -3795,8 +3506,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6350_ops, }, [MV88E6351] = { @@ -3806,8 +3519,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, + .ops = &mv88e6351_ops, }, [MV88E6352] = { @@ -3817,8 +3532,10 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .global1_addr = 0x1b, .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, + .ops = &mv88e6352_ops, }, }; @@ -3856,6 +3573,10 @@ static int mv88e6xxx_detect(struct mv88e6xxx_chip *chip) /* Update the compatible info with the probed one */ chip->info = info; + err = mv88e6xxx_g2_require(chip); + if (err) + return err; + dev_info(chip->dev, "switch 0x%x detected: %s, revision %u\n", chip->info->prod_num, chip->info->name, rev); @@ -3877,6 +3598,18 @@ static struct mv88e6xxx_chip *mv88e6xxx_alloc_chip(struct device *dev) return chip; } +static void mv88e6xxx_phy_init(struct mv88e6xxx_chip *chip) +{ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) + mv88e6xxx_ppu_state_init(chip); +} + +static void mv88e6xxx_phy_destroy(struct mv88e6xxx_chip *chip) +{ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_PPU)) + mv88e6xxx_ppu_state_destroy(chip); +} + static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, struct mii_bus *bus, int sw_addr) { @@ -3886,7 +3619,7 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, if (sw_addr == 0) chip->smi_ops = &mv88e6xxx_smi_single_chip_ops; - else if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_MULTI_CHIP)) + else if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_MULTI_CHIP)) chip->smi_ops = &mv88e6xxx_smi_multi_chip_ops; else return -EINVAL; @@ -3897,6 +3630,16 @@ static int mv88e6xxx_smi_init(struct mv88e6xxx_chip *chip, return 0; } +static enum dsa_tag_protocol mv88e6xxx_get_tag_protocol(struct dsa_switch *ds) +{ + struct mv88e6xxx_chip *chip = ds->priv; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_EDSA)) + return DSA_TAG_PROTO_EDSA; + + return DSA_TAG_PROTO_DSA; +} + static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv) @@ -3924,6 +3667,8 @@ static const char *mv88e6xxx_drv_probe(struct device *dsa_dev, if (err) goto free; + mv88e6xxx_phy_init(chip); + err = mv88e6xxx_mdio_register(chip, NULL); if (err) goto free; @@ -3937,9 +3682,61 @@ free: return NULL; } -static struct dsa_switch_driver mv88e6xxx_switch_driver = { - .tag_protocol = DSA_TAG_PROTO_EDSA, +static int mv88e6xxx_port_mdb_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + /* We don't need any dynamic resource from the kernel (yet), + * so skip the prepare phase. + */ + + return 0; +} + +static void mv88e6xxx_port_mdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct mv88e6xxx_chip *chip = ds->priv; + + mutex_lock(&chip->reg_lock); + if (mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, + GLOBAL_ATU_DATA_STATE_MC_STATIC)) + netdev_err(ds->ports[port].netdev, "failed to load multicast MAC address\n"); + mutex_unlock(&chip->reg_lock); +} + +static int mv88e6xxx_port_mdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_db_load_purge(chip, port, mdb->addr, mdb->vid, + GLOBAL_ATU_DATA_STATE_UNUSED); + mutex_unlock(&chip->reg_lock); + + return err; +} + +static int mv88e6xxx_port_mdb_dump(struct dsa_switch *ds, int port, + struct switchdev_obj_port_mdb *mdb, + int (*cb)(struct switchdev_obj *obj)) +{ + struct mv88e6xxx_chip *chip = ds->priv; + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_port_db_dump(chip, port, &mdb->obj, cb); + mutex_unlock(&chip->reg_lock); + + return err; +} + +static struct dsa_switch_ops mv88e6xxx_switch_ops = { .probe = mv88e6xxx_drv_probe, + .get_tag_protocol = mv88e6xxx_get_tag_protocol, .setup = mv88e6xxx_setup, .set_addr = mv88e6xxx_set_addr, .adjust_link = mv88e6xxx_adjust_link, @@ -3963,6 +3760,7 @@ static struct dsa_switch_driver mv88e6xxx_switch_driver = { .port_bridge_join = mv88e6xxx_port_bridge_join, .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_stp_state_set = mv88e6xxx_port_stp_state_set, + .port_fast_age = mv88e6xxx_port_fast_age, .port_vlan_filtering = mv88e6xxx_port_vlan_filtering, .port_vlan_prepare = mv88e6xxx_port_vlan_prepare, .port_vlan_add = mv88e6xxx_port_vlan_add, @@ -3972,6 +3770,10 @@ static struct dsa_switch_driver mv88e6xxx_switch_driver = { .port_fdb_add = mv88e6xxx_port_fdb_add, .port_fdb_del = mv88e6xxx_port_fdb_del, .port_fdb_dump = mv88e6xxx_port_fdb_dump, + .port_mdb_prepare = mv88e6xxx_port_mdb_prepare, + .port_mdb_add = mv88e6xxx_port_mdb_add, + .port_mdb_del = mv88e6xxx_port_mdb_del, + .port_mdb_dump = mv88e6xxx_port_mdb_dump, }; static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, @@ -3986,7 +3788,7 @@ static int mv88e6xxx_register_switch(struct mv88e6xxx_chip *chip, ds->dev = dev; ds->priv = chip; - ds->drv = &mv88e6xxx_switch_driver; + ds->ops = &mv88e6xxx_switch_ops; dev_set_drvdata(dev, ds); @@ -4025,11 +3827,13 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) if (err) return err; + mv88e6xxx_phy_init(chip); + chip->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_ASIS); if (IS_ERR(chip->reset)) return PTR_ERR(chip->reset); - if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_EEPROM16) && + if (chip->info->ops->get_eeprom && !of_property_read_u32(np, "eeprom-length", &eeprom_len)) chip->eeprom_len = eeprom_len; @@ -4049,8 +3853,9 @@ static int mv88e6xxx_probe(struct mdio_device *mdiodev) static void mv88e6xxx_remove(struct mdio_device *mdiodev) { struct dsa_switch *ds = dev_get_drvdata(&mdiodev->dev); - struct mv88e6xxx_chip *chip = ds_to_priv(ds); + struct mv88e6xxx_chip *chip = ds->priv; + mv88e6xxx_phy_destroy(chip); mv88e6xxx_unregister_switch(chip); mv88e6xxx_mdio_unregister(chip); } @@ -4076,7 +3881,7 @@ static struct mdio_driver mv88e6xxx_driver = { static int __init mv88e6xxx_init(void) { - register_switch_driver(&mv88e6xxx_switch_driver); + register_switch_driver(&mv88e6xxx_switch_ops); return mdio_driver_register(&mv88e6xxx_driver); } module_init(mv88e6xxx_init); @@ -4084,7 +3889,7 @@ module_init(mv88e6xxx_init); static void __exit mv88e6xxx_cleanup(void) { mdio_driver_unregister(&mv88e6xxx_driver); - unregister_switch_driver(&mv88e6xxx_switch_driver); + unregister_switch_driver(&mv88e6xxx_switch_ops); } module_exit(mv88e6xxx_cleanup); diff --git a/drivers/net/dsa/mv88e6xxx/global1.c b/drivers/net/dsa/mv88e6xxx/global1.c new file mode 100644 index 000000000000..d358720b6c2d --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global1.c @@ -0,0 +1,34 @@ +/* + * Marvell 88E6xxx Switch Global (1) Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "mv88e6xxx.h" +#include "global1.h" + +int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + int addr = chip->info->global1_addr; + + return mv88e6xxx_read(chip, addr, reg, val); +} + +int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + int addr = chip->info->global1_addr; + + return mv88e6xxx_write(chip, addr, reg, val); +} + +int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return mv88e6xxx_wait(chip, chip->info->global1_addr, reg, mask); +} diff --git a/drivers/net/dsa/mv88e6xxx/global1.h b/drivers/net/dsa/mv88e6xxx/global1.h new file mode 100644 index 000000000000..62291e6fe3a3 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global1.h @@ -0,0 +1,23 @@ +/* + * Marvell 88E6xxx Switch Global (1) Registers support + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_GLOBAL1_H +#define _MV88E6XXX_GLOBAL1_H + +#include "mv88e6xxx.h" + +int mv88e6xxx_g1_read(struct mv88e6xxx_chip *chip, int reg, u16 *val); +int mv88e6xxx_g1_write(struct mv88e6xxx_chip *chip, int reg, u16 val); +int mv88e6xxx_g1_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask); + +#endif /* _MV88E6XXX_GLOBAL1_H */ diff --git a/drivers/net/dsa/mv88e6xxx/global2.c b/drivers/net/dsa/mv88e6xxx/global2.c new file mode 100644 index 000000000000..cf686e7506a9 --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2.c @@ -0,0 +1,491 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C) + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "mv88e6xxx.h" +#include "global2.h" + +#define ADDR_GLOBAL2 0x1c + +static int mv88e6xxx_g2_read(struct mv88e6xxx_chip *chip, int reg, u16 *val) +{ + return mv88e6xxx_read(chip, ADDR_GLOBAL2, reg, val); +} + +static int mv88e6xxx_g2_write(struct mv88e6xxx_chip *chip, int reg, u16 val) +{ + return mv88e6xxx_write(chip, ADDR_GLOBAL2, reg, val); +} + +static int mv88e6xxx_g2_update(struct mv88e6xxx_chip *chip, int reg, u16 update) +{ + return mv88e6xxx_update(chip, ADDR_GLOBAL2, reg, update); +} + +static int mv88e6xxx_g2_wait(struct mv88e6xxx_chip *chip, int reg, u16 mask) +{ + return mv88e6xxx_wait(chip, ADDR_GLOBAL2, reg, mask); +} + +/* Offset 0x06: Device Mapping Table register */ + +static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, + int target, int port) +{ + u16 val = (target << 8) | (port & 0xf); + + return mv88e6xxx_g2_update(chip, GLOBAL2_DEVICE_MAPPING, val); +} + +static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) +{ + int target, port; + int err; + + /* Initialize the routing port to the 32 possible target devices */ + for (target = 0; target < 32; ++target) { + port = 0xf; + + if (target < DSA_MAX_SWITCHES) { + port = chip->ds->rtable[target]; + if (port == DSA_RTABLE_NONE) + port = 0xf; + } + + err = mv88e6xxx_g2_device_mapping_write(chip, target, port); + if (err) + break; + } + + return err; +} + +/* Offset 0x07: Trunk Mask Table register */ + +static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, + bool hask, u16 mask) +{ + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; + u16 val = (num << 12) | (mask & port_mask); + + if (hask) + val |= GLOBAL2_TRUNK_MASK_HASK; + + return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MASK, val); +} + +/* Offset 0x08: Trunk Mapping Table register */ + +static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, + u16 map) +{ + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; + u16 val = (id << 11) | (map & port_mask); + + return mv88e6xxx_g2_update(chip, GLOBAL2_TRUNK_MAPPING, val); +} + +static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) +{ + const u16 port_mask = BIT(mv88e6xxx_num_ports(chip)) - 1; + int i, err; + + /* Clear all eight possible Trunk Mask vectors */ + for (i = 0; i < 8; ++i) { + err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask); + if (err) + return err; + } + + /* Clear all sixteen possible Trunk ID routing vectors */ + for (i = 0; i < 16; ++i) { + err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0); + if (err) + return err; + } + + return 0; +} + +/* Offset 0x09: Ingress Rate Command register + * Offset 0x0A: Ingress Rate Data register + */ + +static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) +{ + int port, err; + + /* Init all Ingress Rate Limit resources of all ports */ + for (port = 0; port < mv88e6xxx_num_ports(chip); ++port) { + /* XXX newer chips (like 88E6390) have different 2-bit ops */ + err = mv88e6xxx_g2_write(chip, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_OP_INIT_ALL | + (port << 8)); + if (err) + break; + + /* Wait for the operation to complete */ + err = mv88e6xxx_g2_wait(chip, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_BUSY); + if (err) + break; + } + + return err; +} + +/* Offset 0x0D: Switch MAC/WoL/WoF register */ + +static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, + unsigned int pointer, u8 data) +{ + u16 val = (pointer << 8) | data; + + return mv88e6xxx_g2_update(chip, GLOBAL2_SWITCH_MAC, val); +} + +int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +{ + int i, err; + + for (i = 0; i < 6; i++) { + err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]); + if (err) + break; + } + + return err; +} + +/* Offset 0x0F: Priority Override Table */ + +static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, + u8 data) +{ + u16 val = (pointer << 8) | (data & 0x7); + + return mv88e6xxx_g2_update(chip, GLOBAL2_PRIO_OVERRIDE, val); +} + +static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) +{ + int i, err; + + /* Clear all sixteen possible Priority Override entries */ + for (i = 0; i < 16; i++) { + err = mv88e6xxx_g2_pot_write(chip, i, 0); + if (err) + break; + } + + return err; +} + +/* Offset 0x14: EEPROM Command + * Offset 0x15: EEPROM Data + */ + +static int mv88e6xxx_g2_eeprom_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_g2_wait(chip, GLOBAL2_EEPROM_CMD, + GLOBAL2_EEPROM_CMD_BUSY | + GLOBAL2_EEPROM_CMD_RUNNING); +} + +static int mv88e6xxx_g2_eeprom_cmd(struct mv88e6xxx_chip *chip, u16 cmd) +{ + int err; + + err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_CMD, cmd); + if (err) + return err; + + return mv88e6xxx_g2_eeprom_wait(chip); +} + +static int mv88e6xxx_g2_eeprom_read16(struct mv88e6xxx_chip *chip, + u8 addr, u16 *data) +{ + u16 cmd = GLOBAL2_EEPROM_CMD_OP_READ | addr; + int err; + + err = mv88e6xxx_g2_eeprom_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_eeprom_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_DATA, data); +} + +static int mv88e6xxx_g2_eeprom_write16(struct mv88e6xxx_chip *chip, + u8 addr, u16 data) +{ + u16 cmd = GLOBAL2_EEPROM_CMD_OP_WRITE | addr; + int err; + + err = mv88e6xxx_g2_eeprom_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_write(chip, GLOBAL2_EEPROM_DATA, data); + if (err) + return err; + + return mv88e6xxx_g2_eeprom_cmd(chip, cmd); +} + +int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data) +{ + unsigned int offset = eeprom->offset; + unsigned int len = eeprom->len; + u16 val; + int err; + + eeprom->len = 0; + + if (offset & 1) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = (val >> 8) & 0xff; + + offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = val & 0xff; + *data++ = (val >> 8) & 0xff; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + *data++ = val & 0xff; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + +int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data) +{ + unsigned int offset = eeprom->offset; + unsigned int len = eeprom->len; + u16 val; + int err; + + /* Ensure the RO WriteEn bit is set */ + err = mv88e6xxx_g2_read(chip, GLOBAL2_EEPROM_CMD, &val); + if (err) + return err; + + if (!(val & GLOBAL2_EEPROM_CMD_WRITE_EN)) + return -EROFS; + + eeprom->len = 0; + + if (offset & 1) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + val = (*data++ << 8) | (val & 0xff); + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset++; + len--; + eeprom->len++; + } + + while (len >= 2) { + val = *data++; + val |= *data++ << 8; + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset += 2; + len -= 2; + eeprom->len += 2; + } + + if (len) { + err = mv88e6xxx_g2_eeprom_read16(chip, offset >> 1, &val); + if (err) + return err; + + val = (val & 0xff00) | *data++; + + err = mv88e6xxx_g2_eeprom_write16(chip, offset >> 1, val); + if (err) + return err; + + offset++; + len--; + eeprom->len++; + } + + return 0; +} + +/* Offset 0x18: SMI PHY Command Register + * Offset 0x19: SMI PHY Data Register + */ + +static int mv88e6xxx_g2_smi_phy_wait(struct mv88e6xxx_chip *chip) +{ + return mv88e6xxx_g2_wait(chip, GLOBAL2_SMI_PHY_CMD, + GLOBAL2_SMI_PHY_CMD_BUSY); +} + +static int mv88e6xxx_g2_smi_phy_cmd(struct mv88e6xxx_chip *chip, u16 cmd) +{ + int err; + + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_CMD, cmd); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_wait(chip); +} + +int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_smi_phy_cmd(chip, cmd); + if (err) + return err; + + return mv88e6xxx_g2_read(chip, GLOBAL2_SMI_PHY_DATA, val); +} + +int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val) +{ + u16 cmd = GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA | (addr << 5) | reg; + int err; + + err = mv88e6xxx_g2_smi_phy_wait(chip); + if (err) + return err; + + err = mv88e6xxx_g2_write(chip, GLOBAL2_SMI_PHY_DATA, val); + if (err) + return err; + + return mv88e6xxx_g2_smi_phy_cmd(chip, cmd); +} + +int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) +{ + u16 reg; + int err; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:2x as MGMT. + */ + err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_2X, 0xffff); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:0x as MGMT. + */ + err = mv88e6xxx_g2_write(chip, GLOBAL2_MGMT_EN_0X, 0xffff); + if (err) + return err; + } + + /* Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the highest priority. + */ + reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4); + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || + mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) + reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; + err = mv88e6xxx_g2_write(chip, GLOBAL2_SWITCH_MGMT, reg); + if (err) + return err; + + /* Program the DSA routing table. */ + err = mv88e6xxx_g2_set_device_mapping(chip); + if (err) + return err; + + /* Clear all trunk masks and mapping. */ + err = mv88e6xxx_g2_clear_trunk(chip); + if (err) + return err; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) { + /* Disable ingress rate limiting by resetting all per port + * ingress rate limit resources to their initial state. + */ + err = mv88e6xxx_g2_clear_irl(chip); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { + /* Initialize Cross-chip Port VLAN Table to reset defaults */ + err = mv88e6xxx_g2_write(chip, GLOBAL2_PVT_ADDR, + GLOBAL2_PVT_ADDR_OP_INIT_ONES); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { + /* Clear the priority override table. */ + err = mv88e6xxx_g2_clear_pot(chip); + if (err) + return err; + } + + return 0; +} diff --git a/drivers/net/dsa/mv88e6xxx/global2.h b/drivers/net/dsa/mv88e6xxx/global2.h new file mode 100644 index 000000000000..c4bb9035ee3a --- /dev/null +++ b/drivers/net/dsa/mv88e6xxx/global2.h @@ -0,0 +1,88 @@ +/* + * Marvell 88E6xxx Switch Global 2 Registers support (device address 0x1C) + * + * Copyright (c) 2008 Marvell Semiconductor + * + * Copyright (c) 2016 Vivien Didelot + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef _MV88E6XXX_GLOBAL2_H +#define _MV88E6XXX_GLOBAL2_H + +#include "mv88e6xxx.h" + +#ifdef CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 + +static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) +{ + return 0; +} + +int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val); +int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val); +int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr); +int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); +int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); +int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip); + +#else /* !CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ + +static inline int mv88e6xxx_g2_require(struct mv88e6xxx_chip *chip) +{ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + dev_err(chip->dev, "this chip requires CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 enabled\n"); + return -EOPNOTSUPP; + } + + return 0; +} + +static inline int mv88e6xxx_g2_smi_phy_read(struct mv88e6xxx_chip *chip, + int addr, int reg, u16 *val) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_smi_phy_write(struct mv88e6xxx_chip *chip, + int addr, int reg, u16 val) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, + u8 *addr) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_get_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, + u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_set_eeprom16(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, + u8 *data) +{ + return -EOPNOTSUPP; +} + +static inline int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) +{ + return -EOPNOTSUPP; +} + +#endif /* CONFIG_NET_DSA_MV88E6XXX_GLOBAL2 */ + +#endif /* _MV88E6XXX_GLOBAL2_H */ diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 48d6ea77f9bd..e572121c196e 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -30,11 +30,13 @@ #define SMI_CMD_OP_45_READ_DATA_INC ((3 << 10) | SMI_CMD_BUSY) #define SMI_DATA 0x01 -/* Fiber/SERDES Registers are located at SMI address F, page 1 */ -#define REG_FIBER_SERDES 0x0f -#define PAGE_FIBER_SERDES 0x01 +/* PHY Registers */ +#define PHY_PAGE 0x16 +#define PHY_PAGE_COPPER 0x00 + +#define ADDR_SERDES 0x0f +#define SERDES_PAGE_FIBER 0x01 -#define REG_PORT(p) (0x10 + (p)) #define PORT_STATUS 0x00 #define PORT_STATUS_PAUSE_EN BIT(15) #define PORT_STATUS_MY_PAUSE BIT(14) @@ -157,7 +159,6 @@ #define PORT_TAG_REGMAP_0123 0x18 #define PORT_TAG_REGMAP_4567 0x19 -#define REG_GLOBAL 0x1b #define GLOBAL_STATUS 0x00 #define GLOBAL_STATUS_PPU_STATE BIT(15) /* 6351 and 6171 */ /* Two bits for 6165, 6185 etc */ @@ -169,8 +170,8 @@ #define GLOBAL_MAC_01 0x01 #define GLOBAL_MAC_23 0x02 #define GLOBAL_MAC_45 0x03 -#define GLOBAL_ATU_FID 0x01 /* 6097 6165 6351 6352 */ -#define GLOBAL_VTU_FID 0x02 /* 6097 6165 6351 6352 */ +#define GLOBAL_ATU_FID 0x01 +#define GLOBAL_VTU_FID 0x02 #define GLOBAL_VTU_FID_MASK 0xfff #define GLOBAL_VTU_SID 0x03 /* 6097 6165 6351 6352 */ #define GLOBAL_VTU_SID_MASK 0x3f @@ -275,7 +276,6 @@ #define GLOBAL_STATS_COUNTER_32 0x1e #define GLOBAL_STATS_COUNTER_01 0x1f -#define REG_GLOBAL2 0x1c #define GLOBAL2_INT_SOURCE 0x00 #define GLOBAL2_INT_MASK 0x01 #define GLOBAL2_MGMT_EN_2X 0x02 @@ -329,17 +329,16 @@ #define GLOBAL2_EEPROM_DATA 0x15 #define GLOBAL2_PTP_AVB_OP 0x16 #define GLOBAL2_PTP_AVB_DATA 0x17 -#define GLOBAL2_SMI_OP 0x18 -#define GLOBAL2_SMI_OP_BUSY BIT(15) -#define GLOBAL2_SMI_OP_CLAUSE_22 BIT(12) -#define GLOBAL2_SMI_OP_22_WRITE ((1 << 10) | GLOBAL2_SMI_OP_BUSY | \ - GLOBAL2_SMI_OP_CLAUSE_22) -#define GLOBAL2_SMI_OP_22_READ ((2 << 10) | GLOBAL2_SMI_OP_BUSY | \ - GLOBAL2_SMI_OP_CLAUSE_22) -#define GLOBAL2_SMI_OP_45_WRITE_ADDR ((0 << 10) | GLOBAL2_SMI_OP_BUSY) -#define GLOBAL2_SMI_OP_45_WRITE_DATA ((1 << 10) | GLOBAL2_SMI_OP_BUSY) -#define GLOBAL2_SMI_OP_45_READ_DATA ((2 << 10) | GLOBAL2_SMI_OP_BUSY) -#define GLOBAL2_SMI_DATA 0x19 +#define GLOBAL2_SMI_PHY_CMD 0x18 +#define GLOBAL2_SMI_PHY_CMD_BUSY BIT(15) +#define GLOBAL2_SMI_PHY_CMD_MODE_22 BIT(12) +#define GLOBAL2_SMI_PHY_CMD_OP_22_WRITE_DATA ((0x1 << 10) | \ + GLOBAL2_SMI_PHY_CMD_MODE_22 | \ + GLOBAL2_SMI_PHY_CMD_BUSY) +#define GLOBAL2_SMI_PHY_CMD_OP_22_READ_DATA ((0x2 << 10) | \ + GLOBAL2_SMI_PHY_CMD_MODE_22 | \ + GLOBAL2_SMI_PHY_CMD_BUSY) +#define GLOBAL2_SMI_PHY_DATA 0x19 #define GLOBAL2_SCRATCH_MISC 0x1a #define GLOBAL2_SCRATCH_BUSY BIT(15) #define GLOBAL2_SCRATCH_REGISTER_SHIFT 8 @@ -384,10 +383,36 @@ enum mv88e6xxx_family { }; enum mv88e6xxx_cap { + /* Two different tag protocols can be used by the driver. All + * switches support DSA, but only later generations support + * EDSA. + */ + MV88E6XXX_CAP_EDSA, + /* Energy Efficient Ethernet. */ MV88E6XXX_CAP_EEE, + /* Multi-chip Addressing Mode. + * Some chips respond to only 2 registers of its own SMI device address + * when it is non-zero, and use indirect access to internal registers. + */ + MV88E6XXX_CAP_SMI_CMD, /* (0x00) SMI Command */ + MV88E6XXX_CAP_SMI_DATA, /* (0x01) SMI Data */ + + /* PHY Registers. + */ + MV88E6XXX_CAP_PHY_PAGE, /* (0x16) Page Register */ + + /* Fiber/SERDES Registers (SMI address F). + */ + MV88E6XXX_CAP_SERDES, + + /* Switch Global (1) Registers. + */ + MV88E6XXX_CAP_G1_ATU_FID, /* (0x01) ATU FID Register */ + MV88E6XXX_CAP_G1_VTU_FID, /* (0x02) VTU FID Register */ + /* Switch Global 2 Registers. * The device contains a second set of global 16-bit registers. */ @@ -398,16 +423,7 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_G2_IRL_DATA, /* (0x0a) Ingress Rate Data */ MV88E6XXX_CAP_G2_PVT_ADDR, /* (0x0b) Cross Chip Port VLAN Addr */ MV88E6XXX_CAP_G2_PVT_DATA, /* (0x0c) Cross Chip Port VLAN Data */ - MV88E6XXX_CAP_G2_SWITCH_MAC, /* (0x0d) Switch MAC/WoL/WoF */ MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ - MV88E6XXX_CAP_G2_EEPROM_CMD, /* (0x14) EEPROM Command */ - MV88E6XXX_CAP_G2_EEPROM_DATA, /* (0x15) EEPROM Data */ - - /* Multi-chip Addressing Mode. - * Some chips require an indirect SMI access when their SMI device - * address is not zero. See SMI_CMD and SMI_DATA. - */ - MV88E6XXX_CAP_MULTI_CHIP, /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. @@ -415,12 +431,6 @@ enum mv88e6xxx_cap { MV88E6XXX_CAP_PPU, MV88E6XXX_CAP_PPU_ACTIVE, - /* SMI PHY Command and Data registers. - * This requires an indirect access to PHY registers through - * GLOBAL2_SMI_OP, otherwise direct access to PHY registers is done. - */ - MV88E6XXX_CAP_SMI_PHY, - /* Per VLAN Spanning Tree Unit (STU). * The Port State database, if present, is accessed through VTU * operations and dedicated SID registers. See GLOBAL_VTU_SID. @@ -440,130 +450,148 @@ enum mv88e6xxx_cap { }; /* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) -#define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) -#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) -#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) -#define MV88E6XXX_FLAG_G2_IRL_CMD BIT(MV88E6XXX_CAP_G2_IRL_CMD) -#define MV88E6XXX_FLAG_G2_IRL_DATA BIT(MV88E6XXX_CAP_G2_IRL_DATA) -#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT(MV88E6XXX_CAP_G2_PVT_ADDR) -#define MV88E6XXX_FLAG_G2_PVT_DATA BIT(MV88E6XXX_CAP_G2_PVT_DATA) -#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT(MV88E6XXX_CAP_G2_SWITCH_MAC) -#define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) -#define MV88E6XXX_FLAG_G2_EEPROM_CMD BIT(MV88E6XXX_CAP_G2_EEPROM_CMD) -#define MV88E6XXX_FLAG_G2_EEPROM_DATA BIT(MV88E6XXX_CAP_G2_EEPROM_DATA) -#define MV88E6XXX_FLAG_MULTI_CHIP BIT(MV88E6XXX_CAP_MULTI_CHIP) -#define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) -#define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) -#define MV88E6XXX_FLAG_SMI_PHY BIT(MV88E6XXX_CAP_SMI_PHY) -#define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU) -#define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) -#define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) -#define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU) +#define MV88E6XXX_FLAG_EDSA BIT_ULL(MV88E6XXX_CAP_EDSA) +#define MV88E6XXX_FLAG_EEE BIT_ULL(MV88E6XXX_CAP_EEE) -/* EEPROM Programming via Global2 with 16-bit data */ -#define MV88E6XXX_FLAGS_EEPROM16 \ - (MV88E6XXX_FLAG_G2_EEPROM_CMD | \ - MV88E6XXX_FLAG_G2_EEPROM_DATA) +#define MV88E6XXX_FLAG_SMI_CMD BIT_ULL(MV88E6XXX_CAP_SMI_CMD) +#define MV88E6XXX_FLAG_SMI_DATA BIT_ULL(MV88E6XXX_CAP_SMI_DATA) + +#define MV88E6XXX_FLAG_PHY_PAGE BIT_ULL(MV88E6XXX_CAP_PHY_PAGE) + +#define MV88E6XXX_FLAG_SERDES BIT_ULL(MV88E6XXX_CAP_SERDES) + +#define MV88E6XXX_FLAG_G1_ATU_FID BIT_ULL(MV88E6XXX_CAP_G1_ATU_FID) +#define MV88E6XXX_FLAG_G1_VTU_FID BIT_ULL(MV88E6XXX_CAP_G1_VTU_FID) + +#define MV88E6XXX_FLAG_GLOBAL2 BIT_ULL(MV88E6XXX_CAP_GLOBAL2) +#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_2X) +#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT_ULL(MV88E6XXX_CAP_G2_MGMT_EN_0X) +#define MV88E6XXX_FLAG_G2_IRL_CMD BIT_ULL(MV88E6XXX_CAP_G2_IRL_CMD) +#define MV88E6XXX_FLAG_G2_IRL_DATA BIT_ULL(MV88E6XXX_CAP_G2_IRL_DATA) +#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT_ULL(MV88E6XXX_CAP_G2_PVT_ADDR) +#define MV88E6XXX_FLAG_G2_PVT_DATA BIT_ULL(MV88E6XXX_CAP_G2_PVT_DATA) +#define MV88E6XXX_FLAG_G2_POT BIT_ULL(MV88E6XXX_CAP_G2_POT) + +#define MV88E6XXX_FLAG_PPU BIT_ULL(MV88E6XXX_CAP_PPU) +#define MV88E6XXX_FLAG_PPU_ACTIVE BIT_ULL(MV88E6XXX_CAP_PPU_ACTIVE) +#define MV88E6XXX_FLAG_STU BIT_ULL(MV88E6XXX_CAP_STU) +#define MV88E6XXX_FLAG_TEMP BIT_ULL(MV88E6XXX_CAP_TEMP) +#define MV88E6XXX_FLAG_TEMP_LIMIT BIT_ULL(MV88E6XXX_CAP_TEMP_LIMIT) +#define MV88E6XXX_FLAG_VTU BIT_ULL(MV88E6XXX_CAP_VTU) /* Ingress Rate Limit unit */ #define MV88E6XXX_FLAGS_IRL \ (MV88E6XXX_FLAG_G2_IRL_CMD | \ MV88E6XXX_FLAG_G2_IRL_DATA) +/* Multi-chip Addressing Mode */ +#define MV88E6XXX_FLAGS_MULTI_CHIP \ + (MV88E6XXX_FLAG_SMI_CMD | \ + MV88E6XXX_FLAG_SMI_DATA) + /* Cross-chip Port VLAN Table */ #define MV88E6XXX_FLAGS_PVT \ (MV88E6XXX_FLAG_G2_PVT_ADDR | \ MV88E6XXX_FLAG_G2_PVT_DATA) +/* Fiber/SERDES Registers at SMI address F, page 1 */ +#define MV88E6XXX_FLAGS_SERDES \ + (MV88E6XXX_FLAG_PHY_PAGE | \ + MV88E6XXX_FLAG_SERDES) + #define MV88E6XXX_FLAGS_FAMILY_6095 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ - MV88E6XXX_FLAG_VTU) + MV88E6XXX_FLAG_VTU | \ + MV88E6XXX_FLAGS_MULTI_CHIP) #define MV88E6XXX_FLAGS_FAMILY_6097 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6165 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6185 \ (MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ MV88E6XXX_FLAG_VTU) #define MV88E6XXX_FLAGS_FAMILY_6320 \ - (MV88E6XXX_FLAG_EEE | \ + (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_EEE | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ - MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ - MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6351 \ - (MV88E6XXX_FLAG_GLOBAL2 | \ + (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ + MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ - MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_VTU | \ MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_MULTI_CHIP | \ MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6352 \ - (MV88E6XXX_FLAG_EEE | \ + (MV88E6XXX_FLAG_EDSA | \ + MV88E6XXX_FLAG_EEE | \ + MV88E6XXX_FLAG_G1_ATU_FID | \ + MV88E6XXX_FLAG_G1_VTU_FID | \ MV88E6XXX_FLAG_GLOBAL2 | \ MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ - MV88E6XXX_FLAG_G2_SWITCH_MAC | \ MV88E6XXX_FLAG_G2_POT | \ - MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ - MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ MV88E6XXX_FLAG_VTU | \ - MV88E6XXX_FLAGS_EEPROM16 | \ MV88E6XXX_FLAGS_IRL | \ - MV88E6XXX_FLAGS_PVT) + MV88E6XXX_FLAGS_MULTI_CHIP | \ + MV88E6XXX_FLAGS_PVT | \ + MV88E6XXX_FLAGS_SERDES) + +struct mv88e6xxx_ops; struct mv88e6xxx_info { enum mv88e6xxx_family family; @@ -572,8 +600,10 @@ struct mv88e6xxx_info { unsigned int num_databases; unsigned int num_ports; unsigned int port_base_addr; + unsigned int global1_addr; unsigned int age_time_coeff; - unsigned long flags; + unsigned long long flags; + const struct mv88e6xxx_ops *ops; }; struct mv88e6xxx_atu_entry { @@ -584,18 +614,15 @@ struct mv88e6xxx_atu_entry { u8 mac[ETH_ALEN]; }; -struct mv88e6xxx_vtu_stu_entry { - /* VTU only */ +struct mv88e6xxx_vtu_entry { u16 vid; u16 fid; - - /* VTU and STU */ u8 sid; bool valid; u8 data[DSA_MAX_PORTS]; }; -struct mv88e6xxx_ops; +struct mv88e6xxx_bus_ops; struct mv88e6xxx_priv_port { struct net_device *bridge_dev; @@ -616,13 +643,14 @@ struct mv88e6xxx_chip { /* The MII bus and the address on the bus that is used to * communication with the switch */ - const struct mv88e6xxx_ops *smi_ops; + const struct mv88e6xxx_bus_ops *smi_ops; struct mii_bus *bus; int sw_addr; /* Handles automatic disabling and re-enabling of the PHY * polling unit. */ + const struct mv88e6xxx_bus_ops *phy_ops; struct mutex ppu_mutex; int ppu_disabled; struct work_struct ppu_work; @@ -651,11 +679,25 @@ struct mv88e6xxx_chip { struct mii_bus *mdio_bus; }; -struct mv88e6xxx_ops { +struct mv88e6xxx_bus_ops { int (*read)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); int (*write)(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); }; +struct mv88e6xxx_ops { + int (*get_eeprom)(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); + int (*set_eeprom)(struct mv88e6xxx_chip *chip, + struct ethtool_eeprom *eeprom, u8 *data); + + int (*set_switch_mac)(struct mv88e6xxx_chip *chip, u8 *addr); + + int (*phy_read)(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 *val); + int (*phy_write)(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 val); +}; + enum stat_type { BANK0, BANK1, @@ -675,4 +717,20 @@ static inline bool mv88e6xxx_has(struct mv88e6xxx_chip *chip, return (chip->info->flags & flags) == flags; } +static inline unsigned int mv88e6xxx_num_databases(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_databases; +} + +static inline unsigned int mv88e6xxx_num_ports(struct mv88e6xxx_chip *chip) +{ + return chip->info->num_ports; +} + +int mv88e6xxx_read(struct mv88e6xxx_chip *chip, int addr, int reg, u16 *val); +int mv88e6xxx_write(struct mv88e6xxx_chip *chip, int addr, int reg, u16 val); +int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 update); +int mv88e6xxx_wait(struct mv88e6xxx_chip *chip, int addr, int reg, u16 mask); + #endif diff --git a/drivers/net/dsa/qca8k.c b/drivers/net/dsa/qca8k.c new file mode 100644 index 000000000000..b3df70d07ff6 --- /dev/null +++ b/drivers/net/dsa/qca8k.c @@ -0,0 +1,1040 @@ +/* + * Copyright (C) 2009 Felix Fietkau + * Copyright (C) 2011-2012 Gabor Juhos + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * Copyright (c) 2016 John Crispin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "qca8k.h" + +#define MIB_DESC(_s, _o, _n) \ + { \ + .size = (_s), \ + .offset = (_o), \ + .name = (_n), \ + } + +static const struct qca8k_mib_desc ar8327_mib[] = { + MIB_DESC(1, 0x00, "RxBroad"), + MIB_DESC(1, 0x04, "RxPause"), + MIB_DESC(1, 0x08, "RxMulti"), + MIB_DESC(1, 0x0c, "RxFcsErr"), + MIB_DESC(1, 0x10, "RxAlignErr"), + MIB_DESC(1, 0x14, "RxRunt"), + MIB_DESC(1, 0x18, "RxFragment"), + MIB_DESC(1, 0x1c, "Rx64Byte"), + MIB_DESC(1, 0x20, "Rx128Byte"), + MIB_DESC(1, 0x24, "Rx256Byte"), + MIB_DESC(1, 0x28, "Rx512Byte"), + MIB_DESC(1, 0x2c, "Rx1024Byte"), + MIB_DESC(1, 0x30, "Rx1518Byte"), + MIB_DESC(1, 0x34, "RxMaxByte"), + MIB_DESC(1, 0x38, "RxTooLong"), + MIB_DESC(2, 0x3c, "RxGoodByte"), + MIB_DESC(2, 0x44, "RxBadByte"), + MIB_DESC(1, 0x4c, "RxOverFlow"), + MIB_DESC(1, 0x50, "Filtered"), + MIB_DESC(1, 0x54, "TxBroad"), + MIB_DESC(1, 0x58, "TxPause"), + MIB_DESC(1, 0x5c, "TxMulti"), + MIB_DESC(1, 0x60, "TxUnderRun"), + MIB_DESC(1, 0x64, "Tx64Byte"), + MIB_DESC(1, 0x68, "Tx128Byte"), + MIB_DESC(1, 0x6c, "Tx256Byte"), + MIB_DESC(1, 0x70, "Tx512Byte"), + MIB_DESC(1, 0x74, "Tx1024Byte"), + MIB_DESC(1, 0x78, "Tx1518Byte"), + MIB_DESC(1, 0x7c, "TxMaxByte"), + MIB_DESC(1, 0x80, "TxOverSize"), + MIB_DESC(2, 0x84, "TxByte"), + MIB_DESC(1, 0x8c, "TxCollision"), + MIB_DESC(1, 0x90, "TxAbortCol"), + MIB_DESC(1, 0x94, "TxMultiCol"), + MIB_DESC(1, 0x98, "TxSingleCol"), + MIB_DESC(1, 0x9c, "TxExcDefer"), + MIB_DESC(1, 0xa0, "TxDefer"), + MIB_DESC(1, 0xa4, "TxLateCol"), +}; + +/* The 32bit switch registers are accessed indirectly. To achieve this we need + * to set the page of the register. Track the last page that was set to reduce + * mdio writes + */ +static u16 qca8k_current_page = 0xffff; + +static void +qca8k_split_addr(u32 regaddr, u16 *r1, u16 *r2, u16 *page) +{ + regaddr >>= 1; + *r1 = regaddr & 0x1e; + + regaddr >>= 5; + *r2 = regaddr & 0x7; + + regaddr >>= 3; + *page = regaddr & 0x3ff; +} + +static u32 +qca8k_mii_read32(struct mii_bus *bus, int phy_id, u32 regnum) +{ + u32 val; + int ret; + + ret = bus->read(bus, phy_id, regnum); + if (ret >= 0) { + val = ret; + ret = bus->read(bus, phy_id, regnum + 1); + val |= ret << 16; + } + + if (ret < 0) { + dev_err_ratelimited(&bus->dev, + "failed to read qca8k 32bit register\n"); + return ret; + } + + return val; +} + +static void +qca8k_mii_write32(struct mii_bus *bus, int phy_id, u32 regnum, u32 val) +{ + u16 lo, hi; + int ret; + + lo = val & 0xffff; + hi = (u16)(val >> 16); + + ret = bus->write(bus, phy_id, regnum, lo); + if (ret >= 0) + ret = bus->write(bus, phy_id, regnum + 1, hi); + if (ret < 0) + dev_err_ratelimited(&bus->dev, + "failed to write qca8k 32bit register\n"); +} + +static void +qca8k_set_page(struct mii_bus *bus, u16 page) +{ + if (page == qca8k_current_page) + return; + + if (bus->write(bus, 0x18, 0, page) < 0) + dev_err_ratelimited(&bus->dev, + "failed to set qca8k page\n"); + qca8k_current_page = page; +} + +static u32 +qca8k_read(struct qca8k_priv *priv, u32 reg) +{ + u16 r1, r2, page; + u32 val; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + val = qca8k_mii_read32(priv->bus, 0x10 | r2, r1); + + mutex_unlock(&priv->bus->mdio_lock); + + return val; +} + +static void +qca8k_write(struct qca8k_priv *priv, u32 reg, u32 val) +{ + u16 r1, r2, page; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + qca8k_mii_write32(priv->bus, 0x10 | r2, r1, val); + + mutex_unlock(&priv->bus->mdio_lock); +} + +static u32 +qca8k_rmw(struct qca8k_priv *priv, u32 reg, u32 mask, u32 val) +{ + u16 r1, r2, page; + u32 ret; + + qca8k_split_addr(reg, &r1, &r2, &page); + + mutex_lock_nested(&priv->bus->mdio_lock, MDIO_MUTEX_NESTED); + + qca8k_set_page(priv->bus, page); + ret = qca8k_mii_read32(priv->bus, 0x10 | r2, r1); + ret &= ~mask; + ret |= val; + qca8k_mii_write32(priv->bus, 0x10 | r2, r1, ret); + + mutex_unlock(&priv->bus->mdio_lock); + + return ret; +} + +static void +qca8k_reg_set(struct qca8k_priv *priv, u32 reg, u32 val) +{ + qca8k_rmw(priv, reg, 0, val); +} + +static void +qca8k_reg_clear(struct qca8k_priv *priv, u32 reg, u32 val) +{ + qca8k_rmw(priv, reg, val, 0); +} + +static int +qca8k_regmap_read(void *ctx, uint32_t reg, uint32_t *val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ctx; + + *val = qca8k_read(priv, reg); + + return 0; +} + +static int +qca8k_regmap_write(void *ctx, uint32_t reg, uint32_t val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ctx; + + qca8k_write(priv, reg, val); + + return 0; +} + +static const struct regmap_range qca8k_readable_ranges[] = { + regmap_reg_range(0x0000, 0x00e4), /* Global control */ + regmap_reg_range(0x0100, 0x0168), /* EEE control */ + regmap_reg_range(0x0200, 0x0270), /* Parser control */ + regmap_reg_range(0x0400, 0x0454), /* ACL */ + regmap_reg_range(0x0600, 0x0718), /* Lookup */ + regmap_reg_range(0x0800, 0x0b70), /* QM */ + regmap_reg_range(0x0c00, 0x0c80), /* PKT */ + regmap_reg_range(0x0e00, 0x0e98), /* L3 */ + regmap_reg_range(0x1000, 0x10ac), /* MIB - Port0 */ + regmap_reg_range(0x1100, 0x11ac), /* MIB - Port1 */ + regmap_reg_range(0x1200, 0x12ac), /* MIB - Port2 */ + regmap_reg_range(0x1300, 0x13ac), /* MIB - Port3 */ + regmap_reg_range(0x1400, 0x14ac), /* MIB - Port4 */ + regmap_reg_range(0x1500, 0x15ac), /* MIB - Port5 */ + regmap_reg_range(0x1600, 0x16ac), /* MIB - Port6 */ + +}; + +static struct regmap_access_table qca8k_readable_table = { + .yes_ranges = qca8k_readable_ranges, + .n_yes_ranges = ARRAY_SIZE(qca8k_readable_ranges), +}; + +static struct regmap_config qca8k_regmap_config = { + .reg_bits = 16, + .val_bits = 32, + .reg_stride = 4, + .max_register = 0x16ac, /* end MIB - Port6 range */ + .reg_read = qca8k_regmap_read, + .reg_write = qca8k_regmap_write, + .rd_table = &qca8k_readable_table, +}; + +static int +qca8k_busy_wait(struct qca8k_priv *priv, u32 reg, u32 mask) +{ + unsigned long timeout; + + timeout = jiffies + msecs_to_jiffies(20); + + /* loop until the busy flag has cleared */ + do { + u32 val = qca8k_read(priv, reg); + int busy = val & mask; + + if (!busy) + break; + cond_resched(); + } while (!time_after_eq(jiffies, timeout)); + + return time_after_eq(jiffies, timeout); +} + +static void +qca8k_fdb_read(struct qca8k_priv *priv, struct qca8k_fdb *fdb) +{ + u32 reg[4]; + int i; + + /* load the ARL table into an array */ + for (i = 0; i < 4; i++) + reg[i] = qca8k_read(priv, QCA8K_REG_ATU_DATA0 + (i * 4)); + + /* vid - 83:72 */ + fdb->vid = (reg[2] >> QCA8K_ATU_VID_S) & QCA8K_ATU_VID_M; + /* aging - 67:64 */ + fdb->aging = reg[2] & QCA8K_ATU_STATUS_M; + /* portmask - 54:48 */ + fdb->port_mask = (reg[1] >> QCA8K_ATU_PORT_S) & QCA8K_ATU_PORT_M; + /* mac - 47:0 */ + fdb->mac[0] = (reg[1] >> QCA8K_ATU_ADDR0_S) & 0xff; + fdb->mac[1] = reg[1] & 0xff; + fdb->mac[2] = (reg[0] >> QCA8K_ATU_ADDR2_S) & 0xff; + fdb->mac[3] = (reg[0] >> QCA8K_ATU_ADDR3_S) & 0xff; + fdb->mac[4] = (reg[0] >> QCA8K_ATU_ADDR4_S) & 0xff; + fdb->mac[5] = reg[0] & 0xff; +} + +static void +qca8k_fdb_write(struct qca8k_priv *priv, u16 vid, u8 port_mask, const u8 *mac, + u8 aging) +{ + u32 reg[3] = { 0 }; + int i; + + /* vid - 83:72 */ + reg[2] = (vid & QCA8K_ATU_VID_M) << QCA8K_ATU_VID_S; + /* aging - 67:64 */ + reg[2] |= aging & QCA8K_ATU_STATUS_M; + /* portmask - 54:48 */ + reg[1] = (port_mask & QCA8K_ATU_PORT_M) << QCA8K_ATU_PORT_S; + /* mac - 47:0 */ + reg[1] |= mac[0] << QCA8K_ATU_ADDR0_S; + reg[1] |= mac[1]; + reg[0] |= mac[2] << QCA8K_ATU_ADDR2_S; + reg[0] |= mac[3] << QCA8K_ATU_ADDR3_S; + reg[0] |= mac[4] << QCA8K_ATU_ADDR4_S; + reg[0] |= mac[5]; + + /* load the array into the ARL table */ + for (i = 0; i < 3; i++) + qca8k_write(priv, QCA8K_REG_ATU_DATA0 + (i * 4), reg[i]); +} + +static int +qca8k_fdb_access(struct qca8k_priv *priv, enum qca8k_fdb_cmd cmd, int port) +{ + u32 reg; + + /* Set the command and FDB index */ + reg = QCA8K_ATU_FUNC_BUSY; + reg |= cmd; + if (port >= 0) { + reg |= QCA8K_ATU_FUNC_PORT_EN; + reg |= (port & QCA8K_ATU_FUNC_PORT_M) << QCA8K_ATU_FUNC_PORT_S; + } + + /* Write the function register triggering the table access */ + qca8k_write(priv, QCA8K_REG_ATU_FUNC, reg); + + /* wait for completion */ + if (qca8k_busy_wait(priv, QCA8K_REG_ATU_FUNC, QCA8K_ATU_FUNC_BUSY)) + return -1; + + /* Check for table full violation when adding an entry */ + if (cmd == QCA8K_FDB_LOAD) { + reg = qca8k_read(priv, QCA8K_REG_ATU_FUNC); + if (reg & QCA8K_ATU_FUNC_FULL) + return -1; + } + + return 0; +} + +static int +qca8k_fdb_next(struct qca8k_priv *priv, struct qca8k_fdb *fdb, int port) +{ + int ret; + + qca8k_fdb_write(priv, fdb->vid, fdb->port_mask, fdb->mac, fdb->aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_NEXT, port); + if (ret >= 0) + qca8k_fdb_read(priv, fdb); + + return ret; +} + +static int +qca8k_fdb_add(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, + u16 vid, u8 aging) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, aging); + ret = qca8k_fdb_access(priv, QCA8K_FDB_LOAD, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static int +qca8k_fdb_del(struct qca8k_priv *priv, const u8 *mac, u16 port_mask, u16 vid) +{ + int ret; + + mutex_lock(&priv->reg_mutex); + qca8k_fdb_write(priv, vid, port_mask, mac, 0); + ret = qca8k_fdb_access(priv, QCA8K_FDB_PURGE, -1); + mutex_unlock(&priv->reg_mutex); + + return ret; +} + +static void +qca8k_fdb_flush(struct qca8k_priv *priv) +{ + mutex_lock(&priv->reg_mutex); + qca8k_fdb_access(priv, QCA8K_FDB_FLUSH, -1); + mutex_unlock(&priv->reg_mutex); +} + +static void +qca8k_mib_init(struct qca8k_priv *priv) +{ + mutex_lock(&priv->reg_mutex); + qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_FLUSH | QCA8K_MIB_BUSY); + qca8k_busy_wait(priv, QCA8K_REG_MIB, QCA8K_MIB_BUSY); + qca8k_reg_set(priv, QCA8K_REG_MIB, QCA8K_MIB_CPU_KEEP); + qca8k_write(priv, QCA8K_REG_MODULE_EN, QCA8K_MODULE_EN_MIB); + mutex_unlock(&priv->reg_mutex); +} + +static int +qca8k_set_pad_ctrl(struct qca8k_priv *priv, int port, int mode) +{ + u32 reg; + + switch (port) { + case 0: + reg = QCA8K_REG_PORT0_PAD_CTRL; + break; + case 6: + reg = QCA8K_REG_PORT6_PAD_CTRL; + break; + default: + pr_err("Can't set PAD_CTRL on port %d\n", port); + return -EINVAL; + } + + /* Configure a port to be directly connected to an external + * PHY or MAC. + */ + switch (mode) { + case PHY_INTERFACE_MODE_RGMII: + qca8k_write(priv, reg, + QCA8K_PORT_PAD_RGMII_EN | + QCA8K_PORT_PAD_RGMII_TX_DELAY(3) | + QCA8K_PORT_PAD_RGMII_RX_DELAY(3)); + + /* According to the datasheet, RGMII delay is enabled through + * PORT5_PAD_CTRL for all ports, rather than individual port + * registers + */ + qca8k_write(priv, QCA8K_REG_PORT5_PAD_CTRL, + QCA8K_PORT_PAD_RGMII_RX_DELAY_EN); + break; + case PHY_INTERFACE_MODE_SGMII: + qca8k_write(priv, reg, QCA8K_PORT_PAD_SGMII_EN); + break; + default: + pr_err("xMII mode %d not supported\n", mode); + return -EINVAL; + } + + return 0; +} + +static void +qca8k_port_set_status(struct qca8k_priv *priv, int port, int enable) +{ + u32 mask = QCA8K_PORT_STATUS_TXMAC; + + /* Port 0 and 6 have no internal PHY */ + if ((port > 0) && (port < 6)) + mask |= QCA8K_PORT_STATUS_LINK_AUTO; + + if (enable) + qca8k_reg_set(priv, QCA8K_REG_PORT_STATUS(port), mask); + else + qca8k_reg_clear(priv, QCA8K_REG_PORT_STATUS(port), mask); +} + +static int +qca8k_setup(struct dsa_switch *ds) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int ret, i, phy_mode = -1; + + /* Make sure that port 0 is the cpu port */ + if (!dsa_is_cpu_port(ds, 0)) { + pr_err("port 0 is not the CPU port\n"); + return -EINVAL; + } + + mutex_init(&priv->reg_mutex); + + /* Start by setting up the register mapping */ + priv->regmap = devm_regmap_init(ds->dev, NULL, priv, + &qca8k_regmap_config); + if (IS_ERR(priv->regmap)) + pr_warn("regmap initialization failed"); + + /* Initialize CPU port pad mode (xMII type, delays...) */ + phy_mode = of_get_phy_mode(ds->ports[ds->dst->cpu_port].dn); + if (phy_mode < 0) { + pr_err("Can't find phy-mode for master device\n"); + return phy_mode; + } + ret = qca8k_set_pad_ctrl(priv, QCA8K_CPU_PORT, phy_mode); + if (ret < 0) + return ret; + + /* Enable CPU Port */ + qca8k_reg_set(priv, QCA8K_REG_GLOBAL_FW_CTRL0, + QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN); + qca8k_port_set_status(priv, QCA8K_CPU_PORT, 1); + priv->port_sts[QCA8K_CPU_PORT].enabled = 1; + + /* Enable MIB counters */ + qca8k_mib_init(priv); + + /* Enable QCA header mode on the cpu port */ + qca8k_write(priv, QCA8K_REG_PORT_HDR_CTRL(QCA8K_CPU_PORT), + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_TX_S | + QCA8K_PORT_HDR_CTRL_ALL << QCA8K_PORT_HDR_CTRL_RX_S); + + /* Disable forwarding by default on all ports */ + for (i = 0; i < QCA8K_NUM_PORTS; i++) + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_MEMBER, 0); + + /* Disable MAC by default on all user ports */ + for (i = 1; i < QCA8K_NUM_PORTS; i++) + if (ds->enabled_port_mask & BIT(i)) + qca8k_port_set_status(priv, i, 0); + + /* Forward all unknown frames to CPU port for Linux processing */ + qca8k_write(priv, QCA8K_REG_GLOBAL_FW_CTRL1, + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_BC_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_MC_DP_S | + BIT(0) << QCA8K_GLOBAL_FW_CTRL1_UC_DP_S); + + /* Setup connection between CPU port & user ports */ + for (i = 0; i < DSA_MAX_PORTS; i++) { + /* CPU port gets connected to all user ports of the switch */ + if (dsa_is_cpu_port(ds, i)) { + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(QCA8K_CPU_PORT), + QCA8K_PORT_LOOKUP_MEMBER, + ds->enabled_port_mask); + } + + /* Invividual user ports get connected to CPU port only */ + if (ds->enabled_port_mask & BIT(i)) { + int shift = 16 * (i % 2); + + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_MEMBER, + BIT(QCA8K_CPU_PORT)); + + /* Enable ARP Auto-learning by default */ + qca8k_reg_set(priv, QCA8K_PORT_LOOKUP_CTRL(i), + QCA8K_PORT_LOOKUP_LEARN); + + /* For port based vlans to work we need to set the + * default egress vid + */ + qca8k_rmw(priv, QCA8K_EGRESS_VLAN(i), + 0xffff << shift, 1 << shift); + qca8k_write(priv, QCA8K_REG_PORT_VLAN_CTRL0(i), + QCA8K_PORT_VLAN_CVID(1) | + QCA8K_PORT_VLAN_SVID(1)); + } + } + + /* Flush the FDB table */ + qca8k_fdb_flush(priv); + + return 0; +} + +static int +qca8k_phy_read(struct dsa_switch *ds, int phy, int regnum) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + return mdiobus_read(priv->bus, phy, regnum); +} + +static int +qca8k_phy_write(struct dsa_switch *ds, int phy, int regnum, u16 val) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + return mdiobus_write(priv->bus, phy, regnum, val); +} + +static void +qca8k_get_strings(struct dsa_switch *ds, int port, uint8_t *data) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) + strncpy(data + i * ETH_GSTRING_LEN, ar8327_mib[i].name, + ETH_GSTRING_LEN); +} + +static void +qca8k_get_ethtool_stats(struct dsa_switch *ds, int port, + uint64_t *data) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + const struct qca8k_mib_desc *mib; + u32 reg, i; + u64 hi; + + for (i = 0; i < ARRAY_SIZE(ar8327_mib); i++) { + mib = &ar8327_mib[i]; + reg = QCA8K_PORT_MIB_COUNTER(port) + mib->offset; + + data[i] = qca8k_read(priv, reg); + if (mib->size == 2) { + hi = qca8k_read(priv, reg + 4); + data[i] |= hi << 32; + } + } +} + +static int +qca8k_get_sset_count(struct dsa_switch *ds) +{ + return ARRAY_SIZE(ar8327_mib); +} + +static void +qca8k_eee_enable_set(struct dsa_switch *ds, int port, bool enable) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u32 lpi_en = QCA8K_REG_EEE_CTRL_LPI_EN(port); + u32 reg; + + mutex_lock(&priv->reg_mutex); + reg = qca8k_read(priv, QCA8K_REG_EEE_CTRL); + if (enable) + reg |= lpi_en; + else + reg &= ~lpi_en; + qca8k_write(priv, QCA8K_REG_EEE_CTRL, reg); + mutex_unlock(&priv->reg_mutex); +} + +static int +qca8k_eee_init(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + int ret; + + p->supported = (SUPPORTED_1000baseT_Full | SUPPORTED_100baseT_Full); + + ret = phy_init_eee(phy, 0); + if (ret) + return ret; + + qca8k_eee_enable_set(ds, port, true); + + return 0; +} + +static int +qca8k_set_eee(struct dsa_switch *ds, int port, + struct phy_device *phydev, + struct ethtool_eee *e) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + int ret = 0; + + p->eee_enabled = e->eee_enabled; + + if (e->eee_enabled) { + p->eee_enabled = qca8k_eee_init(ds, port, phydev); + if (!p->eee_enabled) + ret = -EOPNOTSUPP; + } + qca8k_eee_enable_set(ds, port, p->eee_enabled); + + return ret; +} + +static int +qca8k_get_eee(struct dsa_switch *ds, int port, + struct ethtool_eee *e) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct ethtool_eee *p = &priv->port_sts[port].eee; + struct net_device *netdev = ds->ports[port].netdev; + int ret; + + ret = phy_ethtool_get_eee(netdev->phydev, p); + if (!ret) + e->eee_active = + !!(p->supported & p->advertised & p->lp_advertised); + else + e->eee_active = 0; + + e->eee_enabled = p->eee_enabled; + + return ret; +} + +static void +qca8k_port_stp_state_set(struct dsa_switch *ds, int port, u8 state) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u32 stp_state; + + switch (state) { + case BR_STATE_DISABLED: + stp_state = QCA8K_PORT_LOOKUP_STATE_DISABLED; + break; + case BR_STATE_BLOCKING: + stp_state = QCA8K_PORT_LOOKUP_STATE_BLOCKING; + break; + case BR_STATE_LISTENING: + stp_state = QCA8K_PORT_LOOKUP_STATE_LISTENING; + break; + case BR_STATE_LEARNING: + stp_state = QCA8K_PORT_LOOKUP_STATE_LEARNING; + break; + case BR_STATE_FORWARDING: + default: + stp_state = QCA8K_PORT_LOOKUP_STATE_FORWARD; + break; + } + + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_STATE_MASK, stp_state); +} + +static int +qca8k_port_bridge_join(struct dsa_switch *ds, int port, + struct net_device *bridge) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int port_mask = BIT(QCA8K_CPU_PORT); + int i; + + priv->port_sts[port].bridge_dev = bridge; + + for (i = 1; i < QCA8K_NUM_PORTS; i++) { + if (priv->port_sts[i].bridge_dev != bridge) + continue; + /* Add this port to the portvlan mask of the other ports + * in the bridge + */ + qca8k_reg_set(priv, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + if (i != port) + port_mask |= BIT(i); + } + /* Add all other ports to this ports portvlan mask */ + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, port_mask); + + return 0; +} + +static void +qca8k_port_bridge_leave(struct dsa_switch *ds, int port) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + int i; + + for (i = 1; i < QCA8K_NUM_PORTS; i++) { + if (priv->port_sts[i].bridge_dev != + priv->port_sts[port].bridge_dev) + continue; + /* Remove this port to the portvlan mask of the other ports + * in the bridge + */ + qca8k_reg_clear(priv, + QCA8K_PORT_LOOKUP_CTRL(i), + BIT(port)); + } + priv->port_sts[port].bridge_dev = NULL; + /* Set the cpu port to be the only one in the portvlan mask of + * this port + */ + qca8k_rmw(priv, QCA8K_PORT_LOOKUP_CTRL(port), + QCA8K_PORT_LOOKUP_MEMBER, BIT(QCA8K_CPU_PORT)); +} + +static int +qca8k_port_enable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + qca8k_port_set_status(priv, port, 1); + priv->port_sts[port].enabled = 1; + + return 0; +} + +static void +qca8k_port_disable(struct dsa_switch *ds, int port, + struct phy_device *phy) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + qca8k_port_set_status(priv, port, 0); + priv->port_sts[port].enabled = 0; +} + +static int +qca8k_port_fdb_insert(struct qca8k_priv *priv, const u8 *addr, + u16 port_mask, u16 vid) +{ + /* Set the vid to the port vlan id if no vid is set */ + if (!vid) + vid = 1; + + return qca8k_fdb_add(priv, addr, port_mask, vid, + QCA8K_ATU_STATUS_STATIC); +} + +static int +qca8k_port_fdb_prepare(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + + /* The FDB table for static and auto learned entries is the same. We + * need to reserve an entry with no port_mask set to make sure that + * when port_fdb_add is called an entry is still available. Otherwise + * the last free entry might have been used up by auto learning + */ + return qca8k_port_fdb_insert(priv, fdb->addr, 0, fdb->vid); +} + +static void +qca8k_port_fdb_add(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb, + struct switchdev_trans *trans) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u16 port_mask = BIT(port); + + /* Update the FDB entry adding the port_mask */ + qca8k_port_fdb_insert(priv, fdb->addr, port_mask, fdb->vid); +} + +static int +qca8k_port_fdb_del(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_fdb *fdb) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + u16 port_mask = BIT(port); + u16 vid = fdb->vid; + + if (!vid) + vid = 1; + + return qca8k_fdb_del(priv, fdb->addr, port_mask, vid); +} + +static int +qca8k_port_fdb_dump(struct dsa_switch *ds, int port, + struct switchdev_obj_port_fdb *fdb, + int (*cb)(struct switchdev_obj *obj)) +{ + struct qca8k_priv *priv = (struct qca8k_priv *)ds->priv; + struct qca8k_fdb _fdb = { 0 }; + int cnt = QCA8K_NUM_FDB_RECORDS; + int ret = 0; + + mutex_lock(&priv->reg_mutex); + while (cnt-- && !qca8k_fdb_next(priv, &_fdb, port)) { + if (!_fdb.aging) + break; + + ether_addr_copy(fdb->addr, _fdb.mac); + fdb->vid = _fdb.vid; + if (_fdb.aging == QCA8K_ATU_STATUS_STATIC) + fdb->ndm_state = NUD_NOARP; + else + fdb->ndm_state = NUD_REACHABLE; + + ret = cb(&fdb->obj); + if (ret) + break; + } + mutex_unlock(&priv->reg_mutex); + + return 0; +} + +static enum dsa_tag_protocol +qca8k_get_tag_protocol(struct dsa_switch *ds) +{ + return DSA_TAG_PROTO_QCA; +} + +static struct dsa_switch_ops qca8k_switch_ops = { + .get_tag_protocol = qca8k_get_tag_protocol, + .setup = qca8k_setup, + .get_strings = qca8k_get_strings, + .phy_read = qca8k_phy_read, + .phy_write = qca8k_phy_write, + .get_ethtool_stats = qca8k_get_ethtool_stats, + .get_sset_count = qca8k_get_sset_count, + .get_eee = qca8k_get_eee, + .set_eee = qca8k_set_eee, + .port_enable = qca8k_port_enable, + .port_disable = qca8k_port_disable, + .port_stp_state_set = qca8k_port_stp_state_set, + .port_bridge_join = qca8k_port_bridge_join, + .port_bridge_leave = qca8k_port_bridge_leave, + .port_fdb_prepare = qca8k_port_fdb_prepare, + .port_fdb_add = qca8k_port_fdb_add, + .port_fdb_del = qca8k_port_fdb_del, + .port_fdb_dump = qca8k_port_fdb_dump, +}; + +static int +qca8k_sw_probe(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv; + u32 id; + + /* allocate the private data struct so that we can probe the switches + * ID register + */ + priv = devm_kzalloc(&mdiodev->dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->bus = mdiodev->bus; + + /* read the switches ID register */ + id = qca8k_read(priv, QCA8K_REG_MASK_CTRL); + id >>= QCA8K_MASK_CTRL_ID_S; + id &= QCA8K_MASK_CTRL_ID_M; + if (id != QCA8K_ID_QCA8337) + return -ENODEV; + + priv->ds = devm_kzalloc(&mdiodev->dev, sizeof(*priv->ds), GFP_KERNEL); + if (!priv->ds) + return -ENOMEM; + + priv->ds->priv = priv; + priv->ds->dev = &mdiodev->dev; + priv->ds->ops = &qca8k_switch_ops; + mutex_init(&priv->reg_mutex); + dev_set_drvdata(&mdiodev->dev, priv); + + return dsa_register_switch(priv->ds, priv->ds->dev->of_node); +} + +static void +qca8k_sw_remove(struct mdio_device *mdiodev) +{ + struct qca8k_priv *priv = dev_get_drvdata(&mdiodev->dev); + int i; + + for (i = 0; i < QCA8K_NUM_PORTS; i++) + qca8k_port_set_status(priv, i, 0); + + dsa_unregister_switch(priv->ds); +} + +#ifdef CONFIG_PM_SLEEP +static void +qca8k_set_pm(struct qca8k_priv *priv, int enable) +{ + int i; + + for (i = 0; i < QCA8K_NUM_PORTS; i++) { + if (!priv->port_sts[i].enabled) + continue; + + qca8k_port_set_status(priv, i, enable); + } +} + +static int qca8k_suspend(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qca8k_priv *priv = platform_get_drvdata(pdev); + + qca8k_set_pm(priv, 0); + + return dsa_switch_suspend(priv->ds); +} + +static int qca8k_resume(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + struct qca8k_priv *priv = platform_get_drvdata(pdev); + + qca8k_set_pm(priv, 1); + + return dsa_switch_resume(priv->ds); +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(qca8k_pm_ops, + qca8k_suspend, qca8k_resume); + +static const struct of_device_id qca8k_of_match[] = { + { .compatible = "qca,qca8337" }, + { /* sentinel */ }, +}; + +static struct mdio_driver qca8kmdio_driver = { + .probe = qca8k_sw_probe, + .remove = qca8k_sw_remove, + .mdiodrv.driver = { + .name = "qca8k", + .of_match_table = qca8k_of_match, + .pm = &qca8k_pm_ops, + }, +}; + +mdio_module_driver(qca8kmdio_driver); + +MODULE_AUTHOR("Mathieu Olivari, John Crispin "); +MODULE_DESCRIPTION("Driver for QCA8K ethernet switch family"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:qca8k"); diff --git a/drivers/net/dsa/qca8k.h b/drivers/net/dsa/qca8k.h new file mode 100644 index 000000000000..201464719531 --- /dev/null +++ b/drivers/net/dsa/qca8k.h @@ -0,0 +1,185 @@ +/* + * Copyright (C) 2009 Felix Fietkau + * Copyright (C) 2011-2012 Gabor Juhos + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __QCA8K_H +#define __QCA8K_H + +#include +#include + +#define QCA8K_NUM_PORTS 7 + +#define PHY_ID_QCA8337 0x004dd036 +#define QCA8K_ID_QCA8337 0x13 + +#define QCA8K_NUM_FDB_RECORDS 2048 + +#define QCA8K_CPU_PORT 0 + +/* Global control registers */ +#define QCA8K_REG_MASK_CTRL 0x000 +#define QCA8K_MASK_CTRL_ID_M 0xff +#define QCA8K_MASK_CTRL_ID_S 8 +#define QCA8K_REG_PORT0_PAD_CTRL 0x004 +#define QCA8K_REG_PORT5_PAD_CTRL 0x008 +#define QCA8K_REG_PORT6_PAD_CTRL 0x00c +#define QCA8K_PORT_PAD_RGMII_EN BIT(26) +#define QCA8K_PORT_PAD_RGMII_TX_DELAY(x) \ + ((0x8 + (x & 0x3)) << 22) +#define QCA8K_PORT_PAD_RGMII_RX_DELAY(x) \ + ((0x10 + (x & 0x3)) << 20) +#define QCA8K_PORT_PAD_RGMII_RX_DELAY_EN BIT(24) +#define QCA8K_PORT_PAD_SGMII_EN BIT(7) +#define QCA8K_REG_MODULE_EN 0x030 +#define QCA8K_MODULE_EN_MIB BIT(0) +#define QCA8K_REG_MIB 0x034 +#define QCA8K_MIB_FLUSH BIT(24) +#define QCA8K_MIB_CPU_KEEP BIT(20) +#define QCA8K_MIB_BUSY BIT(17) +#define QCA8K_GOL_MAC_ADDR0 0x60 +#define QCA8K_GOL_MAC_ADDR1 0x64 +#define QCA8K_REG_PORT_STATUS(_i) (0x07c + (_i) * 4) +#define QCA8K_PORT_STATUS_SPEED GENMASK(2, 0) +#define QCA8K_PORT_STATUS_SPEED_S 0 +#define QCA8K_PORT_STATUS_TXMAC BIT(2) +#define QCA8K_PORT_STATUS_RXMAC BIT(3) +#define QCA8K_PORT_STATUS_TXFLOW BIT(4) +#define QCA8K_PORT_STATUS_RXFLOW BIT(5) +#define QCA8K_PORT_STATUS_DUPLEX BIT(6) +#define QCA8K_PORT_STATUS_LINK_UP BIT(8) +#define QCA8K_PORT_STATUS_LINK_AUTO BIT(9) +#define QCA8K_PORT_STATUS_LINK_PAUSE BIT(10) +#define QCA8K_REG_PORT_HDR_CTRL(_i) (0x9c + (_i * 4)) +#define QCA8K_PORT_HDR_CTRL_RX_MASK GENMASK(3, 2) +#define QCA8K_PORT_HDR_CTRL_RX_S 2 +#define QCA8K_PORT_HDR_CTRL_TX_MASK GENMASK(1, 0) +#define QCA8K_PORT_HDR_CTRL_TX_S 0 +#define QCA8K_PORT_HDR_CTRL_ALL 2 +#define QCA8K_PORT_HDR_CTRL_MGMT 1 +#define QCA8K_PORT_HDR_CTRL_NONE 0 + +/* EEE control registers */ +#define QCA8K_REG_EEE_CTRL 0x100 +#define QCA8K_REG_EEE_CTRL_LPI_EN(_i) ((_i + 1) * 2) + +/* ACL registers */ +#define QCA8K_REG_PORT_VLAN_CTRL0(_i) (0x420 + (_i * 8)) +#define QCA8K_PORT_VLAN_CVID(x) (x << 16) +#define QCA8K_PORT_VLAN_SVID(x) x +#define QCA8K_REG_PORT_VLAN_CTRL1(_i) (0x424 + (_i * 8)) +#define QCA8K_REG_IPV4_PRI_BASE_ADDR 0x470 +#define QCA8K_REG_IPV4_PRI_ADDR_MASK 0x474 + +/* Lookup registers */ +#define QCA8K_REG_ATU_DATA0 0x600 +#define QCA8K_ATU_ADDR2_S 24 +#define QCA8K_ATU_ADDR3_S 16 +#define QCA8K_ATU_ADDR4_S 8 +#define QCA8K_REG_ATU_DATA1 0x604 +#define QCA8K_ATU_PORT_M 0x7f +#define QCA8K_ATU_PORT_S 16 +#define QCA8K_ATU_ADDR0_S 8 +#define QCA8K_REG_ATU_DATA2 0x608 +#define QCA8K_ATU_VID_M 0xfff +#define QCA8K_ATU_VID_S 8 +#define QCA8K_ATU_STATUS_M 0xf +#define QCA8K_ATU_STATUS_STATIC 0xf +#define QCA8K_REG_ATU_FUNC 0x60c +#define QCA8K_ATU_FUNC_BUSY BIT(31) +#define QCA8K_ATU_FUNC_PORT_EN BIT(14) +#define QCA8K_ATU_FUNC_MULTI_EN BIT(13) +#define QCA8K_ATU_FUNC_FULL BIT(12) +#define QCA8K_ATU_FUNC_PORT_M 0xf +#define QCA8K_ATU_FUNC_PORT_S 8 +#define QCA8K_REG_GLOBAL_FW_CTRL0 0x620 +#define QCA8K_GLOBAL_FW_CTRL0_CPU_PORT_EN BIT(10) +#define QCA8K_REG_GLOBAL_FW_CTRL1 0x624 +#define QCA8K_GLOBAL_FW_CTRL1_IGMP_DP_S 24 +#define QCA8K_GLOBAL_FW_CTRL1_BC_DP_S 16 +#define QCA8K_GLOBAL_FW_CTRL1_MC_DP_S 8 +#define QCA8K_GLOBAL_FW_CTRL1_UC_DP_S 0 +#define QCA8K_PORT_LOOKUP_CTRL(_i) (0x660 + (_i) * 0xc) +#define QCA8K_PORT_LOOKUP_MEMBER GENMASK(6, 0) +#define QCA8K_PORT_LOOKUP_STATE_MASK GENMASK(18, 16) +#define QCA8K_PORT_LOOKUP_STATE_DISABLED (0 << 16) +#define QCA8K_PORT_LOOKUP_STATE_BLOCKING (1 << 16) +#define QCA8K_PORT_LOOKUP_STATE_LISTENING (2 << 16) +#define QCA8K_PORT_LOOKUP_STATE_LEARNING (3 << 16) +#define QCA8K_PORT_LOOKUP_STATE_FORWARD (4 << 16) +#define QCA8K_PORT_LOOKUP_STATE GENMASK(18, 16) +#define QCA8K_PORT_LOOKUP_LEARN BIT(20) + +/* Pkt edit registers */ +#define QCA8K_EGRESS_VLAN(x) (0x0c70 + (4 * (x / 2))) + +/* L3 registers */ +#define QCA8K_HROUTER_CONTROL 0xe00 +#define QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_M GENMASK(17, 16) +#define QCA8K_HROUTER_CONTROL_GLB_LOCKTIME_S 16 +#define QCA8K_HROUTER_CONTROL_ARP_AGE_MODE 1 +#define QCA8K_HROUTER_PBASED_CONTROL1 0xe08 +#define QCA8K_HROUTER_PBASED_CONTROL2 0xe0c +#define QCA8K_HNAT_CONTROL 0xe38 + +/* MIB registers */ +#define QCA8K_PORT_MIB_COUNTER(_i) (0x1000 + (_i) * 0x100) + +/* QCA specific MII registers */ +#define MII_ATH_MMD_ADDR 0x0d +#define MII_ATH_MMD_DATA 0x0e + +enum { + QCA8K_PORT_SPEED_10M = 0, + QCA8K_PORT_SPEED_100M = 1, + QCA8K_PORT_SPEED_1000M = 2, + QCA8K_PORT_SPEED_ERR = 3, +}; + +enum qca8k_fdb_cmd { + QCA8K_FDB_FLUSH = 1, + QCA8K_FDB_LOAD = 2, + QCA8K_FDB_PURGE = 3, + QCA8K_FDB_NEXT = 6, + QCA8K_FDB_SEARCH = 7, +}; + +struct ar8xxx_port_status { + struct ethtool_eee eee; + struct net_device *bridge_dev; + int enabled; +}; + +struct qca8k_priv { + struct regmap *regmap; + struct mii_bus *bus; + struct ar8xxx_port_status port_sts[QCA8K_NUM_PORTS]; + struct dsa_switch *ds; + struct mutex reg_mutex; +}; + +struct qca8k_mib_desc { + unsigned int size; + unsigned int offset; + const char *name; +}; + +struct qca8k_fdb { + u16 vid; + u8 port_mask; + u8 aging; + u8 mac[6]; +}; + +#endif /* __QCA8K_H */ diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index 25c55ab05c7d..9133e7926da5 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -3089,7 +3089,7 @@ static void set_rx_mode(struct net_device *dev) iowrite16(new_mode, ioaddr + EL3_CMD); } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* Setup the card so that it can receive frames with an 802.1q VLAN tag. Note that this must be done after each RxReset due to some backwards compatibility logic in the Cyclone and Tornado ASICs */ diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 2ffd63463299..8cc7467b6c1f 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -24,6 +24,7 @@ source "drivers/net/ethernet/agere/Kconfig" source "drivers/net/ethernet/allwinner/Kconfig" source "drivers/net/ethernet/alteon/Kconfig" source "drivers/net/ethernet/altera/Kconfig" +source "drivers/net/ethernet/amazon/Kconfig" source "drivers/net/ethernet/amd/Kconfig" source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 1d349e9aa9a6..a09423df83f2 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/ obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/ obj-$(CONFIG_ALTERA_TSE) += altera/ +obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/ obj-$(CONFIG_NET_VENDOR_AMD) += amd/ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c index 1d1069641d81..8af2c88d5b33 100644 --- a/drivers/net/ethernet/adaptec/starfire.c +++ b/drivers/net/ethernet/adaptec/starfire.c @@ -66,7 +66,7 @@ */ #define ZEROCOPY -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define VLAN_SUPPORT #endif diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 38eaea18da23..00f9ee3fc3e5 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -192,8 +192,8 @@ static int desc_list_init(struct net_device *dev) goto init_error; skb_reserve(new_skb, NET_IP_ALIGN); - /* Invidate the data cache of skb->data range when it is write back - * cache. It will prevent overwritting the new data from DMA + /* Invalidate the data cache of skb->data range when it is write back + * cache. It will prevent overwriting the new data from DMA */ blackfin_dcache_invalidate_range((unsigned long)new_skb->head, (unsigned long)new_skb->end); @@ -1205,7 +1205,7 @@ static void bfin_mac_rx(struct bfin_mac_local *lp) } /* reserve 2 bytes for RXDWA padding */ skb_reserve(new_skb, NET_IP_ALIGN); - /* Invidate the data cache of skb->data range when it is write back + /* Invalidate the data cache of skb->data range when it is write back * cache. It will prevent overwritting the new data from DMA */ blackfin_dcache_invalidate_range((unsigned long)new_skb->head, @@ -1599,7 +1599,7 @@ static int bfin_mac_probe(struct platform_device *pdev) *(__le16 *) (&(ndev->dev_addr[4])) = cpu_to_le16((u16) bfin_read_EMAC_ADDRHI()); /* probe mac */ - /*todo: how to proble? which is revision_register */ + /*todo: how to probe? which is revision_register */ bfin_write_EMAC_ADDRLO(0x12345678); if (bfin_read_EMAC_ADDRLO() != 0x12345678) { dev_err(&pdev->dev, "Cannot detect Blackfin on-chip ethernet MAC controller!\n"); diff --git a/drivers/net/ethernet/aeroflex/greth.c b/drivers/net/ethernet/aeroflex/greth.c index bca07c5c94bd..f8df8248035e 100644 --- a/drivers/net/ethernet/aeroflex/greth.c +++ b/drivers/net/ethernet/aeroflex/greth.c @@ -1105,27 +1105,6 @@ static void greth_set_msglevel(struct net_device *dev, u32 value) struct greth_private *greth = netdev_priv(dev); greth->msg_enable = value; } -static int greth_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct greth_private *greth = netdev_priv(dev); - struct phy_device *phy = greth->phy; - - if (!phy) - return -ENODEV; - - return phy_ethtool_gset(phy, cmd); -} - -static int greth_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct greth_private *greth = netdev_priv(dev); - struct phy_device *phy = greth->phy; - - if (!phy) - return -ENODEV; - - return phy_ethtool_sset(phy, cmd); -} static int greth_get_regs_len(struct net_device *dev) { @@ -1157,12 +1136,12 @@ static void greth_get_regs(struct net_device *dev, struct ethtool_regs *regs, vo static const struct ethtool_ops greth_ethtool_ops = { .get_msglevel = greth_get_msglevel, .set_msglevel = greth_set_msglevel, - .get_settings = greth_get_settings, - .set_settings = greth_set_settings, .get_drvinfo = greth_get_drvinfo, .get_regs_len = greth_get_regs_len, .get_regs = greth_get_regs, .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; static struct net_device_ops greth_netdev_ops = { @@ -1224,7 +1203,7 @@ static int greth_mdio_write(struct mii_bus *bus, int phy, int reg, u16 val) static void greth_link_change(struct net_device *dev) { struct greth_private *greth = netdev_priv(dev); - struct phy_device *phydev = greth->phy; + struct phy_device *phydev = dev->phydev; unsigned long flags; int status_change = 0; u32 ctrl; @@ -1307,7 +1286,6 @@ static int greth_mdio_probe(struct net_device *dev) greth->link = 0; greth->speed = 0; greth->duplex = -1; - greth->phy = phy; return 0; } @@ -1325,6 +1303,7 @@ static int greth_mdio_init(struct greth_private *greth) { int ret; unsigned long timeout; + struct net_device *ndev = greth->netdev; greth->mdio = mdiobus_alloc(); if (!greth->mdio) { @@ -1349,15 +1328,16 @@ static int greth_mdio_init(struct greth_private *greth) goto unreg_mdio; } - phy_start(greth->phy); + phy_start(ndev->phydev); /* If Ethernet debug link is used make autoneg happen right away */ if (greth->edcl && greth_edcl == 1) { - phy_start_aneg(greth->phy); + phy_start_aneg(ndev->phydev); timeout = jiffies + 6*HZ; - while (!phy_aneg_done(greth->phy) && time_before(jiffies, timeout)) { + while (!phy_aneg_done(ndev->phydev) && + time_before(jiffies, timeout)) { } - phy_read_status(greth->phy); + phy_read_status(ndev->phydev); greth_link_change(greth->netdev); } @@ -1569,8 +1549,8 @@ static int greth_of_remove(struct platform_device *of_dev) dma_free_coherent(&of_dev->dev, 1024, greth->tx_bd_base, greth->tx_bd_base_phys); - if (greth->phy) - phy_stop(greth->phy); + if (ndev->phydev) + phy_stop(ndev->phydev); mdiobus_unregister(greth->mdio); unregister_netdev(ndev); diff --git a/drivers/net/ethernet/aeroflex/greth.h b/drivers/net/ethernet/aeroflex/greth.h index 92dd918e4a83..9c07140a5d8d 100644 --- a/drivers/net/ethernet/aeroflex/greth.h +++ b/drivers/net/ethernet/aeroflex/greth.h @@ -123,7 +123,6 @@ struct greth_private { struct napi_struct napi; spinlock_t devlock; - struct phy_device *phy; struct mii_bus *mdio; unsigned int link; unsigned int speed; diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c index c83ebae73d91..906683851c7d 100644 --- a/drivers/net/ethernet/agere/et131x.c +++ b/drivers/net/ethernet/agere/et131x.c @@ -2961,7 +2961,7 @@ static void et131x_get_drvinfo(struct net_device *netdev, sizeof(info->bus_info)); } -static struct ethtool_ops et131x_ethtool_ops = { +static const struct ethtool_ops et131x_ethtool_ops = { .get_drvinfo = et131x_get_drvinfo, .get_regs_len = et131x_get_regs_len, .get_regs = et131x_get_regs, diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig new file mode 100644 index 000000000000..99b30353541a --- /dev/null +++ b/drivers/net/ethernet/amazon/Kconfig @@ -0,0 +1,27 @@ +# +# Amazon network device configuration +# + +config NET_VENDOR_AMAZON + bool "Amazon Devices" + default y + ---help--- + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Amazon devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_AMAZON + +config ENA_ETHERNET + tristate "Elastic Network Adapter (ENA) support" + depends on (PCI_MSI && X86) + ---help--- + This driver supports Elastic Network Adapter (ENA)" + + To compile this driver as a module, choose M here. + The module will be called ena. + +endif #NET_VENDOR_AMAZON diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile new file mode 100644 index 000000000000..8e0b73f60d51 --- /dev/null +++ b/drivers/net/ethernet/amazon/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Amazon network device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena/ diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile new file mode 100644 index 000000000000..eaeeae06c5d9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Elastic Network Adapter (ENA) device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena.o + +ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h new file mode 100644 index 000000000000..a46e749bf226 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -0,0 +1,973 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ADMIN_H_ +#define _ENA_ADMIN_H_ + +enum ena_admin_aq_opcode { + ENA_ADMIN_CREATE_SQ = 1, + + ENA_ADMIN_DESTROY_SQ = 2, + + ENA_ADMIN_CREATE_CQ = 3, + + ENA_ADMIN_DESTROY_CQ = 4, + + ENA_ADMIN_GET_FEATURE = 8, + + ENA_ADMIN_SET_FEATURE = 9, + + ENA_ADMIN_GET_STATS = 11, +}; + +enum ena_admin_aq_completion_status { + ENA_ADMIN_SUCCESS = 0, + + ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, + + ENA_ADMIN_BAD_OPCODE = 2, + + ENA_ADMIN_UNSUPPORTED_OPCODE = 3, + + ENA_ADMIN_MALFORMED_REQUEST = 4, + + /* Additional status is provided in ACQ entry extended_status */ + ENA_ADMIN_ILLEGAL_PARAMETER = 5, + + ENA_ADMIN_UNKNOWN_ERROR = 6, +}; + +enum ena_admin_aq_feature_id { + ENA_ADMIN_DEVICE_ATTRIBUTES = 1, + + ENA_ADMIN_MAX_QUEUES_NUM = 2, + + ENA_ADMIN_RSS_HASH_FUNCTION = 10, + + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, + + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, + + ENA_ADMIN_MTU = 14, + + ENA_ADMIN_RSS_HASH_INPUT = 18, + + ENA_ADMIN_INTERRUPT_MODERATION = 20, + + ENA_ADMIN_AENQ_CONFIG = 26, + + ENA_ADMIN_LINK_CONFIG = 27, + + ENA_ADMIN_HOST_ATTR_CONFIG = 28, + + ENA_ADMIN_FEATURES_OPCODE_NUM = 32, +}; + +enum ena_admin_placement_policy_type { + /* descriptors and headers are in host memory */ + ENA_ADMIN_PLACEMENT_POLICY_HOST = 1, + + /* descriptors and headers are in device memory (a.k.a Low Latency + * Queue) + */ + ENA_ADMIN_PLACEMENT_POLICY_DEV = 3, +}; + +enum ena_admin_link_types { + ENA_ADMIN_LINK_SPEED_1G = 0x1, + + ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2, + + ENA_ADMIN_LINK_SPEED_5G = 0x4, + + ENA_ADMIN_LINK_SPEED_10G = 0x8, + + ENA_ADMIN_LINK_SPEED_25G = 0x10, + + ENA_ADMIN_LINK_SPEED_40G = 0x20, + + ENA_ADMIN_LINK_SPEED_50G = 0x40, + + ENA_ADMIN_LINK_SPEED_100G = 0x80, + + ENA_ADMIN_LINK_SPEED_200G = 0x100, + + ENA_ADMIN_LINK_SPEED_400G = 0x200, +}; + +enum ena_admin_completion_policy_type { + /* completion queue entry for each sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC = 0, + + /* completion queue entry upon request in sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1, + + /* current queue head pointer is updated in OS memory upon sq + * descriptor request + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2, + + /* current queue head pointer is updated in OS memory for each sq + * descriptor + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD = 3, +}; + +/* basic stats return ena_admin_basic_stats while extanded stats return a + * buffer (string format) with additional statistics per queue and per + * device id + */ +enum ena_admin_get_stats_type { + ENA_ADMIN_GET_STATS_TYPE_BASIC = 0, + + ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, +}; + +enum ena_admin_get_stats_scope { + ENA_ADMIN_SPECIFIC_QUEUE = 0, + + ENA_ADMIN_ETH_TRAFFIC = 1, +}; + +struct ena_admin_aq_common_desc { + /* 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command_id; + + /* as appears in ena_admin_aq_opcode */ + u8 opcode; + + /* 0 : phase + * 1 : ctrl_data - control buffer address valid + * 2 : ctrl_data_indirect - control buffer address + * points to list of pages with addresses of control + * buffers + * 7:3 : reserved3 + */ + u8 flags; +}; + +/* used in ena_admin_aq_entry. Can point directly to control data, or to a + * page list chunk. Used also at the end of indirect mode page list chunks, + * for chaining. + */ +struct ena_admin_ctrl_buff_info { + u32 length; + + struct ena_common_mem_addr address; +}; + +struct ena_admin_sq { + u16 sq_idx; + + /* 4:0 : reserved + * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved1; +}; + +struct ena_admin_aq_entry { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + u32 inline_data_w4[12]; +}; + +struct ena_admin_acq_common_desc { + /* command identifier to associate it with the aq descriptor + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command; + + u8 status; + + /* 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 extended_status; + + /* serves as a hint what AQ entries can be revoked */ + u16 sq_head_indx; +}; + +struct ena_admin_acq_entry { + struct ena_admin_acq_common_desc acq_common_descriptor; + + u32 response_specific_data[14]; +}; + +struct ena_admin_aq_create_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved0_w1 + * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved8_w1; + + /* 3:0 : placement_policy - Describing where the SQ + * descriptor ring and the SQ packet headers reside: + * 0x1 - descriptors and headers are in OS memory, + * 0x3 - descriptors and headers in device memory + * (a.k.a Low Latency Queue) + * 6:4 : completion_policy - Describing what policy + * to use for generation completion entry (cqe) in + * the CQ associated with this SQ: 0x0 - cqe for each + * sq descriptor, 0x1 - cqe upon request in sq + * descriptor, 0x2 - current queue head pointer is + * updated in OS memory upon sq descriptor request + * 0x3 - current queue head pointer is updated in OS + * memory for each sq descriptor + * 7 : reserved15_w1 + */ + u8 sq_caps_2; + + /* 0 : is_physically_contiguous - Described if the + * queue ring memory is allocated in physical + * contiguous pages or split. + * 7:1 : reserved17_w1 + */ + u8 sq_caps_3; + + /* associated completion queue id. This CQ must be created prior to + * SQ creation + */ + u16 cq_idx; + + /* submission queue depth in entries */ + u16 sq_depth; + + /* SQ physical base address in OS memory. This field should not be + * used for Low Latency queues. Has to be page aligned. + */ + struct ena_common_mem_addr sq_ba; + + /* specifies queue head writeback location in OS memory. Valid if + * completion_policy is set to completion_policy_head_on_demand or + * completion_policy_head. Has to be cache aligned + */ + struct ena_common_mem_addr sq_head_writeback; + + u32 reserved0_w7; + + u32 reserved0_w8; +}; + +enum ena_admin_sq_direction { + ENA_ADMIN_SQ_DIRECTION_TX = 1, + + ENA_ADMIN_SQ_DIRECTION_RX = 2, +}; + +struct ena_admin_acq_create_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 sq_idx; + + u16 reserved; + + /* queue doorbell address as an offset to PCIe MMIO REG BAR */ + u32 sq_doorbell_offset; + + /* low latency queue ring base address as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_descriptors_offset; + + /* low latency queue headers' memory as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_headers_offset; +}; + +struct ena_admin_aq_destroy_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_sq sq; +}; + +struct ena_admin_acq_destroy_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +struct ena_admin_aq_create_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved5 + * 5 : interrupt_mode_enabled - if set, cq operates + * in interrupt mode, otherwise - polling + * 7:6 : reserved6 + */ + u8 cq_caps_1; + + /* 4:0 : cq_entry_size_words - size of CQ entry in + * 32-bit words, valid values: 4, 8. + * 7:5 : reserved7 + */ + u8 cq_caps_2; + + /* completion queue depth in # of entries. must be power of 2 */ + u16 cq_depth; + + /* msix vector assigned to this cq */ + u32 msix_vector; + + /* cq physical base address in OS memory. CQ must be physically + * contiguous + */ + struct ena_common_mem_addr cq_ba; +}; + +struct ena_admin_acq_create_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 cq_idx; + + /* actual cq depth in number of entries */ + u16 cq_actual_depth; + + u32 numa_node_register_offset; + + u32 cq_head_db_register_offset; + + u32 cq_interrupt_unmask_register_offset; +}; + +struct ena_admin_aq_destroy_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + u16 cq_idx; + + u16 reserved1; +}; + +struct ena_admin_acq_destroy_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +/* ENA AQ Get Statistics command. Extended statistics are placed in control + * buffer pointed by AQ entry + */ +struct ena_admin_aq_get_stats_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + /* command specific inline data */ + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + /* stats type as defined in enum ena_admin_get_stats_type */ + u8 type; + + /* stats scope defined in enum ena_admin_get_stats_scope */ + u8 scope; + + u16 reserved3; + + /* queue id. used when scope is specific_queue */ + u16 queue_idx; + + /* device id, value 0xFFFF means mine. only privileged device can get + * stats of other device + */ + u16 device_id; +}; + +/* Basic Statistics Command. */ +struct ena_admin_basic_stats { + u32 tx_bytes_low; + + u32 tx_bytes_high; + + u32 tx_pkts_low; + + u32 tx_pkts_high; + + u32 rx_bytes_low; + + u32 rx_bytes_high; + + u32 rx_pkts_low; + + u32 rx_pkts_high; + + u32 rx_drops_low; + + u32 rx_drops_high; +}; + +struct ena_admin_acq_get_stats_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + struct ena_admin_basic_stats basic_stats; +}; + +struct ena_admin_get_set_feature_common_desc { + /* 1:0 : select - 0x1 - current value; 0x3 - default + * value + * 7:3 : reserved3 + */ + u8 flags; + + /* as appears in ena_admin_aq_feature_id */ + u8 feature_id; + + u16 reserved16; +}; + +struct ena_admin_device_attr_feature_desc { + u32 impl_id; + + u32 device_version; + + /* bitmap of ena_admin_aq_feature_id */ + u32 supported_features; + + u32 reserved3; + + /* Indicates how many bits are used physical address access. */ + u32 phys_addr_width; + + /* Indicates how many bits are used virtual address access. */ + u32 virt_addr_width; + + /* unicast MAC address (in Network byte order) */ + u8 mac_addr[6]; + + u8 reserved7[2]; + + u32 max_mtu; +}; + +struct ena_admin_queue_feature_desc { + /* including LLQs */ + u32 max_sq_num; + + u32 max_sq_depth; + + u32 max_cq_num; + + u32 max_cq_depth; + + u32 max_llq_num; + + u32 max_llq_depth; + + u32 max_header_size; + + /* Maximum Descriptors number, including meta descriptor, allowed for + * a single Tx packet + */ + u16 max_packet_tx_descs; + + /* Maximum Descriptors number allowed for a single Rx packet */ + u16 max_packet_rx_descs; +}; + +struct ena_admin_set_feature_mtu_desc { + /* exclude L2 */ + u32 mtu; +}; + +struct ena_admin_set_feature_host_attr_desc { + /* host OS info base address in OS memory. host info is 4KB of + * physically contiguous + */ + struct ena_common_mem_addr os_info_ba; + + /* host debug area base address in OS memory. debug area must be + * physically contiguous + */ + struct ena_common_mem_addr debug_ba; + + /* debug area size */ + u32 debug_area_size; +}; + +struct ena_admin_feature_intr_moder_desc { + /* interrupt delay granularity in usec */ + u16 intr_delay_resolution; + + u16 reserved; +}; + +struct ena_admin_get_feature_link_desc { + /* Link speed in Mb */ + u32 speed; + + /* bit field of enum ena_admin_link types */ + u32 supported; + + /* 0 : autoneg + * 1 : duplex - Full Duplex + * 31:2 : reserved2 + */ + u32 flags; +}; + +struct ena_admin_feature_aenq_desc { + /* bitmask for AENQ groups the device can report */ + u32 supported_groups; + + /* bitmask for AENQ groups to report */ + u32 enabled_groups; +}; + +struct ena_admin_feature_offload_desc { + /* 0 : TX_L3_csum_ipv4 + * 1 : TX_L4_ipv4_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 2 : TX_L4_ipv4_csum_full + * 3 : TX_L4_ipv6_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 4 : TX_L4_ipv6_csum_full + * 5 : tso_ipv4 + * 6 : tso_ipv6 + * 7 : tso_ecn + */ + u32 tx; + + /* Receive side supported stateless offload + * 0 : RX_L3_csum_ipv4 - IPv4 checksum + * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum + * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum + * 3 : RX_hash - Hash calculation + */ + u32 rx_supported; + + u32 rx_enabled; +}; + +enum ena_admin_hash_functions { + ENA_ADMIN_TOEPLITZ = 1, + + ENA_ADMIN_CRC32 = 2, +}; + +struct ena_admin_feature_rss_flow_hash_control { + u32 keys_num; + + u32 reserved; + + u32 key[10]; +}; + +struct ena_admin_feature_rss_flow_hash_function { + /* 7:0 : funcs - bitmask of ena_admin_hash_functions */ + u32 supported_func; + + /* 7:0 : selected_func - bitmask of + * ena_admin_hash_functions + */ + u32 selected_func; + + /* initial value */ + u32 init_val; +}; + +/* RSS flow hash protocols */ +enum ena_admin_flow_hash_proto { + ENA_ADMIN_RSS_TCP4 = 0, + + ENA_ADMIN_RSS_UDP4 = 1, + + ENA_ADMIN_RSS_TCP6 = 2, + + ENA_ADMIN_RSS_UDP6 = 3, + + ENA_ADMIN_RSS_IP4 = 4, + + ENA_ADMIN_RSS_IP6 = 5, + + ENA_ADMIN_RSS_IP4_FRAG = 6, + + ENA_ADMIN_RSS_NOT_IP = 7, + + ENA_ADMIN_RSS_PROTO_NUM = 16, +}; + +/* RSS flow hash fields */ +enum ena_admin_flow_hash_fields { + /* Ethernet Dest Addr */ + ENA_ADMIN_RSS_L2_DA = 0, + + /* Ethernet Src Addr */ + ENA_ADMIN_RSS_L2_SA = 1, + + /* ipv4/6 Dest Addr */ + ENA_ADMIN_RSS_L3_DA = 2, + + /* ipv4/6 Src Addr */ + ENA_ADMIN_RSS_L3_SA = 5, + + /* tcp/udp Dest Port */ + ENA_ADMIN_RSS_L4_DP = 6, + + /* tcp/udp Src Port */ + ENA_ADMIN_RSS_L4_SP = 7, +}; + +struct ena_admin_proto_input { + /* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */ + u16 fields; + + u16 reserved2; +}; + +struct ena_admin_feature_rss_hash_control { + struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM]; +}; + +struct ena_admin_feature_rss_flow_hash_input { + /* supported hash input sorting + * 1 : L3_sort - support swap L3 addresses if DA is + * smaller than SA + * 2 : L4_sort - support swap L4 ports if DP smaller + * SP + */ + u16 supported_input_sort; + + /* enabled hash input sorting + * 1 : enable_L3_sort - enable swap L3 addresses if + * DA smaller than SA + * 2 : enable_L4_sort - enable swap L4 ports if DP + * smaller than SP + */ + u16 enabled_input_sort; +}; + +enum ena_admin_os_type { + ENA_ADMIN_OS_LINUX = 1, + + ENA_ADMIN_OS_WIN = 2, + + ENA_ADMIN_OS_DPDK = 3, + + ENA_ADMIN_OS_FREEBSD = 4, + + ENA_ADMIN_OS_IPXE = 5, +}; + +struct ena_admin_host_info { + /* defined in enum ena_admin_os_type */ + u32 os_type; + + /* os distribution string format */ + u8 os_dist_str[128]; + + /* OS distribution numeric format */ + u32 os_dist; + + /* kernel version string format */ + u8 kernel_ver_str[32]; + + /* Kernel version numeric format */ + u32 kernel_ver; + + /* 7:0 : major + * 15:8 : minor + * 23:16 : sub_minor + */ + u32 driver_version; + + /* features bitmap */ + u32 supported_network_features[4]; +}; + +struct ena_admin_rss_ind_table_entry { + u16 cq_idx; + + u16 reserved; +}; + +struct ena_admin_feature_rss_ind_table { + /* min supported table size (2^min_size) */ + u16 min_size; + + /* max supported table size (2^max_size) */ + u16 max_size; + + /* table size (2^size) */ + u16 size; + + u16 reserved; + + /* index of the inline entry. 0xFFFFFFFF means invalid */ + u32 inline_index; + + /* used for updating single entry, ignored when setting the entire + * table through the control buffer. + */ + struct ena_admin_rss_ind_table_entry inline_entry; +}; + +struct ena_admin_get_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + u32 raw[11]; +}; + +struct ena_admin_get_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + + struct ena_admin_device_attr_feature_desc dev_attr; + + struct ena_admin_queue_feature_desc max_queue; + + struct ena_admin_feature_aenq_desc aenq; + + struct ena_admin_get_feature_link_desc link; + + struct ena_admin_feature_offload_desc offload; + + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + struct ena_admin_feature_rss_ind_table ind_table; + + struct ena_admin_feature_intr_moder_desc intr_moderation; + } u; +}; + +struct ena_admin_set_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + union { + u32 raw[11]; + + /* mtu size */ + struct ena_admin_set_feature_mtu_desc mtu; + + /* host attributes */ + struct ena_admin_set_feature_host_attr_desc host_attr; + + /* AENQ configuration */ + struct ena_admin_feature_aenq_desc aenq; + + /* rss flow hash function */ + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + /* rss flow hash input */ + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + /* rss indirection table */ + struct ena_admin_feature_rss_ind_table ind_table; + } u; +}; + +struct ena_admin_set_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + } u; +}; + +struct ena_admin_aenq_common_desc { + u16 group; + + u16 syndrom; + + /* 0 : phase */ + u8 flags; + + u8 reserved1[3]; + + u32 timestamp_low; + + u32 timestamp_high; +}; + +/* asynchronous event notification groups */ +enum ena_admin_aenq_group { + ENA_ADMIN_LINK_CHANGE = 0, + + ENA_ADMIN_FATAL_ERROR = 1, + + ENA_ADMIN_WARNING = 2, + + ENA_ADMIN_NOTIFICATION = 3, + + ENA_ADMIN_KEEP_ALIVE = 4, + + ENA_ADMIN_AENQ_GROUPS_NUM = 5, +}; + +enum ena_admin_aenq_notification_syndrom { + ENA_ADMIN_SUSPEND = 0, + + ENA_ADMIN_RESUME = 1, +}; + +struct ena_admin_aenq_entry { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* command specific inline data */ + u32 inline_data_w4[12]; +}; + +struct ena_admin_aenq_link_change_desc { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* 0 : link_status */ + u32 flags; +}; + +struct ena_admin_ena_mmio_req_read_less_resp { + u16 req_id; + + u16 reg_off; + + /* value is valid when poll is cleared */ + u32 reg_val; +}; + +/* aq_common_desc */ +#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) + +/* sq */ +#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5) + +/* acq_common_desc */ +#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aq_create_sq_cmd */ +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0) + +/* aq_create_cq_cmd */ +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) + +/* get_set_feature_common_desc */ +#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) + +/* get_feature_link_desc */ +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0) +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1 +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1) + +/* feature_offload_desc */ +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3) + +/* feature_rss_flow_hash_function */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0) + +/* feature_rss_flow_hash_input */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2) + +/* host_info */ +#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0) +#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8 +#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8) +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16 +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16) + +/* aenq_common_desc */ +#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aenq_link_change_desc */ +#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0) + +#endif /*_ENA_ADMIN_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c new file mode 100644 index 000000000000..3066d9c99984 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -0,0 +1,2666 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_com.h" + +/*****************************************************************************/ +/*****************************************************************************/ + +/* Timeout in micro-sec */ +#define ADMIN_CMD_TIMEOUT_US (1000000) + +#define ENA_ASYNC_QUEUE_DEPTH 4 +#define ENA_ADMIN_QUEUE_DEPTH 32 + +#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \ + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \ + | (ENA_COMMON_SPEC_VERSION_MINOR)) + +#define ENA_CTRL_MAJOR 0 +#define ENA_CTRL_MINOR 0 +#define ENA_CTRL_SUB_MINOR 1 + +#define MIN_ENA_CTRL_VER \ + (((ENA_CTRL_MAJOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ + ((ENA_CTRL_MINOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ + (ENA_CTRL_SUB_MINOR)) + +#define ENA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) +#define ENA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) + +#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF + +/*****************************************************************************/ +/*****************************************************************************/ +/*****************************************************************************/ + +enum ena_cmd_status { + ENA_CMD_SUBMITTED, + ENA_CMD_COMPLETED, + /* Abort - canceled by the driver */ + ENA_CMD_ABORTED, +}; + +struct ena_comp_ctx { + struct completion wait_event; + struct ena_admin_acq_entry *user_cqe; + u32 comp_size; + enum ena_cmd_status status; + /* status from the device */ + u8 comp_status; + u8 cmd_opcode; + bool occupied; +}; + +struct ena_com_stats_ctx { + struct ena_admin_aq_get_stats_cmd get_cmd; + struct ena_admin_acq_get_stats_resp get_resp; +}; + +static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, + struct ena_common_mem_addr *ena_addr, + dma_addr_t addr) +{ + if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) { + pr_err("dma address has more bits that the device supports\n"); + return -EINVAL; + } + + ena_addr->mem_addr_low = (u32)addr; + ena_addr->mem_addr_high = (u64)addr >> 32; + + return 0; +} + +static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_sq *sq = &queue->sq; + u16 size = ADMIN_SQ_SIZE(queue->q_depth); + + sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr, + GFP_KERNEL); + + if (!sq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + sq->head = 0; + sq->tail = 0; + sq->phase = 1; + + sq->db_addr = NULL; + + return 0; +} + +static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_cq *cq = &queue->cq; + u16 size = ADMIN_CQ_SIZE(queue->q_depth); + + cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr, + GFP_KERNEL); + + if (!cq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + cq->head = 0; + cq->phase = 1; + + return 0; +} + +static int ena_com_admin_init_aenq(struct ena_com_dev *dev, + struct ena_aenq_handlers *aenq_handlers) +{ + struct ena_com_aenq *aenq = &dev->aenq; + u32 addr_low, addr_high, aenq_caps; + u16 size; + + dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH; + size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH); + aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr, + GFP_KERNEL); + + if (!aenq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + aenq->head = aenq->q_depth; + aenq->phase = 1; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + + writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF); + + aenq_caps = 0; + aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= (sizeof(struct ena_admin_aenq_entry) + << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); + + if (unlikely(!aenq_handlers)) { + pr_err("aenq handlers pointer is NULL\n"); + return -EINVAL; + } + + aenq->aenq_handlers = aenq_handlers; + + return 0; +} + +static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, + struct ena_comp_ctx *comp_ctx) +{ + comp_ctx->occupied = false; + atomic_dec(&queue->outstanding_cmds); +} + +static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, + u16 command_id, bool capture) +{ + if (unlikely(command_id >= queue->q_depth)) { + pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", + command_id, queue->q_depth); + return NULL; + } + + if (unlikely(queue->comp_ctx[command_id].occupied && capture)) { + pr_err("Completion context is occupied\n"); + return NULL; + } + + if (capture) { + atomic_inc(&queue->outstanding_cmds); + queue->comp_ctx[command_id].occupied = true; + } + + return &queue->comp_ctx[command_id]; +} + +static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct ena_comp_ctx *comp_ctx; + u16 tail_masked, cmd_id; + u16 queue_size_mask; + u16 cnt; + + queue_size_mask = admin_queue->q_depth - 1; + + tail_masked = admin_queue->sq.tail & queue_size_mask; + + /* In case of queue FULL */ + cnt = admin_queue->sq.tail - admin_queue->sq.head; + if (cnt >= admin_queue->q_depth) { + pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n", + admin_queue->sq.tail, admin_queue->sq.head, + admin_queue->q_depth); + admin_queue->stats.out_of_space++; + return ERR_PTR(-ENOSPC); + } + + cmd_id = admin_queue->curr_cmd_id; + + cmd->aq_common_descriptor.flags |= admin_queue->sq.phase & + ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + + cmd->aq_common_descriptor.command_id |= cmd_id & + ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true); + if (unlikely(!comp_ctx)) + return ERR_PTR(-EINVAL); + + comp_ctx->status = ENA_CMD_SUBMITTED; + comp_ctx->comp_size = (u32)comp_size_in_bytes; + comp_ctx->user_cqe = comp; + comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + + reinit_completion(&comp_ctx->wait_event); + + memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes); + + admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) & + queue_size_mask; + + admin_queue->sq.tail++; + admin_queue->stats.submitted_cmd++; + + if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0)) + admin_queue->sq.phase = !admin_queue->sq.phase; + + writel(admin_queue->sq.tail, admin_queue->sq.db_addr); + + return comp_ctx; +} + +static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) +{ + size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); + struct ena_comp_ctx *comp_ctx; + u16 i; + + queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); + if (unlikely(!queue->comp_ctx)) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + for (i = 0; i < queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(queue, i, false); + if (comp_ctx) + init_completion(&comp_ctx->wait_event); + } + + return 0; +} + +static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + unsigned long flags; + struct ena_comp_ctx *comp_ctx; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + if (unlikely(!admin_queue->running_state)) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + return ERR_PTR(-ENODEV); + } + comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd, + cmd_size_in_bytes, + comp, + comp_size_in_bytes); + if (unlikely(IS_ERR(comp_ctx))) + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + return comp_ctx; +} + +static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_sq *io_sq) +{ + size_t size; + int dev_node = 0; + + memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + io_sq->desc_entry_size = + (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_desc) : + sizeof(struct ena_eth_io_rx_desc); + + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + } + } else { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + } + } + + if (!io_sq->desc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_sq->tail = 0; + io_sq->next_to_comp = 0; + io_sq->phase = 1; + + return 0; +} + +static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_cq *io_cq) +{ + size_t size; + int prev_node = 0; + + memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + /* Use the basic completion descriptor for Rx */ + io_cq->cdesc_entry_size_in_bytes = + (io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_cdesc) : + sizeof(struct ena_eth_io_rx_cdesc_base); + + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + prev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, prev_node); + if (!io_cq->cdesc_addr.virt_addr) { + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, + GFP_KERNEL); + } + + if (!io_cq->cdesc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_cq->phase = 1; + io_cq->head = 0; + + return 0; +} + +static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue, + struct ena_admin_acq_entry *cqe) +{ + struct ena_comp_ctx *comp_ctx; + u16 cmd_id; + + cmd_id = cqe->acq_common_descriptor.command & + ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false); + if (unlikely(!comp_ctx)) { + pr_err("comp_ctx is NULL. Changing the admin queue running state\n"); + admin_queue->running_state = false; + return; + } + + comp_ctx->status = ENA_CMD_COMPLETED; + comp_ctx->comp_status = cqe->acq_common_descriptor.status; + + if (comp_ctx->user_cqe) + memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size); + + if (!admin_queue->polling) + complete(&comp_ctx->wait_event); +} + +static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue) +{ + struct ena_admin_acq_entry *cqe = NULL; + u16 comp_num = 0; + u16 head_masked; + u8 phase; + + head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1); + phase = admin_queue->cq.phase; + + cqe = &admin_queue->cq.entries[head_masked]; + + /* Go over all the completions */ + while ((cqe->acq_common_descriptor.flags & + ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Do not read the rest of the completion entry before the + * phase bit was validated + */ + rmb(); + ena_com_handle_single_admin_completion(admin_queue, cqe); + + head_masked++; + comp_num++; + if (unlikely(head_masked == admin_queue->q_depth)) { + head_masked = 0; + phase = !phase; + } + + cqe = &admin_queue->cq.entries[head_masked]; + } + + admin_queue->cq.head += comp_num; + admin_queue->cq.phase = phase; + admin_queue->sq.head += comp_num; + admin_queue->stats.completed_cmd += comp_num; +} + +static int ena_com_comp_status_to_errno(u8 comp_status) +{ + if (unlikely(comp_status != 0)) + pr_err("admin command failed[%u]\n", comp_status); + + if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR)) + return -EINVAL; + + switch (comp_status) { + case ENA_ADMIN_SUCCESS: + return 0; + case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE: + return -ENOMEM; + case ENA_ADMIN_UNSUPPORTED_OPCODE: + return -EPERM; + case ENA_ADMIN_BAD_OPCODE: + case ENA_ADMIN_MALFORMED_REQUEST: + case ENA_ADMIN_ILLEGAL_PARAMETER: + case ENA_ADMIN_UNKNOWN_ERROR: + return -EINVAL; + } + + return 0; +} + +static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + u32 start_time; + int ret; + + start_time = ((u32)jiffies_to_usecs(jiffies)); + + while (comp_ctx->status == ENA_CMD_SUBMITTED) { + if ((((u32)jiffies_to_usecs(jiffies)) - start_time) > + ADMIN_CMD_TIMEOUT_US) { + pr_err("Wait for completion (polling) timeout\n"); + /* ENA didn't have any completion */ + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.no_completion++; + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + ret = -ETIME; + goto err; + } + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + msleep(100); + } + + if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { + pr_err("Command was aborted\n"); + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.aborted_cmd++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + ret = -ENODEV; + goto err; + } + + WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", + comp_ctx->status); + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + int ret; + + wait_for_completion_timeout(&comp_ctx->wait_event, + usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US)); + + /* In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors + * 1) No completion (timeout reached) + * 2) There is completion but the device didn't get any msi-x interrupt. + */ + if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) { + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + admin_queue->stats.no_completion++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + if (comp_ctx->status == ENA_CMD_COMPLETED) + pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n", + comp_ctx->cmd_opcode); + else + pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n", + comp_ctx->cmd_opcode, comp_ctx->status); + + admin_queue->running_state = false; + ret = -ETIME; + goto err; + } + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +/* This method read the hardware device register through posting writes + * and waiting for response + * On timeout the function will return ENA_MMIO_READ_TIMEOUT + */ +static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp = + mmio_read->read_resp; + u32 mmio_read_reg, ret; + unsigned long flags; + int i; + + might_sleep(); + + /* If readless is disabled, perform regular read */ + if (!mmio_read->readless_supported) + return readl(ena_dev->reg_bar + offset); + + spin_lock_irqsave(&mmio_read->lock, flags); + mmio_read->seq_num++; + + read_resp->req_id = mmio_read->seq_num + 0xDEAD; + mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & + ENA_REGS_MMIO_REG_READ_REG_OFF_MASK; + mmio_read_reg |= mmio_read->seq_num & + ENA_REGS_MMIO_REG_READ_REQ_ID_MASK; + + /* make sure read_resp->req_id get updated before the hw can write + * there + */ + wmb(); + + writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); + + for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) { + if (read_resp->req_id == mmio_read->seq_num) + break; + + udelay(1); + } + + if (unlikely(i == ENA_REG_READ_TIMEOUT)) { + pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); + ret = ENA_MMIO_READ_TIMEOUT; + goto err; + } + + if (read_resp->reg_off != offset) { + pr_err("Read failure: wrong offset provided"); + ret = ENA_MMIO_READ_TIMEOUT; + } else { + ret = read_resp->reg_val; + } +err: + spin_unlock_irqrestore(&mmio_read->lock, flags); + + return ret; +} + +/* There are two types to wait for completion. + * Polling mode - wait until the completion is available. + * Async mode - wait on wait queue until the completion is ready + * (or the timeout expired). + * It is expected that the IRQ called ena_com_handle_admin_completion + * to mark the completions. + */ +static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + if (admin_queue->polling) + return ena_com_wait_and_process_admin_cq_polling(comp_ctx, + admin_queue); + + return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx, + admin_queue); +} + +static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_sq_cmd destroy_cmd; + struct ena_admin_acq_destroy_sq_resp_desc destroy_resp; + u8 direction; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + destroy_cmd.sq.sq_identity |= (direction << + ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_SQ_SQ_DIRECTION_MASK; + + destroy_cmd.sq.sq_idx = io_sq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("failed to destroy io sq error: %d\n", ret); + + return ret; +} + +static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, + struct ena_com_io_cq *io_cq) +{ + size_t size; + + if (io_cq->cdesc_addr.virt_addr) { + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + dma_free_coherent(ena_dev->dmadev, size, + io_cq->cdesc_addr.virt_addr, + io_cq->cdesc_addr.phys_addr); + + io_cq->cdesc_addr.virt_addr = NULL; + } + + if (io_sq->desc_addr.virt_addr) { + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + dma_free_coherent(ena_dev->dmadev, size, + io_sq->desc_addr.virt_addr, + io_sq->desc_addr.phys_addr); + else + devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr); + + io_sq->desc_addr.virt_addr = NULL; + } +} + +static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, + u16 exp_state) +{ + u32 val, i; + + for (i = 0; i < timeout; i++) { + val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == + exp_state) + return 0; + + /* The resolution of the timeout is 100ms */ + msleep(100); + } + + return -ETIME; +} + +static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev, + enum ena_admin_aq_feature_id feature_id) +{ + u32 feature_mask = 1 << feature_id; + + /* Device attributes is always supported */ + if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) && + !(ena_dev->supported_features & feature_mask)) + return false; + + return true; +} + +static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_get_feat_cmd get_cmd; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) { + pr_info("Feature %d isn't supported\n", feature_id); + return -EPERM; + } + + memset(&get_cmd, 0x0, sizeof(get_cmd)); + admin_queue = &ena_dev->admin_queue; + + get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE; + + if (control_buff_size) + get_cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + else + get_cmd.aq_common_descriptor.flags = 0; + + ret = ena_com_mem_addr_set(ena_dev, + &get_cmd.control_buffer.address, + control_buf_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + get_cmd.control_buffer.length = control_buff_size; + + get_cmd.feat_common.feature_id = feature_id; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *) + &get_cmd, + sizeof(get_cmd), + (struct ena_admin_acq_entry *) + get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to submit get_feature command %d error: %d\n", + feature_id, ret); + + return ret; +} + +static int ena_com_get_feature(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id) +{ + return ena_com_get_feature_ex(ena_dev, + get_resp, + feature_id, + 0, + 0); +} + +static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_key = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + &rss->hash_key_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_key)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_key) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + rss->hash_key, rss->hash_key_dma_addr); + rss->hash_key = NULL; +} + +static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_ctrl = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + &rss->hash_ctrl_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_ctrl)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_ctrl) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + rss->hash_ctrl, rss->hash_ctrl_dma_addr); + rss->hash_ctrl = NULL; +} + +static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, + u16 log_size) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + size_t tbl_size; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + if (unlikely(ret)) + return ret; + + if ((get_resp.u.ind_table.min_size > log_size) || + (get_resp.u.ind_table.max_size < log_size)) { + pr_err("indirect table size doesn't fit. requested size: %d while min is:%d and max %d\n", + 1 << log_size, 1 << get_resp.u.ind_table.min_size, + 1 << get_resp.u.ind_table.max_size); + return -EINVAL; + } + + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rss->rss_ind_tbl = + dma_zalloc_coherent(ena_dev->dmadev, tbl_size, + &rss->rss_ind_tbl_dma_addr, GFP_KERNEL); + if (unlikely(!rss->rss_ind_tbl)) + goto mem_err1; + + tbl_size = (1ULL << log_size) * sizeof(u16); + rss->host_rss_ind_tbl = + devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); + if (unlikely(!rss->host_rss_ind_tbl)) + goto mem_err2; + + rss->tbl_log_size = log_size; + + return 0; + +mem_err2: + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; +mem_err1: + rss->tbl_log_size = 0; + return -ENOMEM; +} + +static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + size_t tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + if (rss->rss_ind_tbl) + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; + + if (rss->host_rss_ind_tbl) + devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl); + rss->host_rss_ind_tbl = NULL; +} + +static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, u16 cq_idx) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_sq_cmd create_cmd; + struct ena_admin_acq_create_sq_resp_desc cmd_completion; + u8 direction; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ; + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + create_cmd.sq_identity |= (direction << + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK; + + create_cmd.sq_caps_2 |= io_sq->mem_queue_type & + ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK; + + create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC << + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK; + + create_cmd.sq_caps_3 |= + ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK; + + create_cmd.cq_idx = cq_idx; + create_cmd.sq_depth = io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.sq_ba, + io_sq->desc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO SQ. error: %d\n", ret); + return ret; + } + + io_sq->idx = cmd_completion.sq_idx; + + io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + (uintptr_t)cmd_completion.sq_doorbell_offset); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_headers_offset); + + io_sq->desc_addr.pbuf_dev_addr = + (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_descriptors_offset); + } + + pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); + + return ret; +} + +static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_com_io_sq *io_sq; + u16 qid; + int i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + qid = rss->host_rss_ind_tbl[i]; + if (qid >= ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + io_sq = &ena_dev->io_sq_queues[qid]; + + if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX) + return -EINVAL; + + rss->rss_ind_tbl[i].cq_idx = io_sq->idx; + } + + return 0; +} + +static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev) +{ + u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 }; + struct ena_rss *rss = &ena_dev->rss; + u8 idx; + u16 i; + + for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++) + dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + idx = (u8)rss->rss_ind_tbl[i].cq_idx; + + if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx]; + } + + return 0; +} + +static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + size_t size; + + size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS; + + ena_dev->intr_moder_tbl = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + if (!ena_dev->intr_moder_tbl) + return -ENOMEM; + + ena_com_config_default_interrupt_moderation_table(ena_dev); + + return 0; +} + +static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, + u16 intr_delay_resolution) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int i; + + if (!intr_delay_resolution) { + pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n"); + intr_delay_resolution = 1; + } + ena_dev->intr_delay_resolution = intr_delay_resolution; + + /* update Rx */ + for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++) + intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution; + + /* update Tx */ + ena_dev->intr_moder_tx_interval /= intr_delay_resolution; +} + +/*****************************************************************************/ +/******************************* API ******************************/ +/*****************************************************************************/ + +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *comp, + size_t comp_size) +{ + struct ena_comp_ctx *comp_ctx; + int ret; + + comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size, + comp, comp_size); + if (unlikely(IS_ERR(comp_ctx))) { + pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx)); + return PTR_ERR(comp_ctx); + } + + ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue); + if (unlikely(ret)) { + if (admin_queue->running_state) + pr_err("Failed to process command. ret = %d\n", ret); + else + pr_debug("Failed to process command. ret = %d\n", ret); + } + return ret; +} + +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_cq_cmd create_cmd; + struct ena_admin_acq_create_cq_resp_desc cmd_completion; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ; + + create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) & + ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + create_cmd.cq_caps_1 |= + ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK; + + create_cmd.msix_vector = io_cq->msix_vector; + create_cmd.cq_depth = io_cq->q_depth; + + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.cq_ba, + io_cq->cdesc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO CQ. error: %d\n", ret); + return ret; + } + + io_cq->idx = cmd_completion.cq_idx; + + io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_interrupt_unmask_register_offset); + + if (cmd_completion.cq_head_db_register_offset) + io_cq->cq_head_db_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_head_db_register_offset); + + if (cmd_completion.numa_node_register_offset) + io_cq->numa_node_cfg_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.numa_node_register_offset); + + pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); + + return ret; +} + +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq) +{ + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Invalid queue number %d but the max is %d\n", qid, + ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + *io_sq = &ena_dev->io_sq_queues[qid]; + *io_cq = &ena_dev->io_cq_queues[qid]; + + return 0; +} + +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_comp_ctx *comp_ctx; + u16 i; + + if (!admin_queue->comp_ctx) + return; + + for (i = 0; i < admin_queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(admin_queue, i, false); + if (unlikely(!comp_ctx)) + break; + + comp_ctx->status = ENA_CMD_ABORTED; + + complete(&comp_ctx->wait_event); + } +} + +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + while (atomic_read(&admin_queue->outstanding_cmds) != 0) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + msleep(20); + spin_lock_irqsave(&admin_queue->q_lock, flags); + } + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_cq_cmd destroy_cmd; + struct ena_admin_acq_destroy_cq_resp_desc destroy_resp; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + destroy_cmd.cq_idx = io_cq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("Failed to destroy IO CQ. error: %d\n", ret); + + return ret; +} + +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev) +{ + return ena_dev->admin_queue.running_state; +} + +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_dev->admin_queue.running_state = state; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev) +{ + u16 depth = ena_dev->aenq.q_depth; + + WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n"); + + /* Init head_db to mark that all entries in the queue + * are initially available + */ + writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG); + if (ret) { + pr_info("Can't get aenq configuration\n"); + return ret; + } + + if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) { + pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n", + get_resp.u.aenq.supported_groups, groups_flag); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG; + cmd.u.aenq.enabled_groups = groups_flag; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to config AENQ ret: %d\n", ret); + + return ret; +} + +int ena_com_get_dma_width(struct ena_com_dev *ena_dev) +{ + u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + int width; + + if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> + ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + + pr_debug("ENA dma width: %d\n", width); + + if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) { + pr_err("DMA width illegal value: %d\n", width); + return -EINVAL; + } + + ena_dev->dma_addr_bits = width; + + return width; +} + +int ena_com_validate_version(struct ena_com_dev *ena_dev) +{ + u32 ver; + u32 ctrl_ver; + u32 ctrl_ver_masked; + + /* Make sure the ENA version and the controller version are at least + * as the driver expects + */ + ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF); + ctrl_ver = ena_com_reg_bar_read32(ena_dev, + ENA_REGS_CONTROLLER_VERSION_OFF); + + if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || + (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + pr_info("ena device version: %d.%d\n", + (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); + + if (ver < MIN_ENA_VER) { + pr_err("ENA version is lower than the minimal version the driver supports\n"); + return -1; + } + + pr_info("ena controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> + ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + + ctrl_ver_masked = + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); + + /* Validate the ctrl version without the implementation ID */ + if (ctrl_ver_masked < MIN_ENA_CTRL_VER) { + pr_err("ENA ctrl version is lower than the minimal ctrl version the driver supports\n"); + return -1; + } + + return 0; +} + +void ena_com_admin_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_com_admin_cq *cq = &admin_queue->cq; + struct ena_com_admin_sq *sq = &admin_queue->sq; + struct ena_com_aenq *aenq = &ena_dev->aenq; + u16 size; + + if (admin_queue->comp_ctx) + devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx); + admin_queue->comp_ctx = NULL; + size = ADMIN_SQ_SIZE(admin_queue->q_depth); + if (sq->entries) + dma_free_coherent(ena_dev->dmadev, size, sq->entries, + sq->dma_addr); + sq->entries = NULL; + + size = ADMIN_CQ_SIZE(admin_queue->q_depth); + if (cq->entries) + dma_free_coherent(ena_dev->dmadev, size, cq->entries, + cq->dma_addr); + cq->entries = NULL; + + size = ADMIN_AENQ_SIZE(aenq->q_depth); + if (ena_dev->aenq.entries) + dma_free_coherent(ena_dev->dmadev, size, aenq->entries, + aenq->dma_addr); + aenq->entries = NULL; +} + +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) +{ + ena_dev->admin_queue.polling = polling; +} + +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + spin_lock_init(&mmio_read->lock); + mmio_read->read_resp = + dma_zalloc_coherent(ena_dev->dmadev, + sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (unlikely(!mmio_read->read_resp)) + return -ENOMEM; + + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + mmio_read->read_resp->req_id = 0x0; + mmio_read->seq_num = 0x0; + mmio_read->readless_supported = true; + + return 0; +} + +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + mmio_read->readless_supported = readless_supported; +} + +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); + + dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), + mmio_read->read_resp, mmio_read->read_resp_dma_addr); + + mmio_read->read_resp = NULL; +} + +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + u32 addr_low, addr_high; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); +} + +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high; + int ret; + + dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) { + pr_err("Device isn't ready, abort com init\n"); + return -ENODEV; + } + + admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH; + + admin_queue->q_dmadev = ena_dev->dmadev; + admin_queue->polling = false; + admin_queue->curr_cmd_id = 0; + + atomic_set(&admin_queue->outstanding_cmds, 0); + + if (init_spinlock) + spin_lock_init(&admin_queue->q_lock); + + ret = ena_com_init_comp_ctxt(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_sq(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_cq(admin_queue); + if (ret) + goto error; + + admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + ENA_REGS_AQ_DB_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF); + + aq_caps = 0; + aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK; + aq_caps |= (sizeof(struct ena_admin_aq_entry) << + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + + acq_caps = 0; + acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; + acq_caps |= (sizeof(struct ena_admin_acq_entry) << + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; + + writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF); + writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF); + ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers); + if (ret) + goto error; + + admin_queue->running_state = true; + + return 0; +error: + ena_com_admin_destroy(ena_dev); + + return ret; +} + +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + int ret; + + if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", + ctx->qid, ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + io_sq = &ena_dev->io_sq_queues[ctx->qid]; + io_cq = &ena_dev->io_cq_queues[ctx->qid]; + + memset(io_sq, 0x0, sizeof(struct ena_com_io_sq)); + memset(io_cq, 0x0, sizeof(struct ena_com_io_cq)); + + /* Init CQ */ + io_cq->q_depth = ctx->queue_size; + io_cq->direction = ctx->direction; + io_cq->qid = ctx->qid; + + io_cq->msix_vector = ctx->msix_vector; + + io_sq->q_depth = ctx->queue_size; + io_sq->direction = ctx->direction; + io_sq->qid = ctx->qid; + + io_sq->mem_queue_type = ctx->mem_queue_type; + + if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + /* header length is limited to 8 bits */ + io_sq->tx_max_header_size = + min_t(u32, ena_dev->tx_max_header_size, SZ_256); + + ret = ena_com_init_io_sq(ena_dev, ctx, io_sq); + if (ret) + goto error; + ret = ena_com_init_io_cq(ena_dev, ctx, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_cq(ena_dev, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx); + if (ret) + goto destroy_io_cq; + + return 0; + +destroy_io_cq: + ena_com_destroy_io_cq(ena_dev, io_cq); +error: + ena_com_io_queue_free(ena_dev, io_sq, io_cq); + return ret; +} + +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", qid, + ENA_TOTAL_NUM_QUEUES); + return; + } + + io_sq = &ena_dev->io_sq_queues[qid]; + io_cq = &ena_dev->io_cq_queues[qid]; + + ena_com_destroy_io_sq(ena_dev, io_sq); + ena_com_destroy_io_cq(ena_dev, io_cq); + + ena_com_io_queue_free(ena_dev, io_sq, io_cq); +} + +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp) +{ + return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG); +} + +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_DEVICE_ATTRIBUTES); + if (rc) + return rc; + + memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr, + sizeof(get_resp.u.dev_attr)); + ena_dev->supported_features = get_resp.u.dev_attr.supported_features; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_NUM); + if (rc) + return rc; + + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, + sizeof(get_resp.u.max_queue)); + ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_AENQ_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq, + sizeof(get_resp.u.aenq)); + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->offload, &get_resp.u.offload, + sizeof(get_resp.u.offload)); + + return 0; +} + +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev) +{ + ena_com_handle_admin_completion(&ena_dev->admin_queue); +} + +/* ena_handle_specific_aenq_event: + * return the handler that is relevant to the specific event group + */ +static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev, + u16 group) +{ + struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers; + + if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group]) + return aenq_handlers->handlers[group]; + + return aenq_handlers->unimplemented_handler; +} + +/* ena_aenq_intr_handler: + * handles the aenq incoming events. + * pop events from the queue and apply the specific handler + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) +{ + struct ena_admin_aenq_entry *aenq_e; + struct ena_admin_aenq_common_desc *aenq_common; + struct ena_com_aenq *aenq = &dev->aenq; + ena_aenq_handler handler_cb; + u16 masked_head, processed = 0; + u8 phase; + + masked_head = aenq->head & (aenq->q_depth - 1); + phase = aenq->phase; + aenq_e = &aenq->entries[masked_head]; /* Get first entry */ + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ + while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == + phase) { + pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", + aenq_common->group, aenq_common->syndrom, + (u64)aenq_common->timestamp_low + + ((u64)aenq_common->timestamp_high << 32)); + + /* Handle specific event*/ + handler_cb = ena_com_get_specific_aenq_cb(dev, + aenq_common->group); + handler_cb(data, aenq_e); /* call the actual event handler*/ + + /* Get next event entry */ + masked_head++; + processed++; + + if (unlikely(masked_head == aenq->q_depth)) { + masked_head = 0; + phase = !phase; + } + aenq_e = &aenq->entries[masked_head]; + aenq_common = &aenq_e->aenq_common_desc; + } + + aenq->head += processed; + aenq->phase = phase; + + /* Don't update aenq doorbell if there weren't any processed events */ + if (!processed) + return; + + /* write the aenq doorbell after all AENQ descriptors were read */ + mb(); + writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_dev_reset(struct ena_com_dev *ena_dev) +{ + u32 stat, timeout, cap, reset_val; + int rc; + + stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + + if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || + (cap == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read32 timeout occurred\n"); + return -ETIME; + } + + if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) { + pr_err("Device isn't ready, can't reset device\n"); + return -EINVAL; + } + + timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >> + ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + if (timeout == 0) { + pr_err("Invalid timeout value\n"); + return -EINVAL; + } + + /* start reset */ + reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK; + writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + + /* Write again the MMIO read request address */ + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + rc = wait_for_reset_state(ena_dev, timeout, + ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (rc != 0) { + pr_err("Reset indication didn't turn on\n"); + return rc; + } + + /* reset done */ + writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + rc = wait_for_reset_state(ena_dev, timeout, 0); + if (rc != 0) { + pr_err("Reset indication didn't turn off\n"); + return rc; + } + + return 0; +} + +static int ena_get_dev_stats(struct ena_com_dev *ena_dev, + struct ena_com_stats_ctx *ctx, + enum ena_admin_get_stats_type type) +{ + struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd; + struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp; + struct ena_com_admin_queue *admin_queue; + int ret; + + admin_queue = &ena_dev->admin_queue; + + get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS; + get_cmd->aq_common_descriptor.flags = 0; + get_cmd->type = type; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)get_cmd, + sizeof(*get_cmd), + (struct ena_admin_acq_entry *)get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to get stats. error: %d\n", ret); + + return ret; +} + +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats) +{ + struct ena_com_stats_ctx ctx; + int ret; + + memset(&ctx, 0x0, sizeof(ctx)); + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC); + if (likely(ret == 0)) + memcpy(stats, &ctx.get_resp.basic_stats, + sizeof(ctx.get_resp.basic_stats)); + + return ret; +} + +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_MTU; + cmd.u.mtu.mtu = mtu; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set mtu %d. error: %d\n", mtu, ret); + + return ret; +} + +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload) +{ + int ret; + struct ena_admin_get_feat_resp resp; + + ret = ena_com_get_feature(ena_dev, &resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (unlikely(ret)) { + pr_err("Failed to get offload capabilities %d\n", ret); + return ret; + } + + memcpy(offload, &resp.u.offload, sizeof(resp.u.offload)); + + return 0; +} + +int ena_com_set_hash_function(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_FUNCTION)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_HASH_FUNCTION); + return -EPERM; + } + + /* Validate hash function is supported */ + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION); + if (unlikely(ret)) + return ret; + + if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { + pr_err("Func hash %d isn't supported by device, abort\n", + rss->hash_func); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION; + cmd.u.flow_hash_func.init_val = rss->hash_init_val; + cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_key_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = sizeof(*rss->hash_key); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) { + pr_err("Failed to set hash function %d. error: %d\n", + rss->hash_func, ret); + return -EINVAL; + } + + return 0; +} + +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + /* Make sure size is a mult of DWs */ + if (unlikely(key_len & 0x3)) + return -EINVAL; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { + pr_err("Flow hash function %d isn't supported\n", func); + return -EPERM; + } + + switch (func) { + case ENA_ADMIN_TOEPLITZ: + if (key_len > sizeof(hash_key->key)) { + pr_err("key len (%hu) is bigger than the max supported (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; + break; + case ENA_ADMIN_CRC32: + rss->hash_init_val = init_val; + break; + default: + pr_err("Invalid hash function (%d)\n", func); + return -EINVAL; + } + + rc = ena_com_set_hash_function(ena_dev); + + /* Restore the old function */ + if (unlikely(rc)) + ena_com_get_hash_function(ena_dev, NULL, NULL); + + return rc; +} + +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + rss->hash_func = get_resp.u.flow_hash_func.selected_func; + if (func) + *func = rss->hash_func; + + if (key) + memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2); + + return 0; +} + +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_INPUT, + rss->hash_ctrl_dma_addr, + sizeof(*rss->hash_ctrl)); + if (unlikely(rc)) + return rc; + + if (fields) + *fields = rss->hash_ctrl->selected_fields[proto].fields; + + return 0; +} + +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_INPUT)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT; + cmd.u.flow_hash_input.enabled_input_sort = + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK | + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_ctrl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + cmd.control_buffer.length = sizeof(*hash_ctrl); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) + pr_err("Failed to set hash input. error: %d\n", ret); + + return ret; +} + +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = + rss->hash_ctrl; + u16 available_fields = 0; + int rc, i; + + /* Get the supported hash input */ + rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL); + if (unlikely(rc)) + return rc; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA; + + for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) { + available_fields = hash_ctrl->selected_fields[i].fields & + hash_ctrl->supported_fields[i].fields; + if (available_fields != hash_ctrl->selected_fields[i].fields) { + pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n", + i, hash_ctrl->supported_fields[i].fields, + hash_ctrl->selected_fields[i].fields); + return -EPERM; + } + } + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return rc; +} + +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + u16 supported_fields; + int rc; + + if (proto >= ENA_ADMIN_RSS_PROTO_NUM) { + pr_err("Invalid proto num (%u)\n", proto); + return -EINVAL; + } + + /* Get the ctrl table */ + rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL); + if (unlikely(rc)) + return rc; + + /* Make sure all the fields are supported */ + supported_fields = hash_ctrl->supported_fields[proto].fields; + if ((hash_fields & supported_fields) != hash_fields) { + pr_err("proto %d doesn't support the required fields %x. supports only: %x\n", + proto, hash_fields, supported_fields); + } + + hash_ctrl->selected_fields[proto].fields = hash_fields; + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return 0; +} + +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (unlikely(entry_idx >= (1 << rss->tbl_log_size))) + return -EINVAL; + + if (unlikely((entry_value > ENA_TOTAL_NUM_QUEUES))) + return -EINVAL; + + rss->host_rss_ind_tbl[entry_idx] = entry_value; + + return 0; +} + +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id( + ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + return -EPERM; + } + + ret = ena_com_ind_tbl_convert_to_device(ena_dev); + if (ret) { + pr_err("Failed to convert host indirection table to device table\n"); + return ret; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG; + cmd.u.ind_table.size = rss->tbl_log_size; + cmd.u.ind_table.inline_index = 0xFFFFFFFF; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->rss_ind_tbl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set indirect table. error: %d\n", ret); + + return ret; +} + +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + u32 tbl_size; + int i, rc; + + tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, + rss->rss_ind_tbl_dma_addr, + tbl_size); + if (unlikely(rc)) + return rc; + + if (!ind_tbl) + return 0; + + rc = ena_com_ind_tbl_convert_from_device(ena_dev); + if (unlikely(rc)) + return rc; + + for (i = 0; i < (1 << rss->tbl_log_size); i++) + ind_tbl[i] = rss->host_rss_ind_tbl[i]; + + return 0; +} + +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) +{ + int rc; + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); + + rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size); + if (unlikely(rc)) + goto err_indr_tbl; + + rc = ena_com_hash_key_allocate(ena_dev); + if (unlikely(rc)) + goto err_hash_key; + + rc = ena_com_hash_ctrl_init(ena_dev); + if (unlikely(rc)) + goto err_hash_ctrl; + + return 0; + +err_hash_ctrl: + ena_com_hash_key_destroy(ena_dev); +err_hash_key: + ena_com_indirect_table_destroy(ena_dev); +err_indr_tbl: + + return rc; +} + +void ena_com_rss_destroy(struct ena_com_dev *ena_dev) +{ + ena_com_indirect_table_destroy(ena_dev); + ena_com_hash_key_destroy(ena_dev); + ena_com_hash_ctrl_destroy(ena_dev); + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); +} + +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->host_info = + dma_zalloc_coherent(ena_dev->dmadev, SZ_4K, + &host_attr->host_info_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->host_info)) + return -ENOMEM; + + return 0; +} + +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->debug_area_virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, debug_area_size, + &host_attr->debug_area_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->debug_area_virt_addr)) { + host_attr->debug_area_size = 0; + return -ENOMEM; + } + + host_attr->debug_area_size = debug_area_size; + + return 0; +} + +void ena_com_delete_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->host_info) { + dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info, + host_attr->host_info_dma_addr); + host_attr->host_info = NULL; + } +} + +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->debug_area_virt_addr) { + dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size, + host_attr->debug_area_virt_addr, + host_attr->debug_area_dma_addr); + host_attr->debug_area_virt_addr = NULL; + } +} + +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_HOST_ATTR_CONFIG)) { + pr_warn("Set host attribute isn't supported\n"); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.debug_ba, + host_attr->debug_area_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.os_info_ba, + host_attr->host_info_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.u.host_attr.debug_area_size = host_attr->debug_area_size; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set host attributes: %d\n", ret); + + return ret; +} + +/* Interrupt moderation */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev) +{ + return ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_INTERRUPT_MODERATION); +} + +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + ena_dev->intr_moder_tx_interval = tx_coalesce_usecs / + ena_dev->intr_delay_resolution; + + return 0; +} + +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + /* We use LOWEST entry of moderation table for storing + * nonadaptive interrupt coalescing values + */ + ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + rx_coalesce_usecs / ena_dev->intr_delay_resolution; + + return 0; +} + +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + if (ena_dev->intr_moder_tbl) + devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl); + ena_dev->intr_moder_tbl = NULL; +} + +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + struct ena_admin_get_feat_resp get_resp; + u16 delay_resolution; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_INTERRUPT_MODERATION); + + if (rc) { + if (rc == -EPERM) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_INTERRUPT_MODERATION); + rc = 0; + } else { + pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n", + rc); + } + + /* no moderation supported, disable adaptive support */ + ena_com_disable_adaptive_moderation(ena_dev); + return rc; + } + + rc = ena_com_init_interrupt_moderation_table(ena_dev); + if (rc) + goto err; + + /* if moderation is supported by device we set adaptive moderation */ + delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; + ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); + ena_com_enable_adaptive_moderation(ena_dev); + + return 0; +err: + ena_com_destroy_interrupt_moderation(ena_dev); + return rc; +} + +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (!intr_moder_tbl) + return; + + intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + ENA_INTR_LOWEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval = + ENA_INTR_LOWEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval = + ENA_INTR_LOWEST_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval = + ENA_INTR_LOW_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval = + ENA_INTR_LOW_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval = + ENA_INTR_LOW_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval = + ENA_INTR_MID_USECS; + intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval = + ENA_INTR_MID_PKTS; + intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval = + ENA_INTR_MID_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval = + ENA_INTR_HIGH_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval = + ENA_INTR_HIGH_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval = + ENA_INTR_HIGH_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval = + ENA_INTR_HIGHEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval = + ENA_INTR_HIGHEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval = + ENA_INTR_HIGHEST_BYTES; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev) +{ + return ena_dev->intr_moder_tx_interval; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (intr_moder_tbl) + return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval; + + return 0; +} + +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval; + if (ena_dev->intr_delay_resolution) + intr_moder_tbl[level].intr_moder_interval /= + ena_dev->intr_delay_resolution; + intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval; + + /* use hardcoded value until ethtool supports bytecount parameter */ + if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED) + intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval; +} + +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval; + if (ena_dev->intr_delay_resolution) + entry->intr_moder_interval *= ena_dev->intr_delay_resolution; + entry->pkts_per_interval = + intr_moder_tbl[level].pkts_per_interval; + entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h new file mode 100644 index 000000000000..509d7b8e15ab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -0,0 +1,1038 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_COM +#define ENA_COM + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_common_defs.h" +#include "ena_admin_defs.h" +#include "ena_eth_io_defs.h" +#include "ena_regs_defs.h" + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define ENA_MAX_NUM_IO_QUEUES 128U +/* We need to queues for each IO (on for Tx and one for Rx) */ +#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) + +#define ENA_MAX_HANDLERS 256 + +#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48 + +/* Unit in usec */ +#define ENA_REG_READ_TIMEOUT 200000 + +#define ADMIN_SQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aq_entry)) +#define ADMIN_CQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_acq_entry)) +#define ADMIN_AENQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aenq_entry)) + +/*****************************************************************************/ +/*****************************************************************************/ +/* ENA adaptive interrupt moderation settings */ + +#define ENA_INTR_LOWEST_USECS (0) +#define ENA_INTR_LOWEST_PKTS (3) +#define ENA_INTR_LOWEST_BYTES (2 * 1524) + +#define ENA_INTR_LOW_USECS (32) +#define ENA_INTR_LOW_PKTS (12) +#define ENA_INTR_LOW_BYTES (16 * 1024) + +#define ENA_INTR_MID_USECS (80) +#define ENA_INTR_MID_PKTS (48) +#define ENA_INTR_MID_BYTES (64 * 1024) + +#define ENA_INTR_HIGH_USECS (128) +#define ENA_INTR_HIGH_PKTS (96) +#define ENA_INTR_HIGH_BYTES (128 * 1024) + +#define ENA_INTR_HIGHEST_USECS (192) +#define ENA_INTR_HIGHEST_PKTS (128) +#define ENA_INTR_HIGHEST_BYTES (192 * 1024) + +#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196 +#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 4 +#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT 6 +#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT 4 +#define ENA_INTR_MODER_LEVEL_STRIDE 2 +#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF + +enum ena_intr_moder_level { + ENA_INTR_MODER_LOWEST = 0, + ENA_INTR_MODER_LOW, + ENA_INTR_MODER_MID, + ENA_INTR_MODER_HIGH, + ENA_INTR_MODER_HIGHEST, + ENA_INTR_MAX_NUM_OF_LEVELS, +}; + +struct ena_intr_moder_entry { + unsigned int intr_moder_interval; + unsigned int pkts_per_interval; + unsigned int bytes_per_interval; +}; + +enum queue_direction { + ENA_COM_IO_QUEUE_DIRECTION_TX, + ENA_COM_IO_QUEUE_DIRECTION_RX +}; + +struct ena_com_buf { + dma_addr_t paddr; /**< Buffer physical address */ + u16 len; /**< Buffer length in bytes */ +}; + +struct ena_com_rx_buf_info { + u16 len; + u16 req_id; +}; + +struct ena_com_io_desc_addr { + u8 __iomem *pbuf_dev_addr; /* LLQ address */ + u8 *virt_addr; + dma_addr_t phys_addr; +}; + +struct ena_com_tx_meta { + u16 mss; + u16 l3_hdr_len; + u16 l3_hdr_offset; + u16 l4_hdr_len; /* In words */ +}; + +struct ena_com_io_cq { + struct ena_com_io_desc_addr cdesc_addr; + + /* Interrupt unmask register */ + u32 __iomem *unmask_reg; + + /* The completion queue head doorbell register */ + u32 __iomem *cq_head_db_reg; + + /* numa configuration register (for TPH) */ + u32 __iomem *numa_node_cfg_reg; + + /* The value to write to the above register to unmask + * the interrupt of this queue + */ + u32 msix_vector; + + enum queue_direction direction; + + /* holds the number of cdesc of the current packet */ + u16 cur_rx_pkt_cdesc_count; + /* save the firt cdesc idx of the current packet */ + u16 cur_rx_pkt_cdesc_start_idx; + + u16 q_depth; + /* Caller qid */ + u16 qid; + + /* Device queue index */ + u16 idx; + u16 head; + u16 last_head_update; + u8 phase; + u8 cdesc_entry_size_in_bytes; + +} ____cacheline_aligned; + +struct ena_com_io_sq { + struct ena_com_io_desc_addr desc_addr; + + u32 __iomem *db_addr; + u8 __iomem *header_addr; + + enum queue_direction direction; + enum ena_admin_placement_policy_type mem_queue_type; + + u32 msix_vector; + struct ena_com_tx_meta cached_tx_meta; + + u16 q_depth; + u16 qid; + + u16 idx; + u16 tail; + u16 next_to_comp; + u32 tx_max_header_size; + u8 phase; + u8 desc_entry_size; + u8 dma_addr_bits; +} ____cacheline_aligned; + +struct ena_com_admin_cq { + struct ena_admin_acq_entry *entries; + dma_addr_t dma_addr; + + u16 head; + u8 phase; +}; + +struct ena_com_admin_sq { + struct ena_admin_aq_entry *entries; + dma_addr_t dma_addr; + + u32 __iomem *db_addr; + + u16 head; + u16 tail; + u8 phase; + +}; + +struct ena_com_stats_admin { + u32 aborted_cmd; + u32 submitted_cmd; + u32 completed_cmd; + u32 out_of_space; + u32 no_completion; +}; + +struct ena_com_admin_queue { + void *q_dmadev; + spinlock_t q_lock; /* spinlock for the admin queue */ + struct ena_comp_ctx *comp_ctx; + u16 q_depth; + struct ena_com_admin_cq cq; + struct ena_com_admin_sq sq; + + /* Indicate if the admin queue should poll for completion */ + bool polling; + + u16 curr_cmd_id; + + /* Indicate that the ena was initialized and can + * process new admin commands + */ + bool running_state; + + /* Count the number of outstanding admin commands */ + atomic_t outstanding_cmds; + + struct ena_com_stats_admin stats; +}; + +struct ena_aenq_handlers; + +struct ena_com_aenq { + u16 head; + u8 phase; + struct ena_admin_aenq_entry *entries; + dma_addr_t dma_addr; + u16 q_depth; + struct ena_aenq_handlers *aenq_handlers; +}; + +struct ena_com_mmio_read { + struct ena_admin_ena_mmio_req_read_less_resp *read_resp; + dma_addr_t read_resp_dma_addr; + u16 seq_num; + bool readless_supported; + /* spin lock to ensure a single outstanding read */ + spinlock_t lock; +}; + +struct ena_rss { + /* Indirect table */ + u16 *host_rss_ind_tbl; + struct ena_admin_rss_ind_table_entry *rss_ind_tbl; + dma_addr_t rss_ind_tbl_dma_addr; + u16 tbl_log_size; + + /* Hash key */ + enum ena_admin_hash_functions hash_func; + struct ena_admin_feature_rss_flow_hash_control *hash_key; + dma_addr_t hash_key_dma_addr; + u32 hash_init_val; + + /* Flow Control */ + struct ena_admin_feature_rss_hash_control *hash_ctrl; + dma_addr_t hash_ctrl_dma_addr; + +}; + +struct ena_host_attribute { + /* Debug area */ + u8 *debug_area_virt_addr; + dma_addr_t debug_area_dma_addr; + u32 debug_area_size; + + /* Host information */ + struct ena_admin_host_info *host_info; + dma_addr_t host_info_dma_addr; +}; + +/* Each ena_dev is a PCI function. */ +struct ena_com_dev { + struct ena_com_admin_queue admin_queue; + struct ena_com_aenq aenq; + struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES]; + struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES]; + u8 __iomem *reg_bar; + void __iomem *mem_bar; + void *dmadev; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + u32 tx_max_header_size; + u16 stats_func; /* Selected function for extended statistic dump */ + u16 stats_queue; /* Selected queue for extended statistic dump */ + + struct ena_com_mmio_read mmio_read; + + struct ena_rss rss; + u32 supported_features; + u32 dma_addr_bits; + + struct ena_host_attribute host_attr; + bool adaptive_coalescing; + u16 intr_delay_resolution; + u32 intr_moder_tx_interval; + struct ena_intr_moder_entry *intr_moder_tbl; +}; + +struct ena_com_dev_get_features_ctx { + struct ena_admin_queue_feature_desc max_queues; + struct ena_admin_device_attr_feature_desc dev_attr; + struct ena_admin_feature_aenq_desc aenq; + struct ena_admin_feature_offload_desc offload; +}; + +struct ena_com_create_io_ctx { + enum ena_admin_placement_policy_type mem_queue_type; + enum queue_direction direction; + int numa_node; + u32 msix_vector; + u16 queue_size; + u16 qid; +}; + +typedef void (*ena_aenq_handler)(void *data, + struct ena_admin_aenq_entry *aenq_e); + +/* Holds aenq handlers. Indexed by AENQ event group */ +struct ena_aenq_handlers { + ena_aenq_handler handlers[ENA_MAX_HANDLERS]; + ena_aenq_handler unimplemented_handler; +}; + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * + * Initialize the register read mechanism. + * + * @note: This method must be the first stage in the initialization sequence. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev); + +/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * @readless_supported: readless mode (enable/disable) + */ +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, + bool readless_supported); + +/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return + * value physical address. + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev); + +/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_admin_init - Init the admin and the async queues + * @ena_dev: ENA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. + * @init_spinlock: Indicate if this method should init the admin spinlock or + * the spinlock was init before (for example, in a case of FLR). + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock); + +/* ena_com_admin_destroy - Destroy the admin and the async events queues. + * @ena_dev: ENA communication layer struct + * + * @note: Before calling this method, the caller must validate that the device + * won't send any additional admin completions/aenq. + * To achieve that, a FLR is recommended. + */ +void ena_com_admin_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_dev_reset - Perform device FLR to the device. + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_dev_reset(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_queue - Create io queue. + * @ena_dev: ENA communication layer struct + * @ctx - create context structure + * + * Create the submission and the completion queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx); + +/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid. + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + */ +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid); + +/* ena_com_get_io_handlers - Return the io queue handlers + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + * @io_sq - IO submission queue handler + * @io_cq - IO completion queue handler. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq); + +/* ena_com_admin_aenq_enable - ENAble asynchronous event notifications + * @ena_dev: ENA communication layer struct + * + * After this method, aenq event can be received via AENQ. + */ +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_running_state - Set the state of the admin queue + * @ena_dev: ENA communication layer struct + * + * Change the state of the admin queue (enable/disable) + */ +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state); + +/* ena_com_get_admin_running_state - Get the admin queue state + * @ena_dev: ENA communication layer struct + * + * Retrieve the state of the admin queue (enable/disable) + * + * @return - current polling mode (enable/disable) + */ +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * @polling: ENAble/Disable polling mode + * + * Set the admin completion mode. + */ +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling); + +/* ena_com_set_admin_polling_mode - Get the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * + * Get the admin completion mode. + * If polling mode is on, ena_com_execute_admin_command will perform a + * polling on the admin completion queue for the commands completion, + * otherwise it will wait on wait event. + * + * @return state + */ +bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); + +/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the admin completion queue and wake up all the pending + * threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. + */ +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); + +/* ena_com_aenq_intr_handler - AENQ interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the async event notification queue and call the proper + * aenq handler. + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); + +/* ena_com_abort_admin_commands - Abort all the outstanding admin commands. + * @ena_dev: ENA communication layer struct + * + * This method aborts all the outstanding admin commands. + * The caller should then call ena_com_wait_for_abort_completion to make sure + * all the commands were completed. + */ +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev); + +/* ena_com_wait_for_abort_completion - Wait for admin commands abort. + * @ena_dev: ENA communication layer struct + * + * This method wait until all the outstanding admin commands will be completed. + */ +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev); + +/* ena_com_validate_version - Validate the device parameters + * @ena_dev: ENA communication layer struct + * + * This method validate the device parameters are the same as the saved + * parameters in ena_dev. + * This method is useful after device reset, to validate the device mac address + * and the device offloads are the same as before the reset. + * + * @return - 0 on success negative value otherwise. + */ +int ena_com_validate_version(struct ena_com_dev *ena_dev); + +/* ena_com_get_link_params - Retrieve physical link parameters. + * @ena_dev: ENA communication layer struct + * @resp: Link parameters + * + * Retrieve the physical link parameters, + * like speed, auto-negotiation and full duplex support. + * + * @return - 0 on Success negative value otherwise. + */ +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp); + +/* ena_com_get_dma_width - Retrieve physical dma address width the device + * supports. + * @ena_dev: ENA communication layer struct + * + * Retrieve the maximum physical address bits the device can handle. + * + * @return: > 0 on Success and negative value otherwise. + */ +int ena_com_get_dma_width(struct ena_com_dev *ena_dev); + +/* ena_com_set_aenq_config - Set aenq groups configurations + * @ena_dev: ENA communication layer struct + * @groups flag: bit fields flags of enum ena_admin_aenq_group. + * + * Configure which aenq event group the driver would like to receive. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag); + +/* ena_com_get_dev_attr_feat - Get device features + * @ena_dev: ENA communication layer struct + * @get_feat_ctx: returned context that contain the get features. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx); + +/* ena_com_get_dev_basic_stats - Get device basic statistics + * @ena_dev: ENA communication layer struct + * @stats: stats return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats); + +/* ena_com_set_dev_mtu - Configure the device mtu. + * @ena_dev: ENA communication layer struct + * @mtu: mtu value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu); + +/* ena_com_get_offload_settings - Retrieve the device offloads capabilities + * @ena_dev: ENA communication layer struct + * @offlad: offload return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload); + +/* ena_com_rss_init - Init RSS + * @ena_dev: ENA communication layer struct + * @log_size: indirection log size + * + * Allocate RSS/RFS resources. + * The caller then can configure rss using ena_com_set_hash_function, + * ena_com_set_hash_ctrl and ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); + +/* ena_com_rss_destroy - Destroy rss + * @ena_dev: ENA communication layer struct + * + * Free all the RSS/RFS resources. + */ +void ena_com_rss_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_fill_hash_function - Fill RSS hash function + * @ena_dev: ENA communication layer struct + * @func: The hash function (Toeplitz or crc) + * @key: Hash key (for toeplitz hash) + * @key_len: key length (max length 10 DW) + * @init_val: initial value for the hash function + * + * Fill the ena_dev resources with the desire hash function, hash key, key_len + * and key initial value (if needed by the hash function). + * To flush the key into the device the caller should call + * ena_com_set_hash_function. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val); + +/* ena_com_set_hash_function - Flush the hash function and it dependencies to + * the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash function and it dependencies (key, key length and + * initial value) if needed. + * + * @note: Prior to this method the caller should call ena_com_fill_hash_function + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_function(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_function - Retrieve the hash function and the hash key + * from the device. + * @ena_dev: ENA communication layer struct + * @func: hash function + * @key: hash key + * + * Retrieve the hash function and the hash key from the device. + * + * @note: If the caller called ena_com_fill_hash_function but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key); + +/* ena_com_fill_hash_ctrl - Fill RSS hash control + * @ena_dev: ENA communication layer struct. + * @proto: The protocol to configure. + * @hash_fields: bit mask of ena_admin_flow_hash_fields + * + * Fill the ena_dev resources with the desire hash control (the ethernet + * fields that take part of the hash) for a specific protocol. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields); + +/* ena_com_set_hash_ctrl - Flush the hash control resources to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash control (the ethernet fields that take part of the hash) + * + * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_ctrl - Retrieve the hash control from the device. + * @ena_dev: ENA communication layer struct + * @proto: The protocol to retrieve. + * @fields: bit mask of ena_admin_flow_hash_fields. + * + * Retrieve the hash control from the device. + * + * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields); + +/* ena_com_set_default_hash_ctrl - Set the hash control to a default + * configuration. + * @ena_dev: ENA communication layer struct + * + * Fill the ena_dev resources with the default hash control configuration. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS + * indirection table + * @ena_dev: ENA communication layer struct. + * @entry_idx - indirection table entry. + * @entry_value - redirection value + * + * Fill a single entry of the RSS indirection table in the ena_dev resources. + * To flush the indirection table to the device, the called should call + * ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value); + +/* ena_com_indirect_table_set - Flush the indirection table to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the indirection hash control to the device. + * Prior to this method the caller should call ena_com_indirect_table_fill_entry + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_get - Retrieve the indirection table from the device. + * @ena_dev: ENA communication layer struct + * @ind_tbl: indirection table + * + * Retrieve the RSS indirection table from the device. + * + * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl); + +/* ena_com_allocate_host_info - Allocate host info resources. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_allocate_debug_area - Allocate debug area. + * @ena_dev: ENA communication layer struct + * @debug_area_size - debug area size. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size); + +/* ena_com_delete_debug_area - Free the debug area resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate debug area. + */ +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); + +/* ena_com_delete_host_info - Free the host info resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate host info. + */ +void ena_com_delete_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_set_host_attributes - Update the device with the host + * attributes (debug area and host info) base address. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_cq - Create io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Create IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_destroy_io_cq - Destroy io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Destroy IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_execute_admin_command - Execute admin command + * @admin_queue: admin queue. + * @cmd: the admin command to execute. + * @cmd_size: the command size. + * @cmd_completion: command completion return value. + * @cmd_comp_size: command completion size. + + * Submit an admin command and then wait until the device will return a + * completion. + * The completion will be copyed into cmd_comp. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *cmd_comp, + size_t cmd_comp_size); + +/* ena_com_init_interrupt_moderation - Init interrupt moderation + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources + * @ena_dev: ENA communication layer struct + */ +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_interrupt_moderation_supported - Return if interrupt moderation + * capability is supported by the device. + * + * @return - supported or not. + */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev); + +/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt + * moderation table back to the default parameters. + * @ena_dev: ENA communication layer struct + */ +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev); + +/* ena_com_update_nonadaptive_moderation_interval_tx - Update the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * @tx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs); + +/* ena_com_update_nonadaptive_moderation_interval_rx - Update the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * @rx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs); + +/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev); + +/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev); + +/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt + * moderation table. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry value + * + * Update a single entry in the interrupt moderation table. + */ +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry to fill. + * + * Initialize the entry according to the adaptive interrupt moderation table. + */ +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev) +{ + return ena_dev->adaptive_coalescing; +} + +static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = true; +} + +static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = false; +} + +/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay + * @ena_dev: ENA communication layer struct + * @pkts: Number of packets since the last update + * @bytes: Number of bytes received since the last update. + * @smoothed_interval: Returned interval + * @moder_tbl_idx: Current table level as input update new level as return + * value. + */ +static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev, + unsigned int pkts, + unsigned int bytes, + unsigned int *smoothed_interval, + unsigned int *moder_tbl_idx) +{ + enum ena_intr_moder_level curr_moder_idx, new_moder_idx; + struct ena_intr_moder_entry *curr_moder_entry; + struct ena_intr_moder_entry *pred_moder_entry; + struct ena_intr_moder_entry *new_moder_entry; + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int interval; + + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + if (!pkts || !bytes) + /* Tx interrupt, or spurious interrupt, + * in both cases we just use same delay values + */ + return; + + curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx); + if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) { + pr_err("Wrong moderation index %u\n", curr_moder_idx); + return; + } + + curr_moder_entry = &intr_moder_tbl[curr_moder_idx]; + new_moder_idx = curr_moder_idx; + + if (curr_moder_idx == ENA_INTR_MODER_LOWEST) { + if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } else { + pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE]; + + if ((pkts <= pred_moder_entry->pkts_per_interval) || + (bytes <= pred_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE); + else if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) { + if (curr_moder_idx != ENA_INTR_MODER_HIGHEST) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } + } + new_moder_entry = &intr_moder_tbl[new_moder_idx]; + + interval = new_moder_entry->intr_moder_interval; + *smoothed_interval = ( + (interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT + + ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) / + 10; + + *moder_tbl_idx = new_moder_idx; +} + +/* ena_com_update_intr_reg - Prepare interrupt register + * @intr_reg: interrupt register to update. + * @rx_delay_interval: Rx interval in usecs + * @tx_delay_interval: Tx interval in usecs + * @unmask: unask enable/disable + * + * Prepare interrupt update register with the supplied parameters. + */ +static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg, + u32 rx_delay_interval, + u32 tx_delay_interval, + bool unmask) +{ + intr_reg->intr_control = 0; + intr_reg->intr_control |= rx_delay_interval & + ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK; + + intr_reg->intr_control |= + (tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT) + & ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK; + + if (unmask) + intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK; +} + +#endif /* !(ENA_COM) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h new file mode 100644 index 000000000000..bb8d73676eab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h @@ -0,0 +1,48 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_COMMON_H_ +#define _ENA_COMMON_H_ + +#define ENA_COMMON_SPEC_VERSION_MAJOR 0 /* */ +#define ENA_COMMON_SPEC_VERSION_MINOR 10 /* */ + +/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */ +struct ena_common_mem_addr { + u32 mem_addr_low; + + u16 mem_addr_high; + + /* MBZ */ + u16 reserved16; +}; + +#endif /*_ENA_COMMON_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c new file mode 100644 index 000000000000..539c536464a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -0,0 +1,501 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_eth_com.h" + +static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( + struct ena_com_io_cq *io_cq) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 expected_phase, head_masked; + u16 desc_phase; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr + + (head_masked * io_cq->cdesc_entry_size_in_bytes)); + + desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; + + if (desc_phase != expected_phase) + return NULL; + + return cdesc; +} + +static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq) +{ + io_cq->head++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0)) + io_cq->phase ^= 1; +} + +static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked; + u32 offset; + + tail_masked = io_sq->tail & (io_sq->q_depth - 1); + + offset = tail_masked * io_sq->desc_entry_size; + + return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); +} + +static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u32 offset = tail_masked * io_sq->desc_entry_size; + + /* In case this queue isn't a LLQ */ + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return; + + memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset, + io_sq->desc_addr.virt_addr + offset, + io_sq->desc_entry_size); +} + +static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +{ + io_sq->tail++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) + io_sq->phase ^= 1; +} + +static inline int ena_com_write_header(struct ena_com_io_sq *io_sq, + u8 *head_src, u16 header_len) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u8 __iomem *dev_head_addr = + io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return 0; + + if (unlikely(!io_sq->header_addr)) { + pr_err("Push buffer header ptr is NULL\n"); + return -EINVAL; + } + + memcpy_toio(dev_head_addr, head_src, header_len); + + return 0; +} + +static inline struct ena_eth_io_rx_cdesc_base * + ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx) +{ + idx &= (io_cq->q_depth - 1); + return (struct ena_eth_io_rx_cdesc_base *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + idx * io_cq->cdesc_entry_size_in_bytes); +} + +static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, + u16 *first_cdesc_idx) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 count = 0, head_masked; + u32 last = 0; + + do { + cdesc = ena_com_get_next_rx_cdesc(io_cq); + if (!cdesc) + break; + + ena_com_cq_inc_head(io_cq); + count++; + last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; + } while (!last); + + if (last) { + *first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx; + count += io_cq->cur_rx_pkt_cdesc_count; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + + io_cq->cur_rx_pkt_cdesc_count = 0; + io_cq->cur_rx_pkt_cdesc_start_idx = head_masked; + + pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n", + io_cq->qid, *first_cdesc_idx, count); + } else { + io_cq->cur_rx_pkt_cdesc_count += count; + count = 0; + } + + return count; +} + +static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + int rc; + + if (ena_tx_ctx->meta_valid) { + rc = memcmp(&io_sq->cached_tx_meta, + &ena_tx_ctx->ena_meta, + sizeof(struct ena_com_tx_meta)); + + if (unlikely(rc != 0)) + return true; + } + + return false; +} + +static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_eth_io_tx_meta_desc *meta_desc = NULL; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + + meta_desc = get_sq_desc(io_sq); + memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc)); + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK; + + /* bits 0-9 of the mss */ + meta_desc->word2 |= (ena_meta->mss << + ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK; + /* bits 10-13 of the mss */ + meta_desc->len_ctrl |= ((ena_meta->mss >> 10) << + ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK; + + /* Extended meta desc */ + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK; + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + meta_desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_META_DESC_PHASE_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK; + meta_desc->word2 |= ena_meta->l3_hdr_len & + ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK; + meta_desc->word2 |= (ena_meta->l3_hdr_offset << + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK; + + meta_desc->word2 |= (ena_meta->l4_hdr_len << + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + + /* Cached the meta desc */ + memcpy(&io_sq->cached_tx_meta, ena_meta, + sizeof(struct ena_com_tx_meta)); + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); +} + +static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, + struct ena_eth_io_rx_cdesc_base *cdesc) +{ + ena_rx_ctx->l3_proto = cdesc->status & + ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK; + ena_rx_ctx->l4_proto = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; + ena_rx_ctx->l3_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + ena_rx_ctx->l4_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + ena_rx_ctx->hash = cdesc->hash; + ena_rx_ctx->frag = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT; + + pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n", + ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, + ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err, + ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); +} + +/*****************************************************************************/ +/***************************** API **********************************/ +/*****************************************************************************/ + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc) +{ + struct ena_eth_io_tx_desc *desc = NULL; + struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs; + void *push_header = ena_tx_ctx->push_header; + u16 header_len = ena_tx_ctx->header_len; + u16 num_bufs = ena_tx_ctx->num_bufs; + int total_desc, i, rc; + bool have_meta; + u64 addr_hi; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type"); + + /* num_bufs +1 for potential meta desc */ + if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) { + pr_err("Not enough space in the tx queue\n"); + return -ENOMEM; + } + + if (unlikely(header_len > io_sq->tx_max_header_size)) { + pr_err("header size is too large %d max header: %d\n", + header_len, io_sq->tx_max_header_size); + return -EINVAL; + } + + /* start with pushing the header (if needed) */ + rc = ena_com_write_header(io_sq, push_header, header_len); + if (unlikely(rc)) + return rc; + + have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq, + ena_tx_ctx); + if (have_meta) + ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); + + /* If the caller doesn't want send packets */ + if (unlikely(!num_bufs && !header_len)) { + *nb_hw_desc = have_meta ? 0 : 1; + return 0; + } + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + /* Set first desc when we don't have meta descriptor */ + if (!have_meta) + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK; + + desc->buff_addr_hi_hdr_sz |= (header_len << + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) & + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK; + desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK; + + /* Bits 0-9 */ + desc->meta_ctrl |= (ena_tx_ctx->req_id << + ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK; + + desc->meta_ctrl |= (ena_tx_ctx->df << + ENA_ETH_IO_TX_DESC_DF_SHIFT) & + ENA_ETH_IO_TX_DESC_DF_MASK; + + /* Bits 10-15 */ + desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) << + ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK; + + if (ena_tx_ctx->meta_valid) { + desc->meta_ctrl |= (ena_tx_ctx->tso_enable << + ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_TSO_EN_MASK; + desc->meta_ctrl |= ena_tx_ctx->l3_proto & + ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_proto << + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable << + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable << + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial << + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK; + } + + for (i = 0; i < num_bufs; i++) { + /* The first desc share the same desc as the header */ + if (likely(i != 0)) { + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + } + + desc->len_ctrl |= ena_bufs->len & + ENA_ETH_IO_TX_DESC_LENGTH_MASK; + + addr_hi = ((ena_bufs->paddr & + GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + desc->buff_addr_lo = (u32)ena_bufs->paddr; + desc->buff_addr_hi_hdr_sz |= addr_hi & + ENA_ETH_IO_TX_DESC_ADDR_HI_MASK; + ena_bufs++; + } + + /* set the last desc indicator */ + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK; + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + + ena_com_sq_update_tail(io_sq); + + total_desc = max_t(u16, num_bufs, 1); + total_desc += have_meta ? 1 : 0; + + *nb_hw_desc = total_desc; + return 0; +} + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx) +{ + struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0]; + struct ena_eth_io_rx_cdesc_base *cdesc = NULL; + u16 cdesc_idx = 0; + u16 nb_hw_desc; + u16 i; + + WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx); + if (nb_hw_desc == 0) { + ena_rx_ctx->descs = nb_hw_desc; + return 0; + } + + pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, + nb_hw_desc); + + if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) { + pr_err("Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, + ena_rx_ctx->max_bufs); + return -ENOSPC; + } + + for (i = 0; i < nb_hw_desc; i++) { + cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i); + + ena_buf->len = cdesc->length; + ena_buf->req_id = cdesc->req_id; + ena_buf++; + } + + /* Update SQ head ptr */ + io_sq->next_to_comp += nb_hw_desc; + + pr_debug("[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid, + io_sq->next_to_comp); + + /* Get rx flags from the last pkt */ + ena_com_rx_set_flags(ena_rx_ctx, cdesc); + + ena_rx_ctx->descs = nb_hw_desc; + return 0; +} + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id) +{ + struct ena_eth_io_rx_desc *desc; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + if (unlikely(ena_com_sq_empty_space(io_sq) == 0)) + return -ENOSPC; + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc)); + + desc->length = ena_buf->len; + + desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK; + desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; + + desc->req_id = req_id; + + desc->buff_addr_lo = (u32)ena_buf->paddr; + desc->buff_addr_hi = + ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + ena_com_sq_update_tail(io_sq); + + return 0; +} + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) +{ + u8 expected_phase, cdesc_phase; + struct ena_eth_io_tx_cdesc *cdesc; + u16 masked_head; + + masked_head = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_tx_cdesc *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + (masked_head * io_cq->cdesc_entry_size_in_bytes)); + + /* When the current completion descriptor phase isn't the same as the + * expected, it mean that the device still didn't update + * this completion. + */ + cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK; + if (cdesc_phase != expected_phase) + return -EAGAIN; + + ena_com_cq_inc_head(io_cq); + + *req_id = cdesc->req_id; + + return 0; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h new file mode 100644 index 000000000000..bb53c3a4f8e9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_ETH_COM_H_ +#define ENA_ETH_COM_H_ + +#include "ena_com.h" + +/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */ +#define ENA_COMP_HEAD_THRESH 4 + +struct ena_com_tx_ctx { + struct ena_com_tx_meta ena_meta; + struct ena_com_buf *ena_bufs; + /* For LLQ, header buffer - pushed to the device mem space */ + void *push_header; + + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + u16 num_bufs; + u16 req_id; + /* For regular queue, indicate the size of the header + * For LLQ, indicate the size of the pushed buffer + */ + u16 header_len; + + u8 meta_valid; + u8 tso_enable; + u8 l3_csum_enable; + u8 l4_csum_enable; + u8 l4_csum_partial; + u8 df; /* Don't fragment */ +}; + +struct ena_com_rx_ctx { + struct ena_com_rx_buf_info *ena_bufs; + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + bool l3_csum_err; + bool l4_csum_err; + /* fragmented packet */ + bool frag; + u32 hash; + u16 descs; + int max_bufs; +}; + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc); + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx); + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id); + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id); + +static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq, + struct ena_eth_io_intr_reg *intr_reg) +{ + writel(intr_reg->intr_control, io_cq->unmask_reg); +} + +static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) +{ + u16 tail, next_to_comp, cnt; + + next_to_comp = io_sq->next_to_comp; + tail = io_sq->tail; + cnt = tail - next_to_comp; + + return io_sq->q_depth - 1 - cnt; +} + +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) +{ + u16 tail; + + tail = io_sq->tail; + + pr_debug("write submission queue doorbell for queue: %d tail: %d\n", + io_sq->qid, tail); + + writel(tail, io_sq->db_addr); + + return 0; +} + +static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq) +{ + u16 unreported_comp, head; + bool need_update; + + head = io_cq->head; + unreported_comp = head - io_cq->last_head_update; + need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); + + if (io_cq->cq_head_db_reg && need_update) { + pr_debug("Write completion queue doorbell for queue %d: head: %d\n", + io_cq->qid, head); + writel(head, io_cq->cq_head_db_reg); + io_cq->last_head_update = head; + } + + return 0; +} + +static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq, + u8 numa_node) +{ + struct ena_eth_io_numa_node_cfg_reg numa_cfg; + + if (!io_cq->numa_node_cfg_reg) + return; + + numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK) + | ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK; + + writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg); +} + +static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem) +{ + io_sq->next_to_comp += elem; +} + +#endif /* ENA_ETH_COM_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h new file mode 100644 index 000000000000..f320c58793a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h @@ -0,0 +1,416 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ETH_IO_H_ +#define _ENA_ETH_IO_H_ + +enum ena_eth_io_l3_proto_index { + ENA_ETH_IO_L3_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L3_PROTO_IPV4 = 8, + + ENA_ETH_IO_L3_PROTO_IPV6 = 11, + + ENA_ETH_IO_L3_PROTO_FCOE = 21, + + ENA_ETH_IO_L3_PROTO_ROCE = 22, +}; + +enum ena_eth_io_l4_proto_index { + ENA_ETH_IO_L4_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L4_PROTO_TCP = 12, + + ENA_ETH_IO_L4_PROTO_UDP = 13, + + ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23, +}; + +struct ena_eth_io_tx_desc { + /* 15:0 : length - Buffer length in bytes, must + * include any packet trailers that the ENA supposed + * to update like End-to-End CRC, Authentication GMAC + * etc. This length must not include the + * 'Push_Buffer' length. This length must not include + * the 4-byte added in the end for 802.3 Ethernet FCS + * 21:16 : req_id_hi - Request ID[15:10] + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBZ + * 24 : phase + * 25 : reserved1 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 3:0 : l3_proto_idx - L3 protocol. This field + * required when l3_csum_en,l3_csum or tso_en are set. + * 4 : DF - IPv4 DF, must be 0 if packet is IPv4 and + * DF flags of the IPv4 header is 0. Otherwise must + * be set to 1 + * 6:5 : reserved5 + * 7 : tso_en - Enable TSO, For TCP only. + * 12:8 : l4_proto_idx - L4 protocol. This field need + * to be set when l4_csum_en or tso_en are set. + * 13 : l3_csum_en - enable IPv4 header checksum. + * 14 : l4_csum_en - enable TCP/UDP checksum. + * 15 : ethernet_fcs_dis - when set, the controller + * will not append the 802.3 Ethernet Frame Check + * Sequence to the packet + * 16 : reserved16 + * 17 : l4_csum_partial - L4 partial checksum. when + * set to 0, the ENA calculates the L4 checksum, + * where the Destination Address required for the + * TCP/UDP pseudo-header is taken from the actual + * packet L3 header. when set to 1, the ENA doesn't + * calculate the sum of the pseudo-header, instead, + * the checksum field of the L4 is used instead. When + * TSO enabled, the checksum of the pseudo-header + * must not include the tcp length field. L4 partial + * checksum should be used for IPv6 packet that + * contains Routing Headers. + * 20:18 : reserved18 - MBZ + * 21 : reserved21 - MBZ + * 31:22 : req_id_lo - Request ID[9:0] + */ + u32 meta_ctrl; + + u32 buff_addr_lo; + + /* address high and header size + * 15:0 : addr_hi - Buffer Pointer[47:32] + * 23:16 : reserved16_w2 + * 31:24 : header_length - Header length. For Low + * Latency Queues, this fields indicates the number + * of bytes written to the headers' memory. For + * normal queues, if packet is TCP or UDP, and longer + * than max_header_size, then this field should be + * set to the sum of L4 header offset and L4 header + * size(without options), otherwise, this field + * should be set to 0. For both modes, this field + * must not exceed the max_header_size. + * max_header_size value is reported by the Max + * Queues Feature descriptor + */ + u32 buff_addr_hi_hdr_sz; +}; + +struct ena_eth_io_tx_meta_desc { + /* 9:0 : req_id_lo - Request ID[9:0] + * 11:10 : reserved10 - MBZ + * 12 : reserved12 - MBZ + * 13 : reserved13 - MBZ + * 14 : ext_valid - if set, offset fields in Word2 + * are valid Also MSS High in Word 0 and bits [31:24] + * in Word 3 + * 15 : reserved15 + * 19:16 : mss_hi + * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1: + * Extended Metadata Descriptor + * 21 : meta_store - Store extended metadata in queue + * cache + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBO + * 24 : phase + * 25 : reserved25 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 5:0 : req_id_hi + * 31:6 : reserved6 - MBZ + */ + u32 word1; + + /* 7:0 : l3_hdr_len + * 15:8 : l3_hdr_off + * 21:16 : l4_hdr_len_in_words - counts the L4 header + * length in words. there is an explicit assumption + * that L4 header appears right after L3 header and + * L4 offset is based on l3_hdr_off+l3_hdr_len + * 31:22 : mss_lo + */ + u32 word2; + + u32 reserved; +}; + +struct ena_eth_io_tx_cdesc { + /* Request ID[15:0] */ + u16 req_id; + + u8 status; + + /* flags + * 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 sub_qid; + + u16 sq_head_idx; +}; + +struct ena_eth_io_rx_desc { + /* In bytes. 0 means 64KB */ + u16 length; + + /* MBZ */ + u8 reserved2; + + /* 0 : phase + * 1 : reserved1 - MBZ + * 2 : first - Indicates first descriptor in + * transaction + * 3 : last - Indicates last descriptor in transaction + * 4 : comp_req + * 5 : reserved5 - MBO + * 7:6 : reserved6 - MBZ + */ + u8 ctrl; + + u16 req_id; + + /* MBZ */ + u16 reserved6; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + /* MBZ */ + u16 reserved16_w3; +}; + +/* 4-word format Note: all ethernet parsing information are valid only when + * last=1 + */ +struct ena_eth_io_rx_cdesc_base { + /* 4:0 : l3_proto_idx + * 6:5 : src_vlan_cnt + * 7 : reserved7 - MBZ + * 12:8 : l4_proto_idx + * 13 : l3_csum_err - when set, either the L3 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l3_proto_idx indicates IPv4 packet + * 14 : l4_csum_err - when set, either the L4 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l4_proto_idx indicates TCP/UDP packet, and, + * ipv4_frag is not set + * 15 : ipv4_frag - Indicates IPv4 fragmented packet + * 23:16 : reserved16 + * 24 : phase + * 25 : l3_csum2 - second checksum engine result + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 29:28 : reserved28 + * 30 : buffer - 0: Metadata descriptor. 1: Buffer + * Descriptor was used + * 31 : reserved31 + */ + u32 status; + + u16 length; + + u16 req_id; + + /* 32-bit hash result */ + u32 hash; + + u16 sub_qid; + + u16 reserved; +}; + +/* 8-word format */ +struct ena_eth_io_rx_cdesc_ext { + struct ena_eth_io_rx_cdesc_base base; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + u16 reserved16; + + u32 reserved_w6; + + u32 reserved_w7; +}; + +struct ena_eth_io_intr_reg { + /* 14:0 : rx_intr_delay + * 29:15 : tx_intr_delay + * 30 : intr_unmask + * 31 : reserved + */ + u32 intr_control; +}; + +struct ena_eth_io_numa_node_cfg_reg { + /* 7:0 : numa + * 30:8 : reserved + * 31 : enabled + */ + u32 numa_cfg; +}; + +/* tx_desc */ +#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16 +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0) +#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4 +#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4) +#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7 +#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7) +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13 +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14) +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15 +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17) +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22 +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22) +#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24) + +/* tx_meta_desc */ +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0) +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14 +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14) +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16) +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20 +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20) +#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21 +#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21) +#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8 +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8) +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22 +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22) + +/* tx_cdesc */ +#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0) + +/* rx_desc */ +#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0) +#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2 +#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2) +#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3 +#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3) +#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4 +#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4) + +/* rx_cdesc_base */ +#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0) +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5 +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14) +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15 +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15) +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24 +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25) +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26 +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26) +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27 +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27) +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30 +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30) + +/* intr_reg */ +#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0) +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15 +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15) +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30 +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30) + +/* numa_node_cfg_reg */ +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0) +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31 +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31) + +#endif /*_ENA_ETH_IO_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c new file mode 100644 index 000000000000..67b2338f8fb3 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -0,0 +1,895 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "ena_netdev.h" + +struct ena_stats { + char name[ETH_GSTRING_LEN]; + int stat_offset; +}; + +#define ENA_STAT_ENA_COM_ENTRY(stat) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_com_stats_admin, stat) \ +} + +#define ENA_STAT_ENTRY(stat, stat_type) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ +} + +#define ENA_STAT_RX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, rx) + +#define ENA_STAT_TX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, tx) + +#define ENA_STAT_GLOBAL_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, dev) + +static const struct ena_stats ena_stats_global_strings[] = { + ENA_STAT_GLOBAL_ENTRY(tx_timeout), + ENA_STAT_GLOBAL_ENTRY(io_suspend), + ENA_STAT_GLOBAL_ENTRY(io_resume), + ENA_STAT_GLOBAL_ENTRY(wd_expired), + ENA_STAT_GLOBAL_ENTRY(interface_up), + ENA_STAT_GLOBAL_ENTRY(interface_down), + ENA_STAT_GLOBAL_ENTRY(admin_q_pause), +}; + +static const struct ena_stats ena_stats_tx_strings[] = { + ENA_STAT_TX_ENTRY(cnt), + ENA_STAT_TX_ENTRY(bytes), + ENA_STAT_TX_ENTRY(queue_stop), + ENA_STAT_TX_ENTRY(queue_wakeup), + ENA_STAT_TX_ENTRY(dma_mapping_err), + ENA_STAT_TX_ENTRY(linearize), + ENA_STAT_TX_ENTRY(linearize_failed), + ENA_STAT_TX_ENTRY(napi_comp), + ENA_STAT_TX_ENTRY(tx_poll), + ENA_STAT_TX_ENTRY(doorbells), + ENA_STAT_TX_ENTRY(prepare_ctx_err), + ENA_STAT_TX_ENTRY(missing_tx_comp), + ENA_STAT_TX_ENTRY(bad_req_id), +}; + +static const struct ena_stats ena_stats_rx_strings[] = { + ENA_STAT_RX_ENTRY(cnt), + ENA_STAT_RX_ENTRY(bytes), + ENA_STAT_RX_ENTRY(refil_partial), + ENA_STAT_RX_ENTRY(bad_csum), + ENA_STAT_RX_ENTRY(page_alloc_fail), + ENA_STAT_RX_ENTRY(skb_alloc_fail), + ENA_STAT_RX_ENTRY(dma_mapping_err), + ENA_STAT_RX_ENTRY(bad_desc_num), + ENA_STAT_RX_ENTRY(rx_copybreak_pkt), +}; + +static const struct ena_stats ena_stats_ena_com_strings[] = { + ENA_STAT_ENA_COM_ENTRY(aborted_cmd), + ENA_STAT_ENA_COM_ENTRY(submitted_cmd), + ENA_STAT_ENA_COM_ENTRY(completed_cmd), + ENA_STAT_ENA_COM_ENTRY(out_of_space), + ENA_STAT_ENA_COM_ENTRY(no_completion), +}; + +#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) +#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) +#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) +#define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings) + +static void ena_safe_update_stat(u64 *src, u64 *dst, + struct u64_stats_sync *syncp) +{ + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(syncp); + *(dst) = *src; + } while (u64_stats_fetch_retry_irq(syncp, start)); +} + +static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + struct ena_ring *ring; + + u64 *ptr; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + ring = &adapter->tx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->tx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + + /* Rx stats */ + ring = &adapter->rx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->rx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + } +} + +static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + u32 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats + + (uintptr_t)ena_stats->stat_offset); + + *(*data)++ = *ptr; + } +} + +static void ena_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, + u64 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + u64 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + ptr = (u64 *)((uintptr_t)&adapter->dev_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, data++, &adapter->syncp); + } + + ena_queue_stats(adapter, &data); + ena_dev_admin_queue_stats(adapter, &data); +} + +int ena_get_sset_count(struct net_device *netdev, int sset) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + if (sset != ETH_SS_STATS) + return -EOPNOTSUPP; + + return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; +} + +static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) +{ + const struct ena_stats *ena_stats; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_tx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + /* Rx stats */ + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_rx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + } +} + +static void ena_com_dev_strings(u8 **data) +{ + const struct ena_stats *ena_stats; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + snprintf(*data, ETH_GSTRING_LEN, + "ena_admin_q_%s", ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } +} + +static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + int i; + + if (sset != ETH_SS_STATS) + return; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + memcpy(data, ena_stats->name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + ena_queue_strings(adapter, &data); + ena_com_dev_strings(&data); +} + +static int ena_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_admin_get_feature_link_desc *link; + struct ena_admin_get_feat_resp feat_resp; + int rc; + + rc = ena_com_get_link_params(ena_dev, &feat_resp); + if (rc) + return rc; + + link = &feat_resp.u.link; + link_ksettings->base.speed = link->speed; + + if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + } + + link_ksettings->base.autoneg = + (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + + link_ksettings->base.duplex = DUPLEX_FULL; + + return 0; +} + +static int ena_get_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + coalesce->tx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) / + ena_dev->intr_delay_resolution; + if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) { + coalesce->rx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev) + / ena_dev->intr_delay_resolution; + } else { + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval; + } + coalesce->use_adaptive_rx_coalesce = + ena_com_get_adaptive_moderation_enabled(ena_dev); + + return 0; +} + +static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) +{ + unsigned int val; + int i; + + val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev); + + for (i = 0; i < adapter->num_queues; i++) + adapter->tx_ring[i].smoothed_interval = val; +} + +static int ena_set_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + int rc; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + + if (coalesce->rx_coalesce_usecs_irq || + coalesce->rx_max_coalesced_frames_irq || + coalesce->tx_coalesce_usecs_irq || + coalesce->tx_max_coalesced_frames || + coalesce->tx_max_coalesced_frames_irq || + coalesce->stats_block_coalesce_usecs || + coalesce->use_adaptive_tx_coalesce || + coalesce->pkt_rate_low || + coalesce->tx_coalesce_usecs_low || + coalesce->tx_max_coalesced_frames_low || + coalesce->pkt_rate_high || + coalesce->tx_coalesce_usecs_high || + coalesce->tx_max_coalesced_frames_high || + coalesce->rate_sample_interval) + return -EINVAL; + + rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev, + coalesce->tx_coalesce_usecs); + if (rc) + return rc; + + ena_update_tx_rings_intr_moderation(adapter); + + if (ena_com_get_adaptive_moderation_enabled(ena_dev)) { + if (!coalesce->use_adaptive_rx_coalesce) { + ena_com_disable_adaptive_moderation(ena_dev); + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } else { /* was in non-adaptive mode */ + if (coalesce->use_adaptive_rx_coalesce) { + ena_com_enable_adaptive_moderation(ena_dev); + } else { + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + + return 0; +} + +static u32 ena_get_msglevel(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + return adapter->msg_enable; +} + +static void ena_set_msglevel(struct net_device *netdev, u32 value) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + adapter->msg_enable = value; +} + +static void ena_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); +} + +static void ena_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_ring *tx_ring = &adapter->tx_ring[0]; + struct ena_ring *rx_ring = &adapter->rx_ring[0]; + + ring->rx_max_pending = rx_ring->ring_size; + ring->tx_max_pending = tx_ring->ring_size; + ring->rx_pending = rx_ring->ring_size; + ring->tx_pending = tx_ring->ring_size; +} + +static u32 ena_flow_hash_to_flow_type(u16 hash_fields) +{ + u32 data = 0; + + if (hash_fields & ENA_ADMIN_RSS_L2_DA) + data |= RXH_L2DA; + + if (hash_fields & ENA_ADMIN_RSS_L3_DA) + data |= RXH_IP_DST; + + if (hash_fields & ENA_ADMIN_RSS_L3_SA) + data |= RXH_IP_SRC; + + if (hash_fields & ENA_ADMIN_RSS_L4_DP) + data |= RXH_L4_B_2_3; + + if (hash_fields & ENA_ADMIN_RSS_L4_SP) + data |= RXH_L4_B_0_1; + + return data; +} + +static u16 ena_flow_data_to_flow_hash(u32 hash_fields) +{ + u16 data = 0; + + if (hash_fields & RXH_L2DA) + data |= ENA_ADMIN_RSS_L2_DA; + + if (hash_fields & RXH_IP_DST) + data |= ENA_ADMIN_RSS_L3_DA; + + if (hash_fields & RXH_IP_SRC) + data |= ENA_ADMIN_RSS_L3_SA; + + if (hash_fields & RXH_L4_B_2_3) + data |= ENA_ADMIN_RSS_L4_DP; + + if (hash_fields & RXH_L4_B_0_1) + data |= ENA_ADMIN_RSS_L4_SP; + + return data; +} + +static int ena_get_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + int rc; + + cmd->data = 0; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields); + if (rc) { + /* If device don't have permission, return unsupported */ + if (rc == -EPERM) + rc = -EOPNOTSUPP; + return rc; + } + + cmd->data = ena_flow_hash_to_flow_type(hash_fields); + + return 0; +} + +static int ena_set_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + hash_fields = ena_flow_data_to_flow_hash(cmd->data); + + return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields); +} + +static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_SRXFH: + rc = ena_set_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info, + u32 *rules) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = adapter->num_queues; + rc = 0; + break; + case ETHTOOL_GRXFH: + rc = ena_get_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static u32 ena_get_rxfh_indir_size(struct net_device *netdev) +{ + return ENA_RX_RSS_TABLE_SIZE; +} + +static u32 ena_get_rxfh_key_size(struct net_device *netdev) +{ + return ENA_HASH_KEY_SIZE; +} + +static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + enum ena_admin_hash_functions ena_func; + u8 func; + int rc; + + rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + if (rc) + return rc; + + rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + if (rc) + return rc; + + switch (ena_func) { + case ENA_ADMIN_TOEPLITZ: + func = ETH_RSS_HASH_TOP; + break; + case ENA_ADMIN_CRC32: + func = ETH_RSS_HASH_XOR; + break; + default: + netif_err(adapter, drv, netdev, + "Command parameter is not supported\n"); + return -EOPNOTSUPP; + } + + if (hfunc) + *hfunc = func; + + return rc; +} + +static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + enum ena_admin_hash_functions func; + int rc, i; + + if (indir) { + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + rc = ena_com_indirect_table_fill_entry(ena_dev, + ENA_IO_RXQ_IDX(indir[i]), + i); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, + "Cannot fill indirect table (index is too large)\n"); + return rc; + } + } + + rc = ena_com_indirect_table_set(ena_dev); + if (rc) { + netif_err(adapter, drv, netdev, + "Cannot set indirect table\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + switch (hfunc) { + case ETH_RSS_HASH_TOP: + func = ENA_ADMIN_TOEPLITZ; + break; + case ETH_RSS_HASH_XOR: + func = ENA_ADMIN_CRC32; + break; + default: + netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n", + hfunc); + return -EOPNOTSUPP; + } + + if (key) { + rc = ena_com_fill_hash_function(ena_dev, func, key, + ENA_HASH_KEY_SIZE, + 0xFFFFFFFF); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, "Cannot fill key\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + return 0; +} + +static void ena_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + channels->max_rx = ENA_MAX_NUM_IO_QUEUES; + channels->max_tx = ENA_MAX_NUM_IO_QUEUES; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = adapter->num_queues; + channels->tx_count = adapter->num_queues; + channels->other_count = 0; + channels->combined_count = 0; +} + +static int ena_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = adapter->rx_copybreak; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ena_set_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + u32 len; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + len = *(u32 *)data; + if (len > adapter->netdev->mtu) { + ret = -EINVAL; + break; + } + adapter->rx_copybreak = len; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct ethtool_ops ena_ethtool_ops = { + .get_link_ksettings = ena_get_link_ksettings, + .get_drvinfo = ena_get_drvinfo, + .get_msglevel = ena_get_msglevel, + .set_msglevel = ena_set_msglevel, + .get_link = ethtool_op_get_link, + .get_coalesce = ena_get_coalesce, + .set_coalesce = ena_set_coalesce, + .get_ringparam = ena_get_ringparam, + .get_sset_count = ena_get_sset_count, + .get_strings = ena_get_strings, + .get_ethtool_stats = ena_get_ethtool_stats, + .get_rxnfc = ena_get_rxnfc, + .set_rxnfc = ena_set_rxnfc, + .get_rxfh_indir_size = ena_get_rxfh_indir_size, + .get_rxfh_key_size = ena_get_rxfh_key_size, + .get_rxfh = ena_get_rxfh, + .set_rxfh = ena_set_rxfh, + .get_channels = ena_get_channels, + .get_tunable = ena_get_tunable, + .set_tunable = ena_set_tunable, +}; + +void ena_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ena_ethtool_ops; +} + +static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf) +{ + struct net_device *netdev = adapter->netdev; + u8 *strings_buf; + u64 *data_buf; + int strings_num; + int i, rc; + + strings_num = ena_get_sset_count(netdev, ETH_SS_STATS); + if (strings_num <= 0) { + netif_err(adapter, drv, netdev, "Can't get stats num\n"); + return; + } + + strings_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * ETH_GSTRING_LEN, + GFP_ATOMIC); + if (!strings_buf) { + netif_err(adapter, drv, netdev, + "failed to alloc strings_buf\n"); + return; + } + + data_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * sizeof(u64), + GFP_ATOMIC); + if (!data_buf) { + netif_err(adapter, drv, netdev, + "failed to allocate data buf\n"); + devm_kfree(&adapter->pdev->dev, strings_buf); + return; + } + + ena_get_strings(netdev, ETH_SS_STATS, strings_buf); + ena_get_ethtool_stats(netdev, NULL, data_buf); + + /* If there is a buffer, dump stats, otherwise print them to dmesg */ + if (buf) + for (i = 0; i < strings_num; i++) { + rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64), + "%s %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + buf += rc; + } + else + for (i = 0; i < strings_num; i++) + netif_err(adapter, drv, netdev, "%s: %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + + devm_kfree(&adapter->pdev->dev, strings_buf); + devm_kfree(&adapter->pdev->dev, data_buf); +} + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf) +{ + if (!buf) + return; + + ena_dump_stats_ex(adapter, buf); +} + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter) +{ + ena_dump_stats_ex(adapter, NULL); +} diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c new file mode 100644 index 000000000000..bfeaec5bd7b9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -0,0 +1,3272 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifdef CONFIG_RFS_ACCEL +#include +#endif /* CONFIG_RFS_ACCEL */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_netdev.h" +#include "ena_pci_id_tbl.h" + +static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; + +MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); +MODULE_DESCRIPTION(DEVICE_NAME); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (5 * HZ) + +#define ENA_NAPI_BUDGET 64 + +#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ + NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) +static int debug = -1; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static struct ena_aenq_handlers aenq_handlers; + +static struct workqueue_struct *ena_wq; + +MODULE_DEVICE_TABLE(pci, ena_pci_tbl); + +static int ena_rss_init_default(struct ena_adapter *adapter); + +static void ena_tx_timeout(struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.tx_timeout++; + u64_stats_update_end(&adapter->syncp); + + netif_err(adapter, tx_err, dev, "Transmit time out\n"); + + /* Change the state of the device to trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +} + +static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + adapter->rx_ring[i].mtu = mtu; +} + +static int ena_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int ret; + + if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { + netif_err(adapter, drv, dev, + "Invalid MTU setting. new_mtu: %d\n", new_mtu); + + return -EINVAL; + } + + ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); + if (!ret) { + netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); + update_rx_ring_mtu(adapter, new_mtu); + dev->mtu = new_mtu; + } else { + netif_err(adapter, drv, dev, "Failed to set MTU to %d\n", + new_mtu); + } + + return ret; +} + +static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) +{ +#ifdef CONFIG_RFS_ACCEL + u32 i; + int rc; + + adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues); + if (!adapter->netdev->rx_cpu_rmap) + return -ENOMEM; + for (i = 0; i < adapter->num_queues; i++) { + int irq_idx = ENA_IO_IRQ_IDX(i); + + rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, + adapter->msix_entries[irq_idx].vector); + if (rc) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + return rc; + } + } +#endif /* CONFIG_RFS_ACCEL */ + return 0; +} + +static void ena_init_io_rings_common(struct ena_adapter *adapter, + struct ena_ring *ring, u16 qid) +{ + ring->qid = qid; + ring->pdev = adapter->pdev; + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + ring->napi = &adapter->ena_napi[qid].napi; + ring->adapter = adapter; + ring->ena_dev = adapter->ena_dev; + ring->per_napi_packets = 0; + ring->per_napi_bytes = 0; + ring->cpu = 0; + u64_stats_init(&ring->syncp); +} + +static void ena_init_io_rings(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev; + struct ena_ring *txr, *rxr; + int i; + + ena_dev = adapter->ena_dev; + + for (i = 0; i < adapter->num_queues; i++) { + txr = &adapter->tx_ring[i]; + rxr = &adapter->rx_ring[i]; + + /* TX/RX common ring state */ + ena_init_io_rings_common(adapter, txr, i); + ena_init_io_rings_common(adapter, rxr, i); + + /* TX specific ring state */ + txr->ring_size = adapter->tx_ring_size; + txr->tx_max_header_size = ena_dev->tx_max_header_size; + txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; + txr->sgl_size = adapter->max_tx_sgl_size; + txr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); + + /* RX specific ring state */ + rxr->ring_size = adapter->rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + } +} + +/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Return 0 on success, negative on failure + */ +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, i, node; + + if (tx_ring->tx_buffer_info) { + netif_err(adapter, ifup, + adapter->netdev, "tx_buffer_info info is not NULL"); + return -EEXIST; + } + + size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; + node = cpu_to_node(ena_irq->cpu); + + tx_ring->tx_buffer_info = vzalloc_node(size, node); + if (!tx_ring->tx_buffer_info) { + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + return -ENOMEM; + } + + size = sizeof(u16) * tx_ring->ring_size; + tx_ring->free_tx_ids = vzalloc_node(size, node); + if (!tx_ring->free_tx_ids) { + tx_ring->free_tx_ids = vzalloc(size); + if (!tx_ring->free_tx_ids) { + vfree(tx_ring->tx_buffer_info); + return -ENOMEM; + } + } + + /* Req id ring for TX out of order completions */ + for (i = 0; i < tx_ring->ring_size; i++) + tx_ring->free_tx_ids[i] = i; + + /* Reset tx statistics */ + memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + tx_ring->cpu = ena_irq->cpu; + return 0; +} + +/* ena_free_tx_resources - Free I/O Tx Resources per Queue + * @adapter: network interface device structure + * @qid: queue index + * + * Free all transmit software resources + */ +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + vfree(tx_ring->free_tx_ids); + tx_ring->free_tx_ids = NULL; +} + +/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues + * @adapter: private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_tx_resources(adapter, i); + if (rc) + goto err_setup_tx; + } + + return 0; + +err_setup_tx: + + netif_err(adapter, ifup, adapter->netdev, + "Tx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_tx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_tx_resources(adapter, i); +} + +/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Returns 0 on success, negative on failure + */ +static int ena_setup_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, node; + + if (rx_ring->rx_buffer_info) { + netif_err(adapter, ifup, adapter->netdev, + "rx_buffer_info is not NULL"); + return -EEXIST; + } + + /* alloc extra element so in rx path + * we can always prefetch rx_info + 1 + */ + size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1); + node = cpu_to_node(ena_irq->cpu); + + rx_ring->rx_buffer_info = vzalloc_node(size, node); + if (!rx_ring->rx_buffer_info) { + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + return -ENOMEM; + } + + /* Reset rx statistics */ + memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); + + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + rx_ring->cpu = ena_irq->cpu; + + return 0; +} + +/* ena_free_rx_resources - Free I/O Rx Resources + * @adapter: network interface device structure + * @qid: queue index + * + * Free all receive software resources + */ +static void ena_free_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; +} + +/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_rx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_rx_resources(adapter, i); + if (rc) + goto err_setup_rx; + } + + return 0; + +err_setup_rx: + + netif_err(adapter, ifup, adapter->netdev, + "Rx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_rx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + */ +static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_resources(adapter, i); +} + +static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, gfp_t gfp) +{ + struct ena_com_buf *ena_buf; + struct page *page; + dma_addr_t dma; + + /* if previous allocated page is not used */ + if (unlikely(rx_info->page)) + return 0; + + page = alloc_page(gfp); + if (unlikely(!page)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.page_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return -ENOMEM; + } + + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.dma_mapping_err++; + u64_stats_update_end(&rx_ring->syncp); + + __free_page(page); + return -EIO; + } + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "alloc page %p, rx_info %p\n", page, rx_info); + + rx_info->page = page; + rx_info->page_offset = 0; + ena_buf = &rx_info->ena_buf; + ena_buf->paddr = dma; + ena_buf->len = PAGE_SIZE; + + return 0; +} + +static void ena_free_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info) +{ + struct page *page = rx_info->page; + struct ena_com_buf *ena_buf = &rx_info->ena_buf; + + if (unlikely(!page)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "Trying to free unallocated buffer\n"); + return; + } + + dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE, + DMA_FROM_DEVICE); + + __free_page(page); + rx_info->page = NULL; +} + +static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) +{ + u16 next_to_use; + u32 i; + int rc; + + next_to_use = rx_ring->next_to_use; + + for (i = 0; i < num; i++) { + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[next_to_use]; + + rc = ena_alloc_rx_page(rx_ring, rx_info, + __GFP_COLD | GFP_ATOMIC | __GFP_COMP); + if (unlikely(rc < 0)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "failed to alloc buffer for rx queue %d\n", + rx_ring->qid); + break; + } + rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, + &rx_info->ena_buf, + next_to_use); + if (unlikely(rc)) { + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "failed to add buffer for rx queue %d\n", + rx_ring->qid); + break; + } + next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, + rx_ring->ring_size); + } + + if (unlikely(i < num)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.refil_partial++; + u64_stats_update_end(&rx_ring->syncp); + netdev_warn(rx_ring->netdev, + "refilled rx qid %d with only %d buffers (from %d)\n", + rx_ring->qid, i, num); + } + + if (likely(i)) { + /* Add memory barrier to make sure the desc were written before + * issue a doorbell + */ + wmb(); + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); + } + + rx_ring->next_to_use = next_to_use; + + return i; +} + +static void ena_free_rx_bufs(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + u32 i; + + for (i = 0; i < rx_ring->ring_size; i++) { + struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; + + if (rx_info->page) + ena_free_rx_page(rx_ring, rx_info); + } +} + +/* ena_refill_all_rx_bufs - allocate all queues Rx buffers + * @adapter: board private structure + * + */ +static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, rc, bufs_num; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + bufs_num = rx_ring->ring_size - 1; + rc = ena_refill_rx_bufs(rx_ring, bufs_num); + + if (unlikely(rc != bufs_num)) + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "refilling Queue %d failed. allocated %d buffers from: %d\n", + i, rc, bufs_num); + } +} + +static void ena_free_all_rx_bufs(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_bufs(adapter, i); +} + +/* ena_free_tx_bufs - Free Tx Buffers per Queue + * @tx_ring: TX ring for which buffers be freed + */ +static void ena_free_tx_bufs(struct ena_ring *tx_ring) +{ + u32 i; + + for (i = 0; i < tx_ring->ring_size; i++) { + struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; + struct ena_com_buf *ena_buf; + int nr_frags; + int j; + + if (!tx_info->skb) + continue; + + netdev_notice(tx_ring->netdev, + "free uncompleted tx skb qid %d idx 0x%x\n", + tx_ring->qid, i); + + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (j = 0; j < nr_frags; j++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + } + + dev_kfree_skb_any(tx_info->skb); + } + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); +} + +static void ena_free_all_tx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *tx_ring; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + ena_free_tx_bufs(tx_ring); + } +} + +static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_TXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_RXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_io_queues(struct ena_adapter *adapter) +{ + ena_destroy_all_tx_queues(adapter); + ena_destroy_all_rx_queues(adapter); +} + +static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; + + if (likely(req_id < tx_ring->ring_size)) { + tx_info = &tx_ring->tx_buffer_info[req_id]; + if (likely(tx_info->skb)) + return 0; + } + + if (tx_info) + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_info doesn't have valid skb\n"); + else + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "Invalid req_id: %hu\n", req_id); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.bad_req_id++; + u64_stats_update_end(&tx_ring->syncp); + + /* Trigger device reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); + return -EFAULT; +} + +static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) +{ + struct netdev_queue *txq; + bool above_thresh; + u32 tx_bytes = 0; + u32 total_done = 0; + u16 next_to_clean; + u16 req_id; + int tx_pkts = 0; + int rc; + + next_to_clean = tx_ring->next_to_clean; + txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid); + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct sk_buff *skb; + struct ena_com_buf *ena_buf; + int i, nr_frags; + + rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_tx_req_id(tx_ring, req_id); + if (rc) + break; + + tx_info = &tx_ring->tx_buffer_info[req_id]; + skb = tx_info->skb; + + /* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */ + prefetch(&skb->end); + + tx_info->skb = NULL; + tx_info->last_jiffies = 0; + + if (likely(tx_info->num_of_bufs != 0)) { + ena_buf = tx_info->bufs; + + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (i = 0; i < nr_frags; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + } + } + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d skb %p completed\n", tx_ring->qid, + skb); + + tx_bytes += skb->len; + dev_kfree_skb(skb); + tx_pkts++; + total_done += tx_info->tx_descs; + + tx_ring->free_tx_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + tx_ring->ring_size); + } + + tx_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); + + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + tx_ring->qid, tx_pkts); + + /* need to make the rings circular update visible to + * ena_start_xmit() before checking for netif_queue_stopped(). + */ + smp_mb(); + + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { + __netif_tx_lock(txq, smp_processor_id()); + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (netif_tx_queue_stopped(txq) && above_thresh) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + __netif_tx_unlock(txq); + } + + tx_ring->per_napi_bytes += tx_bytes; + tx_ring->per_napi_packets += tx_pkts; + + return tx_pkts; +} + +static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + struct ena_com_rx_buf_info *ena_bufs, + u32 descs, + u16 *next_to_clean) +{ + struct sk_buff *skb; + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[*next_to_clean]; + u32 len; + u32 buf = 0; + void *va; + + len = ena_bufs[0].len; + if (unlikely(!rx_info->page)) { + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Page is NULL\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_info %p page %p\n", + rx_info, rx_info->page); + + /* save virt address of first buffer */ + va = page_address(rx_info->page) + rx_info->page_offset; + prefetch(va + NET_IP_ALIGN); + + if (len <= rx_ring->rx_copybreak) { + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_copybreak); + if (unlikely(!skb)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Failed to allocate skb\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx allocated small packet. len %d. data_len %d\n", + skb->len, skb->data_len); + + /* sync this buffer for CPU use */ + dma_sync_single_for_cpu(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, va, len); + dma_sync_single_for_device(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, + rx_ring->ring_size); + return skb; + } + + skb = napi_get_frags(rx_ring->napi); + if (unlikely(!skb)) { + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "Failed allocating skb\n"); + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return NULL; + } + + do { + dma_unmap_page(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + PAGE_SIZE, DMA_FROM_DEVICE); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, + rx_info->page_offset, len, PAGE_SIZE); + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx skb updated. len %d. data_len %d\n", + skb->len, skb->data_len); + + rx_info->page = NULL; + *next_to_clean = + ENA_RX_RING_IDX_NEXT(*next_to_clean, + rx_ring->ring_size); + if (likely(--descs == 0)) + break; + rx_info = &rx_ring->rx_buffer_info[*next_to_clean]; + len = ena_bufs[++buf].len; + } while (1); + + return skb; +} + +/* ena_rx_checksum - indicate in skb if hw indicated a good cksum + * @adapter: structure containing adapter specific data + * @ena_rx_ctx: received packet context/metadata + * @skb: skb currently being received and modified + */ +static inline void ena_rx_checksum(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + /* Rx csum disabled */ + if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* For fragmented packets the checksum isn't valid */ + if (ena_rx_ctx->frag) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* if IP and error */ + if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && + (ena_rx_ctx->l3_csum_err))) { + /* ipv4 checksum error */ + skb->ip_summed = CHECKSUM_NONE; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX IPv4 header checksum error\n"); + return; + } + + /* if TCP/UDP */ + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) { + if (unlikely(ena_rx_ctx->l4_csum_err)) { + /* TCP/UDP checksum error */ + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX L4 checksum error\n"); + skb->ip_summed = CHECKSUM_NONE; + return; + } + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} + +static void ena_set_rx_hash(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + enum pkt_hash_types hash_type; + + if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) { + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) + + hash_type = PKT_HASH_TYPE_L4; + else + hash_type = PKT_HASH_TYPE_NONE; + + /* Override hash type if the packet is fragmented */ + if (ena_rx_ctx->frag) + hash_type = PKT_HASH_TYPE_NONE; + + skb_set_hash(skb, ena_rx_ctx->hash, hash_type); + } +} + +/* ena_clean_rx_irq - Cleanup RX irq + * @rx_ring: RX ring to clean + * @napi: napi handler + * @budget: how many packets driver is allowed to clean + * + * Returns the number of cleaned buffers. + */ +static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + u32 budget) +{ + u16 next_to_clean = rx_ring->next_to_clean; + u32 res_budget, work_done; + + struct ena_com_rx_ctx ena_rx_ctx; + struct ena_adapter *adapter; + struct sk_buff *skb; + int refill_required; + int refill_threshold; + int rc = 0; + int total_len = 0; + int rx_copybreak_pkt = 0; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "%s qid %d\n", __func__, rx_ring->qid); + res_budget = budget; + + do { + ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; + ena_rx_ctx.max_bufs = rx_ring->sgl_size; + ena_rx_ctx.descs = 0; + rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, + rx_ring->ena_com_io_sq, + &ena_rx_ctx); + if (unlikely(rc)) + goto error; + + if (unlikely(ena_rx_ctx.descs == 0)) + break; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", + rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, + ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + + /* allocate skb and fill it */ + skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, + &next_to_clean); + + /* exit if we failed to retrieve a buffer */ + if (unlikely(!skb)) { + next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean, + ena_rx_ctx.descs, + rx_ring->ring_size); + break; + } + + ena_rx_checksum(rx_ring, &ena_rx_ctx, skb); + + ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb); + + skb_record_rx_queue(skb, rx_ring->qid); + + if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { + total_len += rx_ring->ena_bufs[0].len; + rx_copybreak_pkt++; + napi_gro_receive(napi, skb); + } else { + total_len += skb->len; + napi_gro_frags(napi); + } + + res_budget--; + } while (likely(res_budget)); + + work_done = budget - res_budget; + rx_ring->per_napi_bytes += total_len; + rx_ring->per_napi_packets += work_done; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bytes += total_len; + rx_ring->rx_stats.cnt += work_done; + rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt; + u64_stats_update_end(&rx_ring->syncp); + + rx_ring->next_to_clean = next_to_clean; + + refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER; + + /* Optimization, try to batch new rx buffers */ + if (refill_required > refill_threshold) { + ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); + ena_refill_rx_bufs(rx_ring, refill_required); + } + + return work_done; + +error: + adapter = netdev_priv(rx_ring->netdev); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_desc_num++; + u64_stats_update_end(&rx_ring->syncp); + + /* Too many desc from the device. Trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + + return 0; +} + +inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, + struct ena_ring *tx_ring) +{ + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + ena_com_calculate_interrupt_delay(rx_ring->ena_dev, + rx_ring->per_napi_packets, + rx_ring->per_napi_bytes, + &rx_ring->smoothed_interval, + &rx_ring->moder_tbl_idx); + + /* Reset per napi packets/bytes */ + tx_ring->per_napi_packets = 0; + tx_ring->per_napi_bytes = 0; + rx_ring->per_napi_packets = 0; + rx_ring->per_napi_bytes = 0; +} + +static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) +{ + int cpu = get_cpu(); + int numa_node; + + /* Check only one ring since the 2 rings are running on the same cpu */ + if (likely(tx_ring->cpu == cpu)) + goto out; + + numa_node = cpu_to_node(cpu); + put_cpu(); + + if (numa_node != NUMA_NO_NODE) { + ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); + ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + } + + tx_ring->cpu = cpu; + rx_ring->cpu = cpu; + + return; +out: + put_cpu(); +} + +static int ena_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + struct ena_ring *tx_ring, *rx_ring; + struct ena_eth_io_intr_reg intr_reg; + + u32 tx_work_done; + u32 rx_work_done; + int tx_budget; + int napi_comp_call = 0; + int ret; + + tx_ring = ena_napi->tx_ring; + rx_ring = ena_napi->rx_ring; + + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; + + if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); + rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + + if ((budget > rx_work_done) && (tx_budget > tx_work_done)) { + napi_complete_done(napi, rx_work_done); + + napi_comp_call = 1; + /* Tx and Rx share the same interrupt vector */ + if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) + ena_adjust_intr_moderation(rx_ring, tx_ring); + + /* Update intr register: rx intr delay, tx intr delay and + * interrupt unmask + */ + ena_com_update_intr_reg(&intr_reg, + rx_ring->smoothed_interval, + tx_ring->smoothed_interval, + true); + + /* It is a shared MSI-X. Tx and Rx CQ have pointer to it. + * So we use one of them to reach the intr reg + */ + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + + ena_update_ring_numa_node(tx_ring, rx_ring); + + ret = rx_work_done; + } else { + ret = budget; + } + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.napi_comp += napi_comp_call; + tx_ring->tx_stats.tx_poll++; + u64_stats_update_end(&tx_ring->syncp); + + return ret; +} + +static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + ena_com_admin_q_comp_intr_handler(adapter->ena_dev); + + /* Don't call the aenq handler before probe is done */ + if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))) + ena_com_aenq_intr_handler(adapter->ena_dev, data); + + return IRQ_HANDLED; +} + +/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx + * @irq: interrupt number + * @data: pointer to a network interface private napi device structure + */ +static irqreturn_t ena_intr_msix_io(int irq, void *data) +{ + struct ena_napi *ena_napi = data; + + napi_schedule(&ena_napi->napi); + + return IRQ_HANDLED; +} + +static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) +{ + int i, msix_vecs, rc; + + if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, probe, adapter->netdev, + "Error, MSI-X is already enabled\n"); + return -EPERM; + } + + /* Reserved the max msix vectors we might need */ + msix_vecs = ENA_MAX_MSIX_VEC(num_queues); + + netif_dbg(adapter, probe, adapter->netdev, + "trying to enable MSI-X, vectors %d\n", msix_vecs); + + adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry)); + + if (!adapter->msix_entries) + return -ENOMEM; + + for (i = 0; i < msix_vecs; i++) + adapter->msix_entries[i].entry = i; + + rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs); + if (rc != 0) { + netif_err(adapter, probe, adapter->netdev, + "Failed to enable MSI-X, vectors %d rc %d\n", + msix_vecs, rc); + return -ENOSPC; + } + + netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n", + msix_vecs); + + if (msix_vecs >= 1) { + if (ena_init_rx_cpu_rmap(adapter)) + netif_warn(adapter, probe, adapter->netdev, + "Failed to map IRQs to CPUs\n"); + } + + adapter->msix_vecs = msix_vecs; + set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags); + + return 0; +} + +static void ena_setup_mgmnt_intr(struct ena_adapter *adapter) +{ + u32 cpu; + + snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, + ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s", + pci_name(adapter->pdev)); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = + ena_intr_msix_mgmnt; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = + adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector; + cpu = cpumask_first(cpu_online_mask); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu; + cpumask_set_cpu(cpu, + &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask); +} + +static void ena_setup_io_intr(struct ena_adapter *adapter) +{ + struct net_device *netdev; + int irq_idx, i, cpu; + + netdev = adapter->netdev; + + for (i = 0; i < adapter->num_queues; i++) { + irq_idx = ENA_IO_IRQ_IDX(i); + cpu = i % num_online_cpus(); + + snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, + "%s-Tx-Rx-%d", netdev->name, i); + adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io; + adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i]; + adapter->irq_tbl[irq_idx].vector = + adapter->msix_entries[irq_idx].vector; + adapter->irq_tbl[irq_idx].cpu = cpu; + + cpumask_set_cpu(cpu, + &adapter->irq_tbl[irq_idx].affinity_hint_mask); + } +} + +static int ena_request_mgmnt_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, probe, adapter->netdev, + "failed to request admin irq\n"); + return rc; + } + + netif_dbg(adapter, probe, adapter->netdev, + "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", + irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + + return rc; +} + +static int ena_request_io_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc = 0, i, k; + + if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ: MSI-X is not enabled\n"); + return -EINVAL; + } + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ. index %d rc %d\n", + i, rc); + goto err; + } + + netif_dbg(adapter, ifup, adapter->netdev, + "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", + i, irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + } + + return rc; + +err: + for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) { + irq = &adapter->irq_tbl[k]; + free_irq(irq->vector, irq->data); + } + + return rc; +} + +static void ena_free_mgmnt_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + synchronize_irq(irq->vector); + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); +} + +static void ena_free_io_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + int i; + +#ifdef CONFIG_RFS_ACCEL + if (adapter->msix_vecs >= 1) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); + } +} + +static void ena_disable_msix(struct ena_adapter *adapter) +{ + if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) + pci_disable_msix(adapter->pdev); + + if (adapter->msix_entries) + vfree(adapter->msix_entries); + adapter->msix_entries = NULL; +} + +static void ena_disable_io_intr_sync(struct ena_adapter *adapter) +{ + int i; + + if (!netif_running(adapter->netdev)) + return; + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) + synchronize_irq(adapter->irq_tbl[i].vector); +} + +static void ena_del_napi(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + netif_napi_del(&adapter->ena_napi[i].napi); +} + +static void ena_init_napi(struct ena_adapter *adapter) +{ + struct ena_napi *napi; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + napi = &adapter->ena_napi[i]; + + netif_napi_add(adapter->netdev, + &adapter->ena_napi[i].napi, + ena_io_poll, + ENA_NAPI_BUDGET); + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + napi->qid = i; + } +} + +static void ena_napi_disable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_disable(&adapter->ena_napi[i].napi); +} + +static void ena_napi_enable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_enable(&adapter->ena_napi[i].napi); +} + +static void ena_restore_ethtool_params(struct ena_adapter *adapter) +{ + adapter->tx_usecs = 0; + adapter->rx_usecs = 0; + adapter->tx_frames = 1; + adapter->rx_frames = 1; +} + +/* Configure the Rx forwarding */ +static int ena_rss_configure(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc; + + /* In case the RSS table wasn't initialized by probe */ + if (!ena_dev->rss.tbl_log_size) { + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to init RSS rc: %d\n", rc); + return rc; + } + } + + /* Set indirect table */ + rc = ena_com_indirect_table_set(ena_dev); + if (unlikely(rc && rc != -EPERM)) + return rc; + + /* Configure hash function (if supported) */ + rc = ena_com_set_hash_function(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + /* Configure hash inputs (if supported) */ + rc = ena_com_set_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + return 0; +} + +static int ena_up_complete(struct ena_adapter *adapter) +{ + int rc, i; + + rc = ena_rss_configure(adapter); + if (rc) + return rc; + + ena_init_napi(adapter); + + ena_change_mtu(adapter->netdev, adapter->netdev->mtu); + + ena_refill_all_rx_bufs(adapter); + + /* enable transmits */ + netif_tx_start_all_queues(adapter->netdev); + + ena_restore_ethtool_params(adapter); + + ena_napi_enable_all(adapter); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + + return 0; +} + +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_com_dev *ena_dev; + struct ena_ring *tx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + tx_ring = &adapter->tx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_TXQ_IDX(qid); + + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; + ctx.qid = ena_qid; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->tx_ring_size; + ctx.numa_node = cpu_to_node(tx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O TX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &tx_ring->ena_com_io_sq, + &tx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get TX queue handlers. TX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); + return rc; +} + +static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_tx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); + + return rc; +} + +static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_dev *ena_dev; + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_ring *rx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + rx_ring = &adapter->rx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_RXQ_IDX(qid); + + ctx.qid = ena_qid; + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->rx_ring_size; + ctx.numa_node = cpu_to_node(rx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O RX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &rx_ring->ena_com_io_sq, + &rx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get RX queue handlers. RX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); + + return rc; +} + +static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_rx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); + + return rc; +} + +static int ena_up(struct ena_adapter *adapter) +{ + int rc; + + netdev_dbg(adapter->netdev, "%s\n", __func__); + + ena_setup_io_intr(adapter); + + rc = ena_request_io_irq(adapter); + if (rc) + goto err_req_irq; + + /* allocate transmit descriptors */ + rc = ena_setup_all_tx_resources(adapter); + if (rc) + goto err_setup_tx; + + /* allocate receive descriptors */ + rc = ena_setup_all_rx_resources(adapter); + if (rc) + goto err_setup_rx; + + /* Create TX queues */ + rc = ena_create_all_io_tx_queues(adapter); + if (rc) + goto err_create_tx_queues; + + /* Create RX queues */ + rc = ena_create_all_io_rx_queues(adapter); + if (rc) + goto err_create_rx_queues; + + rc = ena_up_complete(adapter); + if (rc) + goto err_up; + + if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) + netif_carrier_on(adapter->netdev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_up++; + u64_stats_update_end(&adapter->syncp); + + set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + return rc; + +err_up: + ena_destroy_all_rx_queues(adapter); +err_create_rx_queues: + ena_destroy_all_tx_queues(adapter); +err_create_tx_queues: + ena_free_all_io_rx_resources(adapter); +err_setup_rx: + ena_free_all_io_tx_resources(adapter); +err_setup_tx: + ena_free_io_irq(adapter); +err_req_irq: + + return rc; +} + +static void ena_down(struct ena_adapter *adapter) +{ + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); + + clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_down++; + u64_stats_update_end(&adapter->syncp); + + /* After this point the napi handler won't enable the tx queue */ + ena_napi_disable_all(adapter); + netif_carrier_off(adapter->netdev); + netif_tx_disable(adapter->netdev); + + /* After destroy the queue there won't be any new interrupts */ + ena_destroy_all_io_queues(adapter); + + ena_disable_io_intr_sync(adapter); + ena_free_io_irq(adapter); + ena_del_napi(adapter); + + ena_free_all_tx_bufs(adapter); + ena_free_all_rx_bufs(adapter); + ena_free_all_io_tx_resources(adapter); + ena_free_all_io_rx_resources(adapter); +} + +/* ena_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + */ +static int ena_open(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc; + + /* Notify the stack of the actual queue counts. */ + rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); + return rc; + } + + rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); + return rc; + } + + rc = ena_up(adapter); + if (rc) + return rc; + + return rc; +} + +/* ena_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the drivers control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + */ +static int ena_close(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + netif_dbg(adapter, ifdown, netdev, "%s\n", __func__); + + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); + + return 0; +} + +static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) +{ + u32 mss = skb_shinfo(skb)->gso_size; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + u8 l4_protocol = 0; + + if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) { + ena_tx_ctx->l4_csum_enable = 1; + if (mss) { + ena_tx_ctx->tso_enable = 1; + ena_meta->l4_hdr_len = tcp_hdr(skb)->doff; + ena_tx_ctx->l4_csum_partial = 0; + } else { + ena_tx_ctx->tso_enable = 0; + ena_meta->l4_hdr_len = 0; + ena_tx_ctx->l4_csum_partial = 1; + } + + switch (ip_hdr(skb)->version) { + case IPVERSION: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; + if (ip_hdr(skb)->frag_off & htons(IP_DF)) + ena_tx_ctx->df = 1; + if (mss) + ena_tx_ctx->l3_csum_enable = 1; + l4_protocol = ip_hdr(skb)->protocol; + break; + case 6: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; + l4_protocol = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + + if (l4_protocol == IPPROTO_TCP) + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; + else + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; + + ena_meta->mss = mss; + ena_meta->l3_hdr_len = skb_network_header_len(skb); + ena_meta->l3_hdr_offset = skb_network_offset(skb); + ena_tx_ctx->meta_valid = 1; + + } else { + ena_tx_ctx->meta_valid = 0; + } +} + +static int ena_check_and_linearize_skb(struct ena_ring *tx_ring, + struct sk_buff *skb) +{ + int num_frags, header_len, rc; + + num_frags = skb_shinfo(skb)->nr_frags; + header_len = skb_headlen(skb); + + if (num_frags < tx_ring->sgl_size) + return 0; + + if ((num_frags == tx_ring->sgl_size) && + (header_len < tx_ring->tx_max_header_size)) + return 0; + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize++; + u64_stats_update_end(&tx_ring->syncp); + + rc = skb_linearize(skb); + if (unlikely(rc)) { + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize_failed++; + u64_stats_update_end(&tx_ring->syncp); + } + + return rc; +} + +/* Called with netif_tx_lock. */ +static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_tx_buffer *tx_info; + struct ena_com_tx_ctx ena_tx_ctx; + struct ena_ring *tx_ring; + struct netdev_queue *txq; + struct ena_com_buf *ena_buf; + void *push_hdr; + u32 len, last_frag; + u16 next_to_use; + u16 req_id; + u16 push_len; + u16 header_len; + dma_addr_t dma; + int qid, rc, nb_hw_desc; + int i = -1; + + netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); + /* Determine which tx ring we will be placed on */ + qid = skb_get_queue_mapping(skb); + tx_ring = &adapter->tx_ring[qid]; + txq = netdev_get_tx_queue(dev, qid); + + rc = ena_check_and_linearize_skb(tx_ring, skb); + if (unlikely(rc)) + goto error_drop_packet; + + skb_tx_timestamp(skb); + len = skb_headlen(skb); + + next_to_use = tx_ring->next_to_use; + req_id = tx_ring->free_tx_ids[next_to_use]; + tx_info = &tx_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + + WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); + ena_buf = tx_info->bufs; + tx_info->skb = skb; + + if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* prepared the push buffer */ + push_len = min_t(u32, len, tx_ring->tx_max_header_size); + header_len = push_len; + push_hdr = skb->data; + } else { + push_len = 0; + header_len = min_t(u32, len, tx_ring->tx_max_header_size); + push_hdr = NULL; + } + + netif_dbg(adapter, tx_queued, dev, + "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, + push_hdr, push_len); + + if (len > push_len) { + dma = dma_map_single(tx_ring->dev, skb->data + push_len, + len - push_len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len - push_len; + + ena_buf++; + tx_info->num_of_bufs++; + } + + last_frag = skb_shinfo(skb)->nr_frags; + + for (i = 0; i < last_frag; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + len = skb_frag_size(frag); + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len; + ena_buf++; + } + + tx_info->num_of_bufs += last_frag; + + memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = header_len; + + /* set flags and meta data */ + ena_tx_csum(&ena_tx_ctx, skb); + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, + &nb_hw_desc); + + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + tx_ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&tx_ring->syncp); + netif_tx_stop_queue(txq); + goto error_unmap_dma; + } + + netdev_tx_sent_queue(txq, skb->len); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.cnt++; + tx_ring->tx_stats.bytes += skb->len; + u64_stats_update_end(&tx_ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + + tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + tx_ring->ring_size); + + /* This WMB is aimed to: + * 1 - perform smp barrier before reading next_to_completion + * 2 - make sure the desc were written before trigger DB + */ + wmb(); + + /* stop the queue when no more space available, the packet can have up + * to sgl_size + 2. one for the meta descriptor and one for header + * (if the header is larger than tx_max_header_size). + */ + if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) < + (tx_ring->sgl_size + 2))) { + netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", + __func__, qid); + + netif_tx_stop_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + u64_stats_update_end(&tx_ring->syncp); + + /* There is a rare condition where this function decide to + * stop the queue but meanwhile clean_tx_irq updates + * next_to_completion and terminates. + * The queue will remain stopped forever. + * To solve this issue this function perform rmb, check + * the wakeup condition and wake up the queue if needed. + */ + smp_rmb(); + + if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) + > ENA_TX_WAKEUP_THRESH) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + } + + if (netif_xmit_stopped(txq) || !skb->xmit_more) { + /* trigger the dma engine */ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.doorbells++; + u64_stats_update_end(&tx_ring->syncp); + } + + return NETDEV_TX_OK; + +error_report_dma_error: + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&tx_ring->syncp); + netdev_warn(adapter->netdev, "failed to map skb\n"); + + tx_info->skb = NULL; + +error_unmap_dma: + if (i >= 0) { + /* save value of frag that failed */ + last_frag = i; + + /* start back at beginning and unmap skb */ + tx_info->skb = NULL; + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + for (i = 0; i < last_frag; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + } + } + +error_drop_packet: + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ena_netpoll(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + u16 qid; + /* we suspect that this is good for in--kernel network services that + * want to loop incoming skb rx to tx in normal user generated traffic, + * most probably we will not get to this + */ + if (skb_rx_queue_recorded(skb)) + qid = skb_get_rx_queue(skb); + else + qid = fallback(dev, skb); + + return qid; +} + +static void ena_config_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_admin_host_info *host_info; + int rc; + + /* Allocate only the host info */ + rc = ena_com_allocate_host_info(ena_dev); + if (rc) { + pr_err("Cannot allocate host info\n"); + return; + } + + host_info = ena_dev->host_attr.host_info; + + host_info->os_type = ENA_ADMIN_OS_LINUX; + host_info->kernel_ver = LINUX_VERSION_CODE; + strncpy(host_info->kernel_ver_str, utsname()->version, + sizeof(host_info->kernel_ver_str) - 1); + host_info->os_dist = 0; + strncpy(host_info->os_dist_str, utsname()->release, + sizeof(host_info->os_dist_str) - 1); + host_info->driver_version = + (DRV_MODULE_VER_MAJOR) | + (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | + (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); + + rc = ena_com_set_host_attributes(ena_dev); + if (rc) { + if (rc == -EPERM) + pr_warn("Cannot set host attributes\n"); + else + pr_err("Cannot set host attributes\n"); + + goto err; + } + + return; + +err: + ena_com_delete_host_info(ena_dev); +} + +static void ena_config_debug_area(struct ena_adapter *adapter) +{ + u32 debug_area_size; + int rc, ss_count; + + ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS); + if (ss_count <= 0) { + netif_err(adapter, drv, adapter->netdev, + "SS count is negative\n"); + return; + } + + /* allocate 32 bytes for each string and 64bit for the value */ + debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; + + rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size); + if (rc) { + pr_err("Cannot allocate debug area\n"); + return; + } + + rc = ena_com_set_host_attributes(adapter->ena_dev); + if (rc) { + if (rc == -EPERM) + netif_warn(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + else + netif_err(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + goto err; + } + + return; +err: + ena_com_delete_debug_area(adapter->ena_dev); +} + +static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_admin_basic_stats ena_stats; + int rc; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return NULL; + + rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats); + if (rc) + return NULL; + + stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) | + ena_stats.tx_bytes_low; + stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) | + ena_stats.rx_bytes_low; + + stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) | + ena_stats.rx_pkts_low; + stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) | + ena_stats.tx_pkts_low; + + stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) | + ena_stats.rx_drops_low; + + stats->multicast = 0; + stats->collisions = 0; + + stats->rx_length_errors = 0; + stats->rx_crc_errors = 0; + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = 0; + stats->rx_missed_errors = 0; + stats->tx_window_errors = 0; + + stats->rx_errors = 0; + stats->tx_errors = 0; + + return stats; +} + +static const struct net_device_ops ena_netdev_ops = { + .ndo_open = ena_open, + .ndo_stop = ena_close, + .ndo_start_xmit = ena_start_xmit, + .ndo_select_queue = ena_select_queue, + .ndo_get_stats64 = ena_get_stats64, + .ndo_tx_timeout = ena_tx_timeout, + .ndo_change_mtu = ena_change_mtu, + .ndo_set_mac_address = NULL, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ena_netpoll, +#endif /* CONFIG_NET_POLL_CONTROLLER */ +}; + +static void ena_device_io_suspend(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, suspend_io_task); + struct net_device *netdev = adapter->netdev; + + /* ena_napi_disable_all disables only the IO handling. + * We are still subject to AENQ keep alive watchdog. + */ + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_suspend++; + u64_stats_update_begin(&adapter->syncp); + ena_napi_disable_all(adapter); + netif_tx_lock(netdev); + netif_device_detach(netdev); + netif_tx_unlock(netdev); +} + +static void ena_device_io_resume(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, resume_io_task); + struct net_device *netdev = adapter->netdev; + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_resume++; + u64_stats_update_end(&adapter->syncp); + + netif_device_attach(netdev); + ena_napi_enable_all(adapter); +} + +static int ena_device_validate_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr, + adapter->mac_addr); + if (!rc) { + netif_err(adapter, drv, netdev, + "Error, mac address are different\n"); + return -EINVAL; + } + + if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) || + (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) { + netif_err(adapter, drv, netdev, + "Error, device doesn't support enough queues\n"); + return -EINVAL; + } + + if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { + netif_err(adapter, drv, netdev, + "Error, device max mtu is smaller than netdev MTU\n"); + return -EINVAL; + } + + return 0; +} + +static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, + struct ena_com_dev_get_features_ctx *get_feat_ctx, + bool *wd_state) +{ + struct device *dev = &pdev->dev; + bool readless_supported; + u32 aenq_groups; + int dma_width; + int rc; + + rc = ena_com_mmio_reg_read_request_init(ena_dev); + if (rc) { + dev_err(dev, "failed to init mmio read less\n"); + return rc; + } + + /* The PCIe configuration space revision id indicate if mmio reg + * read is disabled + */ + readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ); + ena_com_set_mmio_read_mode(ena_dev, readless_supported); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(dev, "Can not reset device\n"); + goto err_mmio_read_less; + } + + rc = ena_com_validate_version(ena_dev); + if (rc) { + dev_err(dev, "device version is too low\n"); + goto err_mmio_read_less; + } + + dma_width = ena_com_get_dma_width(ena_dev); + if (dma_width < 0) { + dev_err(dev, "Invalid dma width value %d", dma_width); + rc = dma_width; + goto err_mmio_read_less; + } + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); + goto err_mmio_read_less; + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", + rc); + goto err_mmio_read_less; + } + + /* ENA admin level init */ + rc = ena_com_admin_init(ena_dev, &aenq_handlers, true); + if (rc) { + dev_err(dev, + "Can not initialize ena admin queue with device\n"); + goto err_mmio_read_less; + } + + /* To enable the msix interrupts the driver needs to know the number + * of queues. So the driver uses polling mode to retrieve this + * information + */ + ena_com_set_admin_polling_mode(ena_dev, true); + + /* Get Device Attributes*/ + rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); + if (rc) { + dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc); + goto err_admin_init; + } + + /* Try to turn all the available aenq groups */ + aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | + BIT(ENA_ADMIN_FATAL_ERROR) | + BIT(ENA_ADMIN_WARNING) | + BIT(ENA_ADMIN_NOTIFICATION) | + BIT(ENA_ADMIN_KEEP_ALIVE); + + aenq_groups &= get_feat_ctx->aenq.supported_groups; + + rc = ena_com_set_aenq_config(ena_dev, aenq_groups); + if (rc) { + dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc); + goto err_admin_init; + } + + *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); + + ena_config_host_info(ena_dev); + + return 0; + +err_admin_init: + ena_com_admin_destroy(ena_dev); +err_mmio_read_less: + ena_com_mmio_reg_read_request_destroy(ena_dev); + + return rc; +} + +static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, + int io_vectors) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc; + + rc = ena_enable_msix(adapter, io_vectors); + if (rc) { + dev_err(dev, "Can not reserve msix vectors\n"); + return rc; + } + + ena_setup_mgmnt_intr(adapter); + + rc = ena_request_mgmnt_irq(adapter); + if (rc) { + dev_err(dev, "Can not setup management interrupts\n"); + goto err_disable_msix; + } + + ena_com_set_admin_polling_mode(ena_dev, false); + + ena_com_admin_aenq_enable(ena_dev); + + return 0; + +err_disable_msix: + ena_disable_msix(adapter); + + return rc; +} + +static void ena_fw_reset_device(struct work_struct *work) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, reset_task); + struct net_device *netdev = adapter->netdev; + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct pci_dev *pdev = adapter->pdev; + bool dev_up, wd_state; + int rc; + + del_timer_sync(&adapter->timer_service); + + rtnl_lock(); + + dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_com_set_admin_running_state(ena_dev, false); + + /* After calling ena_close the tx queues and the napi + * are disabled so no one can interfere or touch the + * data structures + */ + ena_close(netdev); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(&pdev->dev, "Device reset failed\n"); + goto err; + } + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + /* Finish with the destroy part. Start the init part */ + + rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "Can not initialize device\n"); + goto err; + } + adapter->wd_state = wd_state; + + rc = ena_device_validate_params(adapter, &get_feat_ctx); + if (rc) { + dev_err(&pdev->dev, "Validation of device parameters failed\n"); + goto err_device_destroy; + } + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, + adapter->num_queues); + if (rc) { + dev_err(&pdev->dev, "Enable MSI-X failed\n"); + goto err_device_destroy; + } + /* If the interface was up before the reset bring it up */ + if (dev_up) { + rc = ena_up(adapter); + if (rc) { + dev_err(&pdev->dev, "Failed to create I/O queues\n"); + goto err_disable_msix; + } + } + + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + + rtnl_unlock(); + + dev_err(&pdev->dev, "Device reset completed successfully\n"); + + return; +err_disable_msix: + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_device_destroy: + ena_com_admin_destroy(ena_dev); +err: + rtnl_unlock(); + + dev_err(&pdev->dev, + "Reset attempt failed. Can not reset the device\n"); +} + +static void check_for_missing_tx_completions(struct ena_adapter *adapter) +{ + struct ena_tx_buffer *tx_buf; + unsigned long last_jiffies; + struct ena_ring *tx_ring; + int i, j, budget; + u32 missed_tx; + + /* Make sure the driver doesn't turn the device in other process */ + smp_rmb(); + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + budget = ENA_MONITORED_TX_QUEUES; + + for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + + for (j = 0; j < tx_ring->ring_size; j++) { + tx_buf = &tx_ring->tx_buffer_info[j]; + last_jiffies = tx_buf->last_jiffies; + if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { + netif_notice(adapter, tx_err, adapter->netdev, + "Found a Tx that wasn't completed on time, qid %d, index %d.\n", + tx_ring->qid, j); + + u64_stats_update_begin(&tx_ring->syncp); + missed_tx = tx_ring->tx_stats.missing_tx_comp++; + u64_stats_update_end(&tx_ring->syncp); + + /* Clear last jiffies so the lost buffer won't + * be counted twice. + */ + tx_buf->last_jiffies = 0; + + if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n", + missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + } + } + + budget--; + if (!budget) + break; + } + + adapter->last_monitored_tx_qid = i % adapter->num_queues; +} + +/* Check for keep alive expiration */ +static void check_for_missing_keep_alive(struct ena_adapter *adapter) +{ + unsigned long keep_alive_expired; + + if (!adapter->wd_state) + return; + + keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + + ENA_DEVICE_KALIVE_TIMEOUT); + if (unlikely(time_is_before_jiffies(keep_alive_expired))) { + netif_err(adapter, drv, adapter->netdev, + "Keep alive watchdog timeout.\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.wd_expired++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void check_for_admin_com_state(struct ena_adapter *adapter) +{ + if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) { + netif_err(adapter, drv, adapter->netdev, + "ENA admin queue is not in running state!\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.admin_q_pause++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void ena_update_host_info(struct ena_admin_host_info *host_info, + struct net_device *netdev) +{ + host_info->supported_network_features[0] = + netdev->features & GENMASK_ULL(31, 0); + host_info->supported_network_features[1] = + (netdev->features & GENMASK_ULL(63, 32)) >> 32; +} + +static void ena_timer_service(unsigned long data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr; + struct ena_admin_host_info *host_info = + adapter->ena_dev->host_attr.host_info; + + check_for_missing_keep_alive(adapter); + + check_for_admin_com_state(adapter); + + check_for_missing_tx_completions(adapter); + + if (debug_area) + ena_dump_stats_to_buf(adapter, debug_area); + + if (host_info) + ena_update_host_info(host_info, adapter->netdev); + + if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, drv, adapter->netdev, + "Trigger reset is on\n"); + ena_dump_stats_to_dmesg(adapter); + queue_work(ena_wq, &adapter->reset_task); + return; + } + + /* Reset the timer */ + mod_timer(&adapter->timer_service, jiffies + HZ); +} + +static int ena_calc_io_queue_num(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + int io_sq_num, io_queue_num; + + /* In case of LLQ use the llq number in the get feature cmd */ + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq_num = get_feat_ctx->max_queues.max_llq_num; + + if (io_sq_num == 0) { + dev_err(&pdev->dev, + "Trying to use LLQ but llq_num is 0. Fall back into regular queues\n"); + + ena_dev->tx_mem_queue_type = + ENA_ADMIN_PLACEMENT_POLICY_HOST; + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + } else { + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + + io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES); + io_queue_num = min_t(int, io_queue_num, io_sq_num); + io_queue_num = min_t(int, io_queue_num, + get_feat_ctx->max_queues.max_cq_num); + /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ + io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); + if (unlikely(!io_queue_num)) { + dev_err(&pdev->dev, "The device doesn't have io queues\n"); + return -EFAULT; + } + + return io_queue_num; +} + +static void ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + bool has_mem_bar; + + has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); + + /* Enable push mode if device supports LLQ */ + if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0)) + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; + else + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; +} + +static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, + struct net_device *netdev) +{ + netdev_features_t dev_features = 0; + + /* Set offload features */ + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) + dev_features |= NETIF_F_IP_CSUM; + + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) + dev_features |= NETIF_F_IPV6_CSUM; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) + dev_features |= NETIF_F_TSO; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) + dev_features |= NETIF_F_TSO6; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK) + dev_features |= NETIF_F_TSO_ECN; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + netdev->features = + dev_features | + NETIF_F_SG | + NETIF_F_NTUPLE | + NETIF_F_RXHASH | + NETIF_F_HIGHDMA; + + netdev->hw_features |= netdev->features; + netdev->vlan_features |= netdev->features; +} + +static void ena_set_conf_feat_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *feat) +{ + struct net_device *netdev = adapter->netdev; + + /* Copy mac address */ + if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) { + eth_hw_addr_random(netdev); + ether_addr_copy(adapter->mac_addr, netdev->dev_addr); + } else { + ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr); + ether_addr_copy(netdev->dev_addr, adapter->mac_addr); + } + + /* Set offload features */ + ena_set_dev_offloads(feat, netdev); + + adapter->max_mtu = feat->dev_attr.max_mtu; +} + +static int ena_rss_init_default(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc, i; + u32 val; + + rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); + if (unlikely(rc)) { + dev_err(dev, "Cannot init indirect table\n"); + goto err_rss_init; + } + + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + val = ethtool_rxfh_indir_default(i, adapter->num_queues); + rc = ena_com_indirect_table_fill_entry(ena_dev, i, + ENA_IO_RXQ_IDX(val)); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill indirect table\n"); + goto err_fill_indir; + } + } + + rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL, + ENA_HASH_KEY_SIZE, 0xFFFFFFFF); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash function\n"); + goto err_fill_indir; + } + + rc = ena_com_set_default_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash control\n"); + goto err_fill_indir; + } + + return 0; + +err_fill_indir: + ena_com_rss_destroy(ena_dev); +err_rss_init: + + return rc; +} + +static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) +{ + int release_bars; + + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + pci_release_selected_regions(pdev, release_bars); +} + +static int ena_calc_queue_size(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + u16 *max_tx_sgl_size, + u16 *max_rx_sgl_size, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + u32 queue_size = ENA_DEFAULT_RING_SIZE; + + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_cq_depth); + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_sq_depth); + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_llq_depth); + + queue_size = rounddown_pow_of_two(queue_size); + + if (unlikely(!queue_size)) { + dev_err(&pdev->dev, "Invalid queue size\n"); + return -EFAULT; + } + + *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_tx_descs); + *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_rx_descs); + + return queue_size; +} + +/* ena_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in ena_pci_tbl + * + * Returns 0 on success, negative on failure + * + * ena_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + */ +static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + static int version_printed; + struct net_device *netdev; + struct ena_adapter *adapter; + struct ena_com_dev *ena_dev = NULL; + static int adapters_found; + int io_queue_num, bars, rc; + int queue_size; + u16 tx_sgl_size = 0; + u16 rx_sgl_size = 0; + bool wd_state; + + dev_dbg(&pdev->dev, "%s\n", __func__); + + if (version_printed++ == 0) + dev_info(&pdev->dev, "%s", version); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n"); + return rc; + } + + pci_set_master(pdev); + + ena_dev = vzalloc(sizeof(*ena_dev)); + if (!ena_dev) { + rc = -ENOMEM; + goto err_disable_device; + } + + bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (rc) { + dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", + rc); + goto err_free_ena_dev; + } + + ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR), + pci_resource_len(pdev, ENA_REG_BAR)); + if (!ena_dev->reg_bar) { + dev_err(&pdev->dev, "failed to remap regs bar\n"); + rc = -EFAULT; + goto err_free_region; + } + + ena_dev->dmadev = &pdev->dev; + + rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "ena device init failed\n"); + if (rc == -ETIME) + rc = -EPROBE_DEFER; + goto err_free_region; + } + + ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); + if (!ena_dev->mem_bar) { + rc = -EFAULT; + goto err_device_destroy; + } + } + + /* initial Tx interrupt delay, Assumes 1 usec granularity. + * Updated during device initialization with the real granularity + */ + ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; + io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); + queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size, + &rx_sgl_size, &get_feat_ctx); + if ((queue_size <= 0) || (io_queue_num <= 0)) { + rc = -EFAULT; + goto err_device_destroy; + } + + dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n", + io_queue_num, queue_size); + + /* dev zeroed in init_etherdev */ + netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); + if (!netdev) { + dev_err(&pdev->dev, "alloc_etherdev_mq failed\n"); + rc = -ENOMEM; + goto err_device_destroy; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + adapter = netdev_priv(netdev); + pci_set_drvdata(pdev, adapter); + + adapter->ena_dev = ena_dev; + adapter->netdev = netdev; + adapter->pdev = pdev; + + ena_set_conf_feat_params(adapter, &get_feat_ctx); + + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + + adapter->tx_ring_size = queue_size; + adapter->rx_ring_size = queue_size; + + adapter->max_tx_sgl_size = tx_sgl_size; + adapter->max_rx_sgl_size = rx_sgl_size; + + adapter->num_queues = io_queue_num; + adapter->last_monitored_tx_qid = 0; + + adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; + adapter->wd_state = wd_state; + + snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found); + + rc = ena_com_init_interrupt_moderation(adapter->ena_dev); + if (rc) { + dev_err(&pdev->dev, + "Failed to query interrupt moderation feature\n"); + goto err_netdev_destroy; + } + ena_init_io_rings(adapter); + + netdev->netdev_ops = &ena_netdev_ops; + netdev->watchdog_timeo = TX_TIMEOUT; + ena_set_ethtool_ops(netdev); + + netdev->priv_flags |= IFF_UNICAST_FLT; + + u64_stats_init(&adapter->syncp); + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num); + if (rc) { + dev_err(&pdev->dev, + "Failed to enable and set the admin interrupts\n"); + goto err_worker_destroy; + } + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc); + goto err_free_msix; + } + + ena_config_debug_area(adapter); + + memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len); + + netif_carrier_off(netdev); + + rc = register_netdev(netdev); + if (rc) { + dev_err(&pdev->dev, "Cannot register net device\n"); + goto err_rss; + } + + INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend); + INIT_WORK(&adapter->resume_io_task, ena_device_io_resume); + INIT_WORK(&adapter->reset_task, ena_fw_reset_device); + + adapter->last_keep_alive_jiffies = jiffies; + + init_timer(&adapter->timer_service); + adapter->timer_service.expires = round_jiffies(jiffies + HZ); + adapter->timer_service.function = ena_timer_service; + adapter->timer_service.data = (unsigned long)adapter; + + add_timer(&adapter->timer_service); + + dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n", + DEVICE_NAME, (long)pci_resource_start(pdev, 0), + netdev->dev_addr, io_queue_num); + + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + adapters_found++; + + return 0; + +err_rss: + ena_com_delete_debug_area(ena_dev); + ena_com_rss_destroy(ena_dev); +err_free_msix: + ena_com_dev_reset(ena_dev); + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_worker_destroy: + ena_com_destroy_interrupt_moderation(ena_dev); + del_timer(&adapter->timer_service); + cancel_work_sync(&adapter->suspend_io_task); + cancel_work_sync(&adapter->resume_io_task); +err_netdev_destroy: + free_netdev(netdev); +err_device_destroy: + ena_com_delete_host_info(ena_dev); + ena_com_admin_destroy(ena_dev); +err_free_region: + ena_release_bars(ena_dev, pdev); +err_free_ena_dev: + vfree(ena_dev); +err_disable_device: + pci_disable_device(pdev); + return rc; +} + +/*****************************************************************************/ +static int ena_sriov_configure(struct pci_dev *dev, int numvfs) +{ + int rc; + + if (numvfs > 0) { + rc = pci_enable_sriov(dev, numvfs); + if (rc != 0) { + dev_err(&dev->dev, + "pci_enable_sriov failed to enable: %d vfs with the error: %d\n", + numvfs, rc); + return rc; + } + + return numvfs; + } + + if (numvfs == 0) { + pci_disable_sriov(dev); + return 0; + } + + return -EINVAL; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ena_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void ena_remove(struct pci_dev *pdev) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + struct ena_com_dev *ena_dev; + struct net_device *netdev; + + if (!adapter) + /* This device didn't load properly and it's resources + * already released, nothing to do + */ + return; + + ena_dev = adapter->ena_dev; + netdev = adapter->netdev; + +#ifdef CONFIG_RFS_ACCEL + if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { + free_irq_cpu_rmap(netdev->rx_cpu_rmap); + netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + unregister_netdev(netdev); + del_timer_sync(&adapter->timer_service); + + cancel_work_sync(&adapter->reset_task); + + cancel_work_sync(&adapter->suspend_io_task); + + cancel_work_sync(&adapter->resume_io_task); + + ena_com_dev_reset(ena_dev); + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + free_netdev(netdev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_rss_destroy(ena_dev); + + ena_com_delete_debug_area(ena_dev); + + ena_com_delete_host_info(ena_dev); + + ena_release_bars(ena_dev, pdev); + + pci_disable_device(pdev); + + ena_com_destroy_interrupt_moderation(ena_dev); + + vfree(ena_dev); +} + +static struct pci_driver ena_pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = ena_pci_tbl, + .probe = ena_probe, + .remove = ena_remove, + .sriov_configure = ena_sriov_configure, +}; + +static int __init ena_init(void) +{ + pr_info("%s", version); + + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); + if (!ena_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; + } + + return pci_register_driver(&ena_pci_driver); +} + +static void __exit ena_cleanup(void) +{ + pci_unregister_driver(&ena_pci_driver); + + if (ena_wq) { + destroy_workqueue(ena_wq); + ena_wq = NULL; + } +} + +/****************************************************************************** + ******************************** AENQ Handlers ******************************* + *****************************************************************************/ +/* ena_update_on_link_change: + * Notify the network interface about the change in link status + */ +static void ena_update_on_link_change(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + struct ena_admin_aenq_link_change_desc *aenq_desc = + (struct ena_admin_aenq_link_change_desc *)aenq_e; + int status = aenq_desc->flags & + ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; + + if (status) { + netdev_dbg(adapter->netdev, "%s\n", __func__); + set_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_on(adapter->netdev); + } else { + clear_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_off(adapter->netdev); + } +} + +static void ena_keep_alive_wd(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + adapter->last_keep_alive_jiffies = jiffies; +} + +static void ena_notification(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION, + "Invalid group(%x) expected %x\n", + aenq_e->aenq_common_desc.group, + ENA_ADMIN_NOTIFICATION); + + switch (aenq_e->aenq_common_desc.syndrom) { + case ENA_ADMIN_SUSPEND: + /* Suspend just the IO queues. + * We deliberately don't suspend admin so the timer and + * the keep_alive events should remain. + */ + queue_work(ena_wq, &adapter->suspend_io_task); + break; + case ENA_ADMIN_RESUME: + queue_work(ena_wq, &adapter->resume_io_task); + break; + default: + netif_err(adapter, drv, adapter->netdev, + "Invalid aenq notification link state %d\n", + aenq_e->aenq_common_desc.syndrom); + } +} + +/* This handler will called for unknown event group or unimplemented handlers*/ +static void unimplemented_aenq_handler(void *data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + netif_err(adapter, drv, adapter->netdev, + "Unknown event was received or event with unimplemented handler\n"); +} + +static struct ena_aenq_handlers aenq_handlers = { + .handlers = { + [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, + [ENA_ADMIN_NOTIFICATION] = ena_notification, + [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, + }, + .unimplemented_handler = unimplemented_aenq_handler +}; + +module_init(ena_init); +module_exit(ena_cleanup); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h new file mode 100644 index 000000000000..69d7e9ed5bc8 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -0,0 +1,324 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_H +#define ENA_H + +#include +#include +#include +#include +#include +#include + +#include "ena_com.h" +#include "ena_eth_com.h" + +#define DRV_MODULE_VER_MAJOR 1 +#define DRV_MODULE_VER_MINOR 0 +#define DRV_MODULE_VER_SUBMINOR 2 + +#define DRV_MODULE_NAME "ena" +#ifndef DRV_MODULE_VERSION +#define DRV_MODULE_VERSION \ + __stringify(DRV_MODULE_VER_MAJOR) "." \ + __stringify(DRV_MODULE_VER_MINOR) "." \ + __stringify(DRV_MODULE_VER_SUBMINOR) +#endif + +#define DEVICE_NAME "Elastic Network Adapter (ENA)" + +/* 1 for AENQ + ADMIN */ +#define ENA_MAX_MSIX_VEC(io_queues) (1 + (io_queues)) + +#define ENA_REG_BAR 0 +#define ENA_MEM_BAR 2 +#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) + +#define ENA_DEFAULT_RING_SIZE (1024) + +#define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) +#define ENA_DEFAULT_RX_COPYBREAK (128 - NET_IP_ALIGN) + +/* limit the buffer size to 600 bytes to handle MTU changes from very + * small to very large, in which case the number of buffers per packet + * could exceed ENA_PKT_MAX_BUFS + */ +#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600 + +#define ENA_MIN_MTU 128 + +#define ENA_NAME_MAX_LEN 20 +#define ENA_IRQNAME_SIZE 40 + +#define ENA_PKT_MAX_BUFS 19 + +#define ENA_RX_RSS_TABLE_LOG_SIZE 7 +#define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE) + +#define ENA_HASH_KEY_SIZE 40 + +/* The number of tx packet completions that will be handled each NAPI poll + * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER. + */ +#define ENA_TX_POLL_BUDGET_DIVIDER 4 + +/* Refill Rx queue when number of available descriptors is below + * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER + */ +#define ENA_RX_REFILL_THRESH_DIVIDER 8 + +/* Number of queues to check for missing queues per timer service */ +#define ENA_MONITORED_TX_QUEUES 4 +/* Max timeout packets before device reset */ +#define MAX_NUM_OF_TIMEOUTED_PACKETS 32 + +#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) + +#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) +#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \ + (((idx) + (n)) & ((ring_size) - 1)) + +#define ENA_IO_TXQ_IDX(q) (2 * (q)) +#define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) + +#define ENA_MGMNT_IRQ_IDX 0 +#define ENA_IO_IRQ_FIRST_IDX 1 +#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q)) + +/* ENA device should send keep alive msg every 1 sec. + * We wait for 3 sec just to be on the safe side. + */ +#define ENA_DEVICE_KALIVE_TIMEOUT (3 * HZ) + +#define ENA_MMIO_DISABLE_REG_READ BIT(0) + +struct ena_irq { + irq_handler_t handler; + void *data; + int cpu; + u32 vector; + cpumask_t affinity_hint_mask; + char name[ENA_IRQNAME_SIZE]; +}; + +struct ena_napi { + struct napi_struct napi ____cacheline_aligned; + struct ena_ring *tx_ring; + struct ena_ring *rx_ring; + u32 qid; +}; + +struct ena_tx_buffer { + struct sk_buff *skb; + /* num of ena desc for this specific skb + * (includes data desc and metadata desc) + */ + u32 tx_descs; + /* num of buffers used by this skb */ + u32 num_of_bufs; + /* Save the last jiffies to detect missing tx packets */ + unsigned long last_jiffies; + struct ena_com_buf bufs[ENA_PKT_MAX_BUFS]; +} ____cacheline_aligned; + +struct ena_rx_buffer { + struct sk_buff *skb; + struct page *page; + u32 page_offset; + struct ena_com_buf ena_buf; +} ____cacheline_aligned; + +struct ena_stats_tx { + u64 cnt; + u64 bytes; + u64 queue_stop; + u64 prepare_ctx_err; + u64 queue_wakeup; + u64 dma_mapping_err; + u64 linearize; + u64 linearize_failed; + u64 napi_comp; + u64 tx_poll; + u64 doorbells; + u64 missing_tx_comp; + u64 bad_req_id; +}; + +struct ena_stats_rx { + u64 cnt; + u64 bytes; + u64 refil_partial; + u64 bad_csum; + u64 page_alloc_fail; + u64 skb_alloc_fail; + u64 dma_mapping_err; + u64 bad_desc_num; + u64 rx_copybreak_pkt; +}; + +struct ena_ring { + /* Holds the empty requests for TX out of order completions */ + u16 *free_tx_ids; + union { + struct ena_tx_buffer *tx_buffer_info; + struct ena_rx_buffer *rx_buffer_info; + }; + + /* cache ptr to avoid using the adapter */ + struct device *dev; + struct pci_dev *pdev; + struct napi_struct *napi; + struct net_device *netdev; + struct ena_com_dev *ena_dev; + struct ena_adapter *adapter; + struct ena_com_io_cq *ena_com_io_cq; + struct ena_com_io_sq *ena_com_io_sq; + + u16 next_to_use; + u16 next_to_clean; + u16 rx_copybreak; + u16 qid; + u16 mtu; + u16 sgl_size; + + /* The maximum header length the device can handle */ + u8 tx_max_header_size; + + /* cpu for TPH */ + int cpu; + /* number of tx/rx_buffer_info's entries */ + int ring_size; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + + struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS]; + u32 smoothed_interval; + u32 per_napi_packets; + u32 per_napi_bytes; + enum ena_intr_moder_level moder_tbl_idx; + struct u64_stats_sync syncp; + union { + struct ena_stats_tx tx_stats; + struct ena_stats_rx rx_stats; + }; +} ____cacheline_aligned; + +struct ena_stats_dev { + u64 tx_timeout; + u64 io_suspend; + u64 io_resume; + u64 wd_expired; + u64 interface_up; + u64 interface_down; + u64 admin_q_pause; +}; + +enum ena_flags_t { + ENA_FLAG_DEVICE_RUNNING, + ENA_FLAG_DEV_UP, + ENA_FLAG_LINK_UP, + ENA_FLAG_MSIX_ENABLED, + ENA_FLAG_TRIGGER_RESET +}; + +/* adapter specific private data structure */ +struct ena_adapter { + struct ena_com_dev *ena_dev; + /* OS defined structs */ + struct net_device *netdev; + struct pci_dev *pdev; + + /* rx packets that shorter that this len will be copied to the skb + * header + */ + u32 rx_copybreak; + u32 max_mtu; + + int num_queues; + + struct msix_entry *msix_entries; + int msix_vecs; + + u32 tx_usecs, rx_usecs; /* interrupt moderation */ + u32 tx_frames, rx_frames; /* interrupt moderation */ + + u32 tx_ring_size; + u32 rx_ring_size; + + u32 msg_enable; + + u16 max_tx_sgl_size; + u16 max_rx_sgl_size; + + u8 mac_addr[ETH_ALEN]; + + char name[ENA_NAME_MAX_LEN]; + + unsigned long flags; + /* TX */ + struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + /* RX */ + struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES]; + + struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)]; + + /* timer service */ + struct work_struct reset_task; + struct work_struct suspend_io_task; + struct work_struct resume_io_task; + struct timer_list timer_service; + + bool wd_state; + unsigned long last_keep_alive_jiffies; + + struct u64_stats_sync syncp; + struct ena_stats_dev dev_stats; + + /* last queue index that was checked for uncompleted tx packets */ + u32 last_monitored_tx_qid; +}; + +void ena_set_ethtool_ops(struct net_device *netdev); + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); + +int ena_get_sset_count(struct net_device *netdev, int sset); + +#endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h new file mode 100644 index 000000000000..f80d2a47fa94 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h @@ -0,0 +1,67 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_PCI_ID_TBL_H_ +#define ENA_PCI_ID_TBL_H_ + +#ifndef PCI_VENDOR_ID_AMAZON +#define PCI_VENDOR_ID_AMAZON 0x1d0f +#endif + +#ifndef PCI_DEV_ID_ENA_PF +#define PCI_DEV_ID_ENA_PF 0x0ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_PF +#define PCI_DEV_ID_ENA_LLQ_PF 0x1ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_VF +#define PCI_DEV_ID_ENA_VF 0xec20 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_VF +#define PCI_DEV_ID_ENA_LLQ_VF 0xec21 +#endif + +#define ENA_PCI_ID_TABLE_ENTRY(devid) \ + {PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)}, + +static const struct pci_device_id ena_pci_tbl[] = { + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF) + { } +}; + +#endif /* ENA_PCI_ID_TBL_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h new file mode 100644 index 000000000000..26097a2b6030 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h @@ -0,0 +1,133 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_REGS_H_ +#define _ENA_REGS_H_ + +/* ena_registers offsets */ +#define ENA_REGS_VERSION_OFF 0x0 +#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4 +#define ENA_REGS_CAPS_OFF 0x8 +#define ENA_REGS_CAPS_EXT_OFF 0xc +#define ENA_REGS_AQ_BASE_LO_OFF 0x10 +#define ENA_REGS_AQ_BASE_HI_OFF 0x14 +#define ENA_REGS_AQ_CAPS_OFF 0x18 +#define ENA_REGS_ACQ_BASE_LO_OFF 0x20 +#define ENA_REGS_ACQ_BASE_HI_OFF 0x24 +#define ENA_REGS_ACQ_CAPS_OFF 0x28 +#define ENA_REGS_AQ_DB_OFF 0x2c +#define ENA_REGS_ACQ_TAIL_OFF 0x30 +#define ENA_REGS_AENQ_CAPS_OFF 0x34 +#define ENA_REGS_AENQ_BASE_LO_OFF 0x38 +#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c +#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40 +#define ENA_REGS_AENQ_TAIL_OFF 0x44 +#define ENA_REGS_INTR_MASK_OFF 0x4c +#define ENA_REGS_DEV_CTL_OFF 0x54 +#define ENA_REGS_DEV_STS_OFF 0x58 +#define ENA_REGS_MMIO_REG_READ_OFF 0x5c +#define ENA_REGS_MMIO_RESP_LO_OFF 0x60 +#define ENA_REGS_MMIO_RESP_HI_OFF 0x64 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68 + +/* version register */ +#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff +#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 +#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 + +/* controller_version register */ +#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 + +/* caps register */ +#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 + +/* aq_caps register */ +#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 + +/* acq_caps register */ +#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000 + +/* aenq_caps register */ +#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000 + +/* dev_ctl register */ +#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 +#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2 +#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4 +#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3 +#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8 + +/* dev_sts register */ +#define ENA_REGS_DEV_STS_READY_MASK 0x1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 +#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 +#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 +#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 +#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80 + +/* mmio_reg_read register */ +#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff +#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 +#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 + +/* rss_ind_entry_update register */ +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000 + +#endif /*_ENA_REGS_H_ */ diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c index dcf2a1f3643d..dc57f2759f44 100644 --- a/drivers/net/ethernet/amd/7990.c +++ b/drivers/net/ethernet/amd/7990.c @@ -45,14 +45,14 @@ #define WRITERDP(lp, x) out_be16(lp->base + LANCE_RDP, (x)) #define READRDP(lp) in_be16(lp->base + LANCE_RDP) -#if defined(CONFIG_HPLANCE) || defined(CONFIG_HPLANCE_MODULE) +#if IS_ENABLED(CONFIG_HPLANCE) #include "hplance.h" #undef WRITERAP #undef WRITERDP #undef READRDP -#if defined(CONFIG_MVME147_NET) || defined(CONFIG_MVME147_NET_MODULE) +#if IS_ENABLED(CONFIG_MVME147_NET) /* Lossage Factor Nine, Mr Sulu. */ #define WRITERAP(lp, x) (lp->writerap(lp, x)) @@ -86,7 +86,7 @@ static inline __u16 READRDP(struct lance_private *lp) } #endif -#endif /* CONFIG_HPLANCE || CONFIG_HPLANCE_MODULE */ +#endif /* IS_ENABLED(CONFIG_HPLANCE) */ /* debugging output macros, various flavours */ /* #define TEST_HITS */ diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 94960055fa1f..f92cc97151ec 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -89,7 +89,7 @@ Revision History: #include #include -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define AMD8111E_VLAN_TAG_USED 1 #else #define AMD8111E_VLAN_TAG_USED 0 diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index a9b2709567ec..7f9216db026f 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1708,9 +1708,9 @@ static const struct net_device_ops xgbe_netdev_ops = { .ndo_set_features = xgbe_set_features, }; -struct net_device_ops *xgbe_get_netdev_ops(void) +const struct net_device_ops *xgbe_get_netdev_ops(void) { - return (struct net_device_ops *)&xgbe_netdev_ops; + return &xgbe_netdev_ops; } static void xgbe_rx_refresh(struct xgbe_channel *channel) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c index 11d9f0c5b78b..4007b429c80c 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-ethtool.c @@ -623,7 +623,7 @@ static const struct ethtool_ops xgbe_ethtool_ops = { .get_ts_info = xgbe_get_ts_info, }; -struct ethtool_ops *xgbe_get_ethtool_ops(void) +const struct ethtool_ops *xgbe_get_ethtool_ops(void) { - return (struct ethtool_ops *)&xgbe_ethtool_ops; + return &xgbe_ethtool_ops; } diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-main.c b/drivers/net/ethernet/amd/xgbe/xgbe-main.c index 3eee3201b58f..9de078819aa6 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-main.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-main.c @@ -861,9 +861,15 @@ static int xgbe_resume(struct device *dev) pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); - if (netif_running(netdev)) + if (netif_running(netdev)) { ret = xgbe_powerup(netdev, XGMAC_DRIVER_CONTEXT); + /* Schedule a restart in case the link or phy state changed + * while we were powered down. + */ + schedule_work(&pdata->restart_work); + } + DBGPR("<--xgbe_resume\n"); return ret; diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index 98d9d63c4353..5dd17dcea2f8 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -956,8 +956,9 @@ struct xgbe_prv_data { void xgbe_init_function_ptrs_dev(struct xgbe_hw_if *); void xgbe_init_function_ptrs_phy(struct xgbe_phy_if *); void xgbe_init_function_ptrs_desc(struct xgbe_desc_if *); -struct net_device_ops *xgbe_get_netdev_ops(void); -struct ethtool_ops *xgbe_get_ethtool_ops(void); +const struct net_device_ops *xgbe_get_netdev_ops(void); +const struct ethtool_ops *xgbe_get_ethtool_ops(void); + #ifdef CONFIG_AMD_XGBE_DCB const struct dcbnl_rtnl_ops *xgbe_get_dcbnl_ops(void); #endif diff --git a/drivers/net/ethernet/apm/xgene/Kconfig b/drivers/net/ethernet/apm/xgene/Kconfig index 300e3b5c54e0..afccb033177b 100644 --- a/drivers/net/ethernet/apm/xgene/Kconfig +++ b/drivers/net/ethernet/apm/xgene/Kconfig @@ -4,6 +4,7 @@ config NET_XGENE depends on ARCH_XGENE || COMPILE_TEST select PHYLIB select MDIO_XGENE + select GPIOLIB help This is the Ethernet driver for the on-chip ethernet interface on the APM X-Gene SoC. diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c index 472c0fb3f4c4..23d72af83d82 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.c @@ -32,12 +32,19 @@ static void xgene_cle_sband_to_hw(u8 frag, enum xgene_cle_prot_version ver, SET_VAL(SB_HDRLEN, len); } -static void xgene_cle_idt_to_hw(u32 dstqid, u32 fpsel, +static void xgene_cle_idt_to_hw(struct xgene_enet_pdata *pdata, + u32 dstqid, u32 fpsel, u32 nfpsel, u32 *idt_reg) { - *idt_reg = SET_VAL(IDT_DSTQID, dstqid) | - SET_VAL(IDT_FPSEL, fpsel) | - SET_VAL(IDT_NFPSEL, nfpsel); + if (pdata->enet_id == XGENE_ENET1) { + *idt_reg = SET_VAL(IDT_DSTQID, dstqid) | + SET_VAL(IDT_FPSEL1, fpsel) | + SET_VAL(IDT_NFPSEL1, nfpsel); + } else { + *idt_reg = SET_VAL(IDT_DSTQID, dstqid) | + SET_VAL(IDT_FPSEL, fpsel) | + SET_VAL(IDT_NFPSEL, nfpsel); + } } static void xgene_cle_dbptr_to_hw(struct xgene_enet_pdata *pdata, @@ -344,7 +351,7 @@ static int xgene_cle_set_rss_idt(struct xgene_enet_pdata *pdata) nfpsel = 0; idt_reg = 0; - xgene_cle_idt_to_hw(dstqid, fpsel, nfpsel, &idt_reg); + xgene_cle_idt_to_hw(pdata, dstqid, fpsel, nfpsel, &idt_reg); ret = xgene_cle_dram_wr(&pdata->cle, &idt_reg, 1, i, RSS_IDT, CLE_CMD_WR); if (ret) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h index 33c5f6b25824..9ac9f8e145ec 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_cle.h @@ -196,9 +196,13 @@ enum xgene_cle_ptree_dbptrs { #define IDT_DSTQID_POS 0 #define IDT_DSTQID_LEN 12 #define IDT_FPSEL_POS 12 -#define IDT_FPSEL_LEN 4 -#define IDT_NFPSEL_POS 16 -#define IDT_NFPSEL_LEN 4 +#define IDT_FPSEL_LEN 5 +#define IDT_NFPSEL_POS 17 +#define IDT_NFPSEL_LEN 5 +#define IDT_FPSEL1_POS 12 +#define IDT_FPSEL1_LEN 4 +#define IDT_NFPSEL1_POS 16 +#define IDT_NFPSEL1_LEN 4 struct xgene_cle_ptree_branch { bool valid; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c index 22a7b26ca1d6..d372d4235c81 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_ethtool.c @@ -54,55 +54,68 @@ static void xgene_get_drvinfo(struct net_device *ndev, sprintf(info->bus_info, "%s", pdev->name); } -static int xgene_get_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +static int xgene_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; + u32 supported; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (phydev == NULL) return -ENODEV; - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } else if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { if (pdata->mdio_driver) { if (!phydev) return -ENODEV; - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } - cmd->supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | - SUPPORTED_MII; - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, SPEED_1000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_MII; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_ENABLE; + supported = SUPPORTED_1000baseT_Full | SUPPORTED_Autoneg | + SUPPORTED_MII; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, + supported); + + cmd->base.speed = SPEED_1000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_MII; + cmd->base.autoneg = AUTONEG_ENABLE; } else { - cmd->supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; - cmd->advertising = cmd->supported; - ethtool_cmd_speed_set(cmd, SPEED_10000); - cmd->duplex = DUPLEX_FULL; - cmd->port = PORT_FIBRE; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = AUTONEG_DISABLE; + supported = SUPPORTED_10000baseT_Full | SUPPORTED_FIBRE; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, + supported); + + cmd->base.speed = SPEED_10000; + cmd->base.duplex = DUPLEX_FULL; + cmd->base.port = PORT_FIBRE; + cmd->base.autoneg = AUTONEG_DISABLE; } return 0; } -static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) +static int xgene_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) { if (!phydev) return -ENODEV; - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } if (pdata->phy_mode == PHY_INTERFACE_MODE_SGMII) { @@ -110,7 +123,7 @@ static int xgene_set_settings(struct net_device *ndev, struct ethtool_cmd *cmd) if (!phydev) return -ENODEV; - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } } @@ -152,12 +165,12 @@ static void xgene_get_ethtool_stats(struct net_device *ndev, static const struct ethtool_ops xgene_ethtool_ops = { .get_drvinfo = xgene_get_drvinfo, - .get_settings = xgene_get_settings, - .set_settings = xgene_set_settings, .get_link = ethtool_op_get_link, .get_strings = xgene_get_strings, .get_sset_count = xgene_get_sset_count, - .get_ethtool_stats = xgene_get_ethtool_stats + .get_ethtool_stats = xgene_get_ethtool_stats, + .get_link_ksettings = xgene_get_link_ksettings, + .set_link_ksettings = xgene_set_link_ksettings, }; void xgene_enet_set_ethtool_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c index 18bb9556dd00..c481f104a8fe 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.c @@ -713,7 +713,7 @@ static void xgene_enet_adjust_link(struct net_device *ndev) { struct xgene_enet_pdata *pdata = netdev_priv(ndev); const struct xgene_mac_ops *mac_ops = pdata->mac_ops; - struct phy_device *phydev = pdata->phy_dev; + struct phy_device *phydev = ndev->phydev; if (phydev->link) { if (pdata->phy_speed != phydev->speed) { @@ -761,31 +761,25 @@ int xgene_enet_phy_connect(struct net_device *ndev) if (dev->of_node) { for (i = 0 ; i < 2; i++) { np = of_parse_phandle(dev->of_node, "phy-handle", i); - if (np) + phy_dev = of_phy_connect(ndev, np, + &xgene_enet_adjust_link, + 0, pdata->phy_mode); + of_node_put(np); + if (phy_dev) break; } - if (!np) { - netdev_dbg(ndev, "No phy-handle found in DT\n"); - return -ENODEV; - } - - phy_dev = of_phy_connect(ndev, np, &xgene_enet_adjust_link, - 0, pdata->phy_mode); - of_node_put(np); if (!phy_dev) { netdev_err(ndev, "Could not connect to PHY\n"); return -ENODEV; } - - pdata->phy_dev = phy_dev; } else { #ifdef CONFIG_ACPI struct acpi_device *adev = acpi_phy_find_device(dev); if (adev) - pdata->phy_dev = adev->driver_data; - - phy_dev = pdata->phy_dev; + phy_dev = adev->driver_data; + else + phy_dev = NULL; if (!phy_dev || phy_connect_direct(ndev, phy_dev, &xgene_enet_adjust_link, @@ -853,8 +847,6 @@ static int xgene_mdiobus_register(struct xgene_enet_pdata *pdata, if (!phy) return -EIO; - pdata->phy_dev = phy; - return ret; } @@ -894,14 +886,18 @@ int xgene_enet_mdio_config(struct xgene_enet_pdata *pdata) void xgene_enet_phy_disconnect(struct xgene_enet_pdata *pdata) { - if (pdata->phy_dev) - phy_disconnect(pdata->phy_dev); + struct net_device *ndev = pdata->ndev; + + if (ndev->phydev) + phy_disconnect(ndev->phydev); } void xgene_enet_mdio_remove(struct xgene_enet_pdata *pdata) { - if (pdata->phy_dev) - phy_disconnect(pdata->phy_dev); + struct net_device *ndev = pdata->ndev; + + if (ndev->phydev) + phy_disconnect(ndev->phydev); mdiobus_unregister(pdata->mdio_bus); mdiobus_free(pdata->mdio_bus); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h index 179a44dceb29..8456337a237d 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_hw.h @@ -124,6 +124,12 @@ enum xgene_enet_rm { #define MAC_READ_REG_OFFSET 0x0c #define MAC_COMMAND_DONE_REG_OFFSET 0x10 +#define PCS_ADDR_REG_OFFSET 0x00 +#define PCS_COMMAND_REG_OFFSET 0x04 +#define PCS_WRITE_REG_OFFSET 0x08 +#define PCS_READ_REG_OFFSET 0x0c +#define PCS_COMMAND_DONE_REG_OFFSET 0x10 + #define MII_MGMT_CONFIG_ADDR 0x20 #define MII_MGMT_COMMAND_ADDR 0x24 #define MII_MGMT_ADDRESS_ADDR 0x28 @@ -231,6 +237,8 @@ enum xgene_enet_rm { #define TCPHDR_LEN 6 #define IPHDR_POS 6 #define IPHDR_LEN 6 +#define MSS_POS 20 +#define MSS_LEN 2 #define EC_POS 22 /* Enable checksum */ #define EC_LEN 1 #define ET_POS 23 /* Enable TSO */ @@ -247,6 +255,11 @@ enum xgene_enet_rm { #define LAST_BUFFER (0x7800ULL << BUFDATALEN_POS) +#define TSO_MSS0_POS 0 +#define TSO_MSS0_LEN 14 +#define TSO_MSS1_POS 16 +#define TSO_MSS1_LEN 14 + struct xgene_enet_raw_desc { __le64 m0; __le64 m1; diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c index d1d6b5eeb613..429f18fc5503 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.c @@ -19,6 +19,7 @@ * along with this program. If not, see . */ +#include #include "xgene_enet_main.h" #include "xgene_enet_hw.h" #include "xgene_enet_sgmac.h" @@ -72,7 +73,6 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, skb = netdev_alloc_skb_ip_align(ndev, len); if (unlikely(!skb)) return -ENOMEM; - buf_pool->rx_skb[tail] = skb; dma_addr = dma_map_single(dev, skb->data, len, DMA_FROM_DEVICE); if (dma_mapping_error(dev, dma_addr)) { @@ -81,6 +81,8 @@ static int xgene_enet_refill_bufpool(struct xgene_enet_desc_ring *buf_pool, return -EINVAL; } + buf_pool->rx_skb[tail] = skb; + raw_desc->m1 = cpu_to_le64(SET_VAL(DATAADDR, dma_addr) | SET_VAL(BUFDATALEN, bufdatalen) | SET_BIT(COHERENT)); @@ -102,12 +104,21 @@ static u8 xgene_enet_hdr_len(const void *data) static void xgene_enet_delete_bufpool(struct xgene_enet_desc_ring *buf_pool) { + struct device *dev = ndev_to_dev(buf_pool->ndev); + struct xgene_enet_raw_desc16 *raw_desc; + dma_addr_t dma_addr; int i; /* Free up the buffers held by hardware */ for (i = 0; i < buf_pool->slots; i++) { - if (buf_pool->rx_skb[i]) + if (buf_pool->rx_skb[i]) { dev_kfree_skb_any(buf_pool->rx_skb[i]); + + raw_desc = &buf_pool->raw_desc16[i]; + dma_addr = GET_VAL(DATAADDR, le64_to_cpu(raw_desc->m1)); + dma_unmap_single(dev, dma_addr, XGENE_ENET_MAX_MTU, + DMA_FROM_DEVICE); + } } } @@ -126,6 +137,7 @@ static irqreturn_t xgene_enet_rx_irq(const int irq, void *data) static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, struct xgene_enet_raw_desc *raw_desc) { + struct xgene_enet_pdata *pdata = netdev_priv(cp_ring->ndev); struct sk_buff *skb; struct device *dev; skb_frag_t *frag; @@ -133,6 +145,7 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, u16 skb_index; u8 status; int i, ret = 0; + u8 mss_index; skb_index = GET_VAL(USERINFO, le64_to_cpu(raw_desc->m0)); skb = cp_ring->cp_skb[skb_index]; @@ -149,6 +162,13 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, DMA_TO_DEVICE); } + if (GET_BIT(ET, le64_to_cpu(raw_desc->m3))) { + mss_index = GET_VAL(MSS, le64_to_cpu(raw_desc->m3)); + spin_lock(&pdata->mss_lock); + pdata->mss_refcnt[mss_index]--; + spin_unlock(&pdata->mss_lock); + } + /* Checking for error */ status = GET_VAL(LERR, le64_to_cpu(raw_desc->m0)); if (unlikely(status > 2)) { @@ -167,15 +187,53 @@ static int xgene_enet_tx_completion(struct xgene_enet_desc_ring *cp_ring, return ret; } -static u64 xgene_enet_work_msg(struct sk_buff *skb) +static int xgene_enet_setup_mss(struct net_device *ndev, u32 mss) +{ + struct xgene_enet_pdata *pdata = netdev_priv(ndev); + bool mss_index_found = false; + int mss_index; + int i; + + spin_lock(&pdata->mss_lock); + + /* Reuse the slot if MSS matches */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (pdata->mss[i] == mss) { + pdata->mss_refcnt[i]++; + mss_index = i; + mss_index_found = true; + } + } + + /* Overwrite the slot with ref_count = 0 */ + for (i = 0; !mss_index_found && i < NUM_MSS_REG; i++) { + if (!pdata->mss_refcnt[i]) { + pdata->mss_refcnt[i]++; + pdata->mac_ops->set_mss(pdata, mss, i); + pdata->mss[i] = mss; + mss_index = i; + mss_index_found = true; + } + } + + spin_unlock(&pdata->mss_lock); + + /* No slots with ref_count = 0 available, return busy */ + if (!mss_index_found) + return -EBUSY; + + return mss_index; +} + +static int xgene_enet_work_msg(struct sk_buff *skb, u64 *hopinfo) { struct net_device *ndev = skb->dev; struct iphdr *iph; u8 l3hlen = 0, l4hlen = 0; u8 ethhdr, proto = 0, csum_enable = 0; - u64 hopinfo = 0; u32 hdr_len, mss = 0; u32 i, len, nr_frags; + int mss_index; ethhdr = xgene_enet_hdr_len(skb->data); @@ -215,7 +273,11 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) if (!mss || ((skb->len - hdr_len) <= mss)) goto out; - hopinfo |= SET_BIT(ET); + mss_index = xgene_enet_setup_mss(ndev, mss); + if (unlikely(mss_index < 0)) + return -EBUSY; + + *hopinfo |= SET_BIT(ET) | SET_VAL(MSS, mss_index); } } else if (iph->protocol == IPPROTO_UDP) { l4hlen = UDP_HDR_SIZE; @@ -223,15 +285,15 @@ static u64 xgene_enet_work_msg(struct sk_buff *skb) } out: l3hlen = ip_hdrlen(skb) >> 2; - hopinfo |= SET_VAL(TCPHDR, l4hlen) | - SET_VAL(IPHDR, l3hlen) | - SET_VAL(ETHHDR, ethhdr) | - SET_VAL(EC, csum_enable) | - SET_VAL(IS, proto) | - SET_BIT(IC) | - SET_BIT(TYPE_ETH_WORK_MESSAGE); + *hopinfo |= SET_VAL(TCPHDR, l4hlen) | + SET_VAL(IPHDR, l3hlen) | + SET_VAL(ETHHDR, ethhdr) | + SET_VAL(EC, csum_enable) | + SET_VAL(IS, proto) | + SET_BIT(IC) | + SET_BIT(TYPE_ETH_WORK_MESSAGE); - return hopinfo; + return 0; } static u16 xgene_enet_encode_len(u16 len) @@ -271,20 +333,22 @@ static int xgene_enet_setup_tx_desc(struct xgene_enet_desc_ring *tx_ring, dma_addr_t dma_addr, pbuf_addr, *frag_dma_addr; skb_frag_t *frag; u16 tail = tx_ring->tail; - u64 hopinfo; + u64 hopinfo = 0; u32 len, hw_len; u8 ll = 0, nv = 0, idx = 0; bool split = false; u32 size, offset, ell_bytes = 0; u32 i, fidx, nr_frags, count = 1; + int ret; raw_desc = &tx_ring->raw_desc[tail]; tail = (tail + 1) & (tx_ring->slots - 1); memset(raw_desc, 0, sizeof(struct xgene_enet_raw_desc)); - hopinfo = xgene_enet_work_msg(skb); - if (!hopinfo) - return -EINVAL; + ret = xgene_enet_work_msg(skb, &hopinfo); + if (ret) + return ret; + raw_desc->m3 = cpu_to_le64(SET_VAL(HENQNUM, tx_ring->dst_ring_num) | hopinfo); @@ -424,6 +488,9 @@ static netdev_tx_t xgene_enet_start_xmit(struct sk_buff *skb, return NETDEV_TX_OK; count = xgene_enet_setup_tx_desc(tx_ring, skb); + if (count == -EBUSY) + return NETDEV_TX_BUSY; + if (count <= 0) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; @@ -452,7 +519,6 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, struct xgene_enet_raw_desc *raw_desc) { struct net_device *ndev; - struct xgene_enet_pdata *pdata; struct device *dev; struct xgene_enet_desc_ring *buf_pool; u32 datalen, skb_index; @@ -461,7 +527,6 @@ static int xgene_enet_rx_frame(struct xgene_enet_desc_ring *rx_ring, int ret = 0; ndev = rx_ring->ndev; - pdata = netdev_priv(ndev); dev = ndev_to_dev(rx_ring->ndev); buf_pool = rx_ring->buf_pool; @@ -739,8 +804,8 @@ static int xgene_enet_open(struct net_device *ndev) if (ret) return ret; - if (pdata->phy_dev) { - phy_start(pdata->phy_dev); + if (ndev->phydev) { + phy_start(ndev->phydev); } else { schedule_delayed_work(&pdata->link_work, PHY_POLL_LINK_OFF); netif_carrier_off(ndev); @@ -763,8 +828,8 @@ static int xgene_enet_close(struct net_device *ndev) mac_ops->tx_disable(pdata); mac_ops->rx_disable(pdata); - if (pdata->phy_dev) - phy_stop(pdata->phy_dev); + if (ndev->phydev) + phy_stop(ndev->phydev); else cancel_delayed_work_sync(&pdata->link_work); @@ -1312,6 +1377,18 @@ static int xgene_enet_check_phy_handle(struct xgene_enet_pdata *pdata) return 0; } +static void xgene_enet_gpiod_get(struct xgene_enet_pdata *pdata) +{ + struct device *dev = &pdata->pdev->dev; + + if (pdata->phy_mode != PHY_INTERFACE_MODE_XGMII) + return; + + pdata->sfp_rdy = gpiod_get(dev, "rxlos", GPIOD_IN); + if (IS_ERR(pdata->sfp_rdy)) + pdata->sfp_rdy = gpiod_get(dev, "sfp", GPIOD_IN); +} + static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) { struct platform_device *pdev; @@ -1401,6 +1478,8 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) if (ret) return ret; + xgene_enet_gpiod_get(pdata); + pdata->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) { /* Firmware may have set up the clock already. */ @@ -1425,6 +1504,7 @@ static int xgene_enet_get_resources(struct xgene_enet_pdata *pdata) } else { pdata->mcx_mac_addr = base_addr + BLOCK_AXG_MAC_OFFSET; pdata->mcx_mac_csr_addr = base_addr + BLOCK_AXG_MAC_CSR_OFFSET; + pdata->pcs_addr = base_addr + BLOCK_PCS_OFFSET; } pdata->rx_buff_cnt = NUM_PKT_BUF; @@ -1454,10 +1534,8 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) buf_pool = pdata->rx_ring[i]->buf_pool; xgene_enet_init_bufpool(buf_pool); ret = xgene_enet_refill_bufpool(buf_pool, pdata->rx_buff_cnt); - if (ret) { - xgene_enet_delete_desc_rings(pdata); - return ret; - } + if (ret) + goto err; } dst_ring_num = xgene_enet_dst_ring_num(pdata->rx_ring[0]); @@ -1474,7 +1552,7 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) ret = pdata->cle_ops->cle_init(pdata); if (ret) { netdev_err(ndev, "Preclass Tree init error\n"); - return ret; + goto err; } } else { pdata->port_ops->cle_bypass(pdata, dst_ring_num, buf_pool->id); @@ -1484,6 +1562,10 @@ static int xgene_enet_init_hw(struct xgene_enet_pdata *pdata) pdata->mac_ops->init(pdata); return ret; + +err: + xgene_enet_delete_desc_rings(pdata); + return ret; } static void xgene_enet_setup_ops(struct xgene_enet_pdata *pdata) @@ -1631,8 +1713,8 @@ static int xgene_enet_probe(struct platform_device *pdev) } #endif if (!pdata->enet_id) { - free_netdev(ndev); - return -ENODEV; + ret = -ENODEV; + goto err; } ret = xgene_enet_get_resources(pdata); @@ -1643,7 +1725,7 @@ static int xgene_enet_probe(struct platform_device *pdev) if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { ndev->features |= NETIF_F_TSO; - pdata->mss = XGENE_ENET_MSS; + spin_lock_init(&pdata->mss_lock); } ndev->hw_features = ndev->features; @@ -1655,7 +1737,7 @@ static int xgene_enet_probe(struct platform_device *pdev) ret = xgene_enet_init_hw(pdata); if (ret) - goto err_netdev; + goto err; link_state = pdata->mac_ops->link_state; if (pdata->phy_mode == PHY_INTERFACE_MODE_XGMII) { @@ -1665,21 +1747,32 @@ static int xgene_enet_probe(struct platform_device *pdev) ret = xgene_enet_mdio_config(pdata); else INIT_DELAYED_WORK(&pdata->link_work, link_state); + + if (ret) + goto err1; } - if (ret) - goto err; xgene_enet_napi_add(pdata); ret = register_netdev(ndev); if (ret) { netdev_err(ndev, "Failed to register netdev\n"); - goto err; + goto err2; } return 0; -err_netdev: - unregister_netdev(ndev); +err2: + /* + * If necessary, free_netdev() will call netif_napi_del() and undo + * the effects of xgene_enet_napi_add()'s calls to netif_napi_add(). + */ + + if (pdata->mdio_driver) + xgene_enet_phy_disconnect(pdata); + else if (pdata->phy_mode == PHY_INTERFACE_MODE_RGMII) + xgene_enet_mdio_remove(pdata); +err1: + xgene_enet_delete_desc_rings(pdata); err: free_netdev(ndev); return ret; @@ -1688,11 +1781,9 @@ err: static int xgene_enet_remove(struct platform_device *pdev) { struct xgene_enet_pdata *pdata; - const struct xgene_mac_ops *mac_ops; struct net_device *ndev; pdata = platform_get_drvdata(pdev); - mac_ops = pdata->mac_ops; ndev = pdata->ndev; rtnl_lock(); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h index 217546e5714a..0cda58f5a840 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_main.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_main.h @@ -47,7 +47,7 @@ #define NUM_PKT_BUF 64 #define NUM_BUFPOOL 32 #define MAX_EXP_BUFFS 256 -#define XGENE_ENET_MSS 1448 +#define NUM_MSS_REG 4 #define XGENE_MIN_ENET_FRAME_SIZE 60 #define XGENE_MAX_ENET_IRQ 16 @@ -143,7 +143,7 @@ struct xgene_mac_ops { void (*rx_disable)(struct xgene_enet_pdata *pdata); void (*set_speed)(struct xgene_enet_pdata *pdata); void (*set_mac_addr)(struct xgene_enet_pdata *pdata); - void (*set_mss)(struct xgene_enet_pdata *pdata); + void (*set_mss)(struct xgene_enet_pdata *pdata, u16 mss, u8 index); void (*link_state)(struct work_struct *work); }; @@ -174,7 +174,6 @@ struct xgene_cle_ops { struct xgene_enet_pdata { struct net_device *ndev; struct mii_bus *mdio_bus; - struct phy_device *phy_dev; int phy_speed; struct clk *clk; struct platform_device *pdev; @@ -196,6 +195,7 @@ struct xgene_enet_pdata { void __iomem *mcx_mac_addr; void __iomem *mcx_mac_csr_addr; void __iomem *base_addr; + void __iomem *pcs_addr; void __iomem *ring_csr_addr; void __iomem *ring_cmd_addr; int phy_mode; @@ -212,10 +212,13 @@ struct xgene_enet_pdata { u8 eth_bufnum; u8 bp_bufnum; u16 ring_num; - u32 mss; + u32 mss[NUM_MSS_REG]; + u32 mss_refcnt[NUM_MSS_REG]; + spinlock_t mss_lock; /* mss lock */ u8 tx_delay; u8 rx_delay; bool mdio_driver; + struct gpio_desc *sfp_rdy; }; struct xgene_indirect_ctl { diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c index 9c6ad0dce00f..6475f383ba83 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.c @@ -18,6 +18,8 @@ * along with this program. If not, see . */ +#include +#include #include "xgene_enet_main.h" #include "xgene_enet_hw.h" #include "xgene_enet_xgmac.h" @@ -84,6 +86,21 @@ static void xgene_enet_wr_mac(struct xgene_enet_pdata *pdata, wr_addr); } +static void xgene_enet_wr_pcs(struct xgene_enet_pdata *pdata, + u32 wr_addr, u32 wr_data) +{ + void __iomem *addr, *wr, *cmd, *cmd_done; + + addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET; + wr = pdata->pcs_addr + PCS_WRITE_REG_OFFSET; + cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET; + cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET; + + if (!xgene_enet_wr_indirect(addr, wr, cmd, cmd_done, wr_addr, wr_data)) + netdev_err(pdata->ndev, "PCS write failed, addr: %04x\n", + wr_addr); +} + static void xgene_enet_rd_csr(struct xgene_enet_pdata *pdata, u32 offset, u32 *val) { @@ -122,6 +139,7 @@ static bool xgene_enet_rd_indirect(void __iomem *addr, void __iomem *rd, return true; } + static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata, u32 rd_addr, u32 *rd_data) { @@ -137,6 +155,25 @@ static void xgene_enet_rd_mac(struct xgene_enet_pdata *pdata, rd_addr); } +static bool xgene_enet_rd_pcs(struct xgene_enet_pdata *pdata, + u32 rd_addr, u32 *rd_data) +{ + void __iomem *addr, *rd, *cmd, *cmd_done; + bool success; + + addr = pdata->pcs_addr + PCS_ADDR_REG_OFFSET; + rd = pdata->pcs_addr + PCS_READ_REG_OFFSET; + cmd = pdata->pcs_addr + PCS_COMMAND_REG_OFFSET; + cmd_done = pdata->pcs_addr + PCS_COMMAND_DONE_REG_OFFSET; + + success = xgene_enet_rd_indirect(addr, rd, cmd, cmd_done, rd_addr, rd_data); + if (!success) + netdev_err(pdata->ndev, "PCS read failed, addr: %04x\n", + rd_addr); + + return success; +} + static int xgene_enet_ecc_init(struct xgene_enet_pdata *pdata) { struct net_device *ndev = pdata->ndev; @@ -171,6 +208,17 @@ static void xgene_xgmac_reset(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_0, 0); } +static void xgene_pcs_reset(struct xgene_enet_pdata *pdata) +{ + u32 data; + + if (!xgene_enet_rd_pcs(pdata, PCS_CONTROL_1, &data)) + return; + + xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data | PCS_CTRL_PCS_RST); + xgene_enet_wr_pcs(pdata, PCS_CONTROL_1, data & ~PCS_CTRL_PCS_RST); +} + static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata) { u32 addr0, addr1; @@ -184,9 +232,22 @@ static void xgene_xgmac_set_mac_addr(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, HSTMACADR_MSW_ADDR, addr1); } -static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata) +static void xgene_xgmac_set_mss(struct xgene_enet_pdata *pdata, + u16 mss, u8 index) { - xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR, pdata->mss); + u8 offset; + u32 data; + + offset = (index < 2) ? 0 : 4; + xgene_enet_rd_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, &data); + + if (!(index & 0x1)) + data = SET_VAL(TSO_MSS1, data >> TSO_MSS1_POS) | + SET_VAL(TSO_MSS0, mss); + else + data = SET_VAL(TSO_MSS1, mss) | SET_VAL(TSO_MSS0, data); + + xgene_enet_wr_csr(pdata, XG_TSIF_MSS_REG0_ADDR + offset, data); } static u32 xgene_enet_link_status(struct xgene_enet_pdata *pdata) @@ -210,18 +271,17 @@ static void xgene_xgmac_init(struct xgene_enet_pdata *pdata) xgene_enet_wr_mac(pdata, AXGMAC_CONFIG_1, data); xgene_xgmac_set_mac_addr(pdata); - xgene_xgmac_set_mss(pdata); xgene_enet_rd_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, &data); data |= CFG_RSIF_FPBUFF_TIMEOUT_EN; xgene_enet_wr_csr(pdata, XG_RSIF_CONFIG_REG_ADDR, data); - xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX); - xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0); xgene_enet_rd_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, &data); data |= BIT(12); xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_ADDR, data); xgene_enet_wr_csr(pdata, XG_ENET_SPARE_CFG_REG_1_ADDR, 0x82); + xgene_enet_wr_csr(pdata, XGENET_RX_DV_GATE_REG_0_ADDR, 0); + xgene_enet_wr_csr(pdata, XG_CFG_BYPASS_ADDR, RESUME_TX); } static void xgene_xgmac_rx_enable(struct xgene_enet_pdata *pdata) @@ -359,14 +419,17 @@ static void xgene_enet_link_state(struct work_struct *work) { struct xgene_enet_pdata *pdata = container_of(to_delayed_work(work), struct xgene_enet_pdata, link_work); + struct gpio_desc *sfp_rdy = pdata->sfp_rdy; struct net_device *ndev = pdata->ndev; u32 link_status, poll_interval; link_status = xgene_enet_link_status(pdata); + if (link_status && !IS_ERR(sfp_rdy) && !gpiod_get_value(sfp_rdy)) + link_status = 0; + if (link_status) { if (!netif_carrier_ok(ndev)) { netif_carrier_on(ndev); - xgene_xgmac_init(pdata); xgene_xgmac_rx_enable(pdata); xgene_xgmac_tx_enable(pdata); netdev_info(ndev, "Link is Up - 10Gbps\n"); @@ -380,6 +443,8 @@ static void xgene_enet_link_state(struct work_struct *work) netdev_info(ndev, "Link is Down\n"); } poll_interval = PHY_POLL_LINK_OFF; + + xgene_pcs_reset(pdata); } schedule_delayed_work(&pdata->link_work, poll_interval); diff --git a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h index f1ea485f916b..360ccbd95566 100644 --- a/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h +++ b/drivers/net/ethernet/apm/xgene/xgene_enet_xgmac.h @@ -24,6 +24,7 @@ #define X2_BLOCK_ETH_MAC_CSR_OFFSET 0x3000 #define BLOCK_AXG_MAC_OFFSET 0x0800 #define BLOCK_AXG_MAC_CSR_OFFSET 0x2000 +#define BLOCK_PCS_OFFSET 0x3800 #define XGENET_CONFIG_REG_ADDR 0x20 #define XGENET_SRST_ADDR 0x00 @@ -72,6 +73,9 @@ #define XG_MCX_ICM_CONFIG0_REG_0_ADDR 0x00e0 #define XG_MCX_ICM_CONFIG2_REG_0_ADDR 0x00e8 +#define PCS_CONTROL_1 0x0000 +#define PCS_CTRL_PCS_RST BIT(15) + extern const struct xgene_mac_ops xgene_xgmac_ops; extern const struct xgene_port_ops xgene_xgport_ops; diff --git a/drivers/net/ethernet/arc/emac_mdio.c b/drivers/net/ethernet/arc/emac_mdio.c index 058460bdd5a6..a22403c688c9 100644 --- a/drivers/net/ethernet/arc/emac_mdio.c +++ b/drivers/net/ethernet/arc/emac_mdio.c @@ -104,7 +104,7 @@ static int arc_mdio_write(struct mii_bus *bus, int phy_addr, * @bus: points to the mii_bus structure * Description: reset the MII bus */ -int arc_mdio_reset(struct mii_bus *bus) +static int arc_mdio_reset(struct mii_bus *bus) { struct arc_emac_priv *priv = bus->priv; struct arc_emac_mdio_bus_data *data = &priv->bus_data; diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index 8fc93c5f6abc..6cac919272ea 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -76,11 +76,19 @@ enum alx_device_quirks { ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0), }; +#define ALX_FLAG_USING_MSIX BIT(0) +#define ALX_FLAG_USING_MSI BIT(1) + struct alx_priv { struct net_device *dev; struct alx_hw hw; + /* msi-x vectors */ + int num_vec; + struct msix_entry *msix_entries; + char irq_lbl[IFNAMSIZ + 8]; + /* all descriptor memory */ struct { dma_addr_t dma; @@ -105,7 +113,7 @@ struct alx_priv { u16 msg_enable; - bool msi; + int flags; /* protects hw.stats */ spinlock_t stats_lock; diff --git a/drivers/net/ethernet/atheros/alx/hw.c b/drivers/net/ethernet/atheros/alx/hw.c index 1fe35e453d43..6ac40b0003a3 100644 --- a/drivers/net/ethernet/atheros/alx/hw.c +++ b/drivers/net/ethernet/atheros/alx/hw.c @@ -1031,6 +1031,20 @@ void alx_configure_basic(struct alx_hw *hw) alx_write_mem32(hw, ALX_WRR, val); } +void alx_mask_msix(struct alx_hw *hw, int index, bool mask) +{ + u32 reg, val; + + reg = ALX_MSIX_ENTRY_BASE + index * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + + val = mask ? PCI_MSIX_ENTRY_CTRL_MASKBIT : 0; + + alx_write_mem32(hw, reg, val); + alx_post_write(hw); +} + + bool alx_get_phy_info(struct alx_hw *hw) { u16 devs1, devs2; diff --git a/drivers/net/ethernet/atheros/alx/hw.h b/drivers/net/ethernet/atheros/alx/hw.h index f289c05f5cb4..0191477ace51 100644 --- a/drivers/net/ethernet/atheros/alx/hw.h +++ b/drivers/net/ethernet/atheros/alx/hw.h @@ -562,6 +562,7 @@ int alx_reset_mac(struct alx_hw *hw); void alx_set_macaddr(struct alx_hw *hw, const u8 *addr); bool alx_phy_configured(struct alx_hw *hw); void alx_configure_basic(struct alx_hw *hw); +void alx_mask_msix(struct alx_hw *hw, int index, bool mask); void alx_disable_rss(struct alx_hw *hw); bool alx_get_phy_info(struct alx_hw *hw); void alx_update_hw_stats(struct alx_hw *hw); diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index 4eb17daefc4f..c0f84b73574d 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -51,6 +51,9 @@ const char alx_drv_name[] = "alx"; +static bool msix = false; +module_param(msix, bool, 0); +MODULE_PARM_DESC(msix, "Enable msi-x interrupt support"); static void alx_free_txbuf(struct alx_priv *alx, int entry) { @@ -292,32 +295,29 @@ static int alx_poll(struct napi_struct *napi, int budget) napi_complete(&alx->napi); /* enable interrupt */ - spin_lock_irqsave(&alx->irq_lock, flags); - alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; - alx_write_mem32(hw, ALX_IMR, alx->int_mask); - spin_unlock_irqrestore(&alx->irq_lock, flags); + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_mask_msix(hw, 1, false); + } else { + spin_lock_irqsave(&alx->irq_lock, flags); + alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0; + alx_write_mem32(hw, ALX_IMR, alx->int_mask); + spin_unlock_irqrestore(&alx->irq_lock, flags); + } alx_post_write(hw); return work; } -static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) +static bool alx_intr_handle_misc(struct alx_priv *alx, u32 intr) { struct alx_hw *hw = &alx->hw; - bool write_int_mask = false; - - spin_lock(&alx->irq_lock); - - /* ACK interrupt */ - alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS); - intr &= alx->int_mask; if (intr & ALX_ISR_FATAL) { netif_warn(alx, hw, alx->dev, "fatal interrupt 0x%x, resetting\n", intr); alx_schedule_reset(alx); - goto out; + return true; } if (intr & ALX_ISR_ALERT) @@ -329,19 +329,32 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) * is cleared, the interrupt status could be cleared. */ alx->int_mask &= ~ALX_ISR_PHY; - write_int_mask = true; + alx_write_mem32(hw, ALX_IMR, alx->int_mask); alx_schedule_link_check(alx); } + return false; +} + +static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) +{ + struct alx_hw *hw = &alx->hw; + + spin_lock(&alx->irq_lock); + + /* ACK interrupt */ + alx_write_mem32(hw, ALX_ISR, intr | ALX_ISR_DIS); + intr &= alx->int_mask; + + if (alx_intr_handle_misc(alx, intr)) + goto out; + if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) { napi_schedule(&alx->napi); /* mask rx/tx interrupt, enable them when napi complete */ alx->int_mask &= ~ALX_ISR_ALL_QUEUES; - write_int_mask = true; - } - - if (write_int_mask) alx_write_mem32(hw, ALX_IMR, alx->int_mask); + } alx_write_mem32(hw, ALX_ISR, 0); @@ -350,6 +363,46 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr) return IRQ_HANDLED; } +static irqreturn_t alx_intr_msix_ring(int irq, void *data) +{ + struct alx_priv *alx = data; + struct alx_hw *hw = &alx->hw; + + /* mask interrupt to ACK chip */ + alx_mask_msix(hw, 1, true); + /* clear interrupt status */ + alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)); + + napi_schedule(&alx->napi); + + return IRQ_HANDLED; +} + +static irqreturn_t alx_intr_msix_misc(int irq, void *data) +{ + struct alx_priv *alx = data; + struct alx_hw *hw = &alx->hw; + u32 intr; + + /* mask interrupt to ACK chip */ + alx_mask_msix(hw, 0, true); + + /* read interrupt status */ + intr = alx_read_mem32(hw, ALX_ISR); + intr &= (alx->int_mask & ~ALX_ISR_ALL_QUEUES); + + if (alx_intr_handle_misc(alx, intr)) + return IRQ_HANDLED; + + /* clear interrupt status */ + alx_write_mem32(hw, ALX_ISR, intr); + + /* enable interrupt again */ + alx_mask_msix(hw, 0, false); + + return IRQ_HANDLED; +} + static irqreturn_t alx_intr_msi(int irq, void *data) { struct alx_priv *alx = data; @@ -614,31 +667,136 @@ static void alx_free_rings(struct alx_priv *alx) static void alx_config_vector_mapping(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + u32 tbl = 0; - alx_write_mem32(hw, ALX_MSI_MAP_TBL1, 0); + if (alx->flags & ALX_FLAG_USING_MSIX) { + tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT; + tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT; + } + + alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl); alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0); alx_write_mem32(hw, ALX_MSI_ID_MAP, 0); } +static bool alx_enable_msix(struct alx_priv *alx) +{ + int i, err, num_vec = 2; + + alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry), + GFP_KERNEL); + if (!alx->msix_entries) { + netdev_warn(alx->dev, "Allocation of msix entries failed!\n"); + return false; + } + + for (i = 0; i < num_vec; i++) + alx->msix_entries[i].entry = i; + + err = pci_enable_msix(alx->hw.pdev, alx->msix_entries, num_vec); + if (err) { + kfree(alx->msix_entries); + netdev_warn(alx->dev, "Enabling MSI-X interrupts failed!\n"); + return false; + } + + alx->num_vec = num_vec; + return true; +} + +static int alx_request_msix(struct alx_priv *alx) +{ + struct net_device *netdev = alx->dev; + int i, err, vector = 0, free_vector = 0; + + err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc, + 0, netdev->name, alx); + if (err) + goto out_err; + + vector++; + sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name); + + err = request_irq(alx->msix_entries[vector].vector, + alx_intr_msix_ring, 0, alx->irq_lbl, alx); + if (err) + goto out_free; + + return 0; + +out_free: + free_irq(alx->msix_entries[free_vector++].vector, alx); + + vector--; + for (i = 0; i < vector; i++) + free_irq(alx->msix_entries[free_vector++].vector, alx); + +out_err: + return err; +} + +static void alx_init_intr(struct alx_priv *alx, bool msix) +{ + if (msix) { + if (alx_enable_msix(alx)) + alx->flags |= ALX_FLAG_USING_MSIX; + } + + if (!(alx->flags & ALX_FLAG_USING_MSIX)) { + alx->num_vec = 1; + + if (!pci_enable_msi(alx->hw.pdev)) + alx->flags |= ALX_FLAG_USING_MSI; + } +} + +static void alx_disable_advanced_intr(struct alx_priv *alx) +{ + if (alx->flags & ALX_FLAG_USING_MSIX) { + kfree(alx->msix_entries); + pci_disable_msix(alx->hw.pdev); + alx->flags &= ~ALX_FLAG_USING_MSIX; + } + + if (alx->flags & ALX_FLAG_USING_MSI) { + pci_disable_msi(alx->hw.pdev); + alx->flags &= ~ALX_FLAG_USING_MSI; + } +} + static void alx_irq_enable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + int i; /* level-1 interrupt switch */ alx_write_mem32(hw, ALX_ISR, 0); alx_write_mem32(hw, ALX_IMR, alx->int_mask); alx_post_write(hw); + + if (alx->flags & ALX_FLAG_USING_MSIX) + /* enable all msix irqs */ + for (i = 0; i < alx->num_vec; i++) + alx_mask_msix(hw, i, false); } static void alx_irq_disable(struct alx_priv *alx) { struct alx_hw *hw = &alx->hw; + int i; alx_write_mem32(hw, ALX_ISR, ALX_ISR_DIS); alx_write_mem32(hw, ALX_IMR, 0); alx_post_write(hw); - synchronize_irq(alx->hw.pdev->irq); + if (alx->flags & ALX_FLAG_USING_MSIX) { + for (i = 0; i < alx->num_vec; i++) { + alx_mask_msix(hw, i, true); + synchronize_irq(alx->msix_entries[i].vector); + } + } else { + synchronize_irq(alx->hw.pdev->irq); + } } static int alx_request_irq(struct alx_priv *alx) @@ -650,9 +808,18 @@ static int alx_request_irq(struct alx_priv *alx) msi_ctrl = (hw->imt >> 1) << ALX_MSI_RETRANS_TM_SHIFT; - if (!pci_enable_msi(alx->hw.pdev)) { - alx->msi = true; + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl); + err = alx_request_msix(alx); + if (!err) + goto out; + /* msix request failed, realloc resources */ + alx_disable_advanced_intr(alx); + alx_init_intr(alx, false); + } + + if (alx->flags & ALX_FLAG_USING_MSI) { alx_write_mem32(hw, ALX_MSI_RETRANS_TIMER, msi_ctrl | ALX_MSI_MASK_SEL_LINE); err = request_irq(pdev->irq, alx_intr_msi, 0, @@ -660,6 +827,7 @@ static int alx_request_irq(struct alx_priv *alx) if (!err) goto out; /* fall back to legacy interrupt */ + alx->flags &= ~ALX_FLAG_USING_MSI; pci_disable_msi(alx->hw.pdev); } @@ -669,19 +837,25 @@ static int alx_request_irq(struct alx_priv *alx) out: if (!err) alx_config_vector_mapping(alx); + else + netdev_err(alx->dev, "IRQ registration failed!\n"); return err; } static void alx_free_irq(struct alx_priv *alx) { struct pci_dev *pdev = alx->hw.pdev; + int i; - free_irq(pdev->irq, alx); - - if (alx->msi) { - pci_disable_msi(alx->hw.pdev); - alx->msi = false; + if (alx->flags & ALX_FLAG_USING_MSIX) { + /* we have only 2 vectors without multi queue support */ + for (i = 0; i < 2; i++) + free_irq(alx->msix_entries[i].vector, alx); + } else { + free_irq(pdev->irq, alx); } + + alx_disable_advanced_intr(alx); } static int alx_identify_hw(struct alx_priv *alx) @@ -847,12 +1021,14 @@ static int __alx_open(struct alx_priv *alx, bool resume) { int err; + alx_init_intr(alx, msix); + if (!resume) netif_carrier_off(alx->dev); err = alx_alloc_rings(alx); if (err) - return err; + goto out_disable_adv_intr; alx_configure(alx); @@ -873,6 +1049,8 @@ static int __alx_open(struct alx_priv *alx, bool resume) out_free_rings: alx_free_rings(alx); +out_disable_adv_intr: + alx_disable_advanced_intr(alx); return err; } @@ -993,6 +1171,18 @@ static void alx_reset(struct work_struct *work) rtnl_unlock(); } +static int alx_tpd_req(struct sk_buff *skb) +{ + int num; + + num = skb_shinfo(skb)->nr_frags + 1; + /* we need one extra descriptor for LSOv2 */ + if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) + num++; + + return num; +} + static int alx_tx_csum(struct sk_buff *skb, struct alx_txd *first) { u8 cso, css; @@ -1012,6 +1202,45 @@ static int alx_tx_csum(struct sk_buff *skb, struct alx_txd *first) return 0; } +static int alx_tso(struct sk_buff *skb, struct alx_txd *first) +{ + int err; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) + return 0; + + err = skb_cow_head(skb, 0); + if (err < 0) + return err; + + if (skb->protocol == htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + + iph->check = 0; + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + 0, IPPROTO_TCP, 0); + first->word1 |= 1 << TPD_IPV4_SHIFT; + } else if (skb_is_gso_v6(skb)) { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + /* LSOv2: the first TPD only provides the packet length */ + first->adrl.l.pkt_len = skb->len; + first->word1 |= 1 << TPD_LSO_V2_SHIFT; + } + + first->word1 |= 1 << TPD_LSO_EN_SHIFT; + first->word1 |= (skb_transport_offset(skb) & + TPD_L4HDROFFSET_MASK) << TPD_L4HDROFFSET_SHIFT; + first->word1 |= (skb_shinfo(skb)->gso_size & + TPD_MSS_MASK) << TPD_MSS_SHIFT; + return 1; +} + static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb) { struct alx_tx_queue *txq = &alx->txq; @@ -1022,6 +1251,16 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb) first_tpd = &txq->tpd[txq->write_idx]; tpd = first_tpd; + if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) { + if (++txq->write_idx == alx->tx_ringsz) + txq->write_idx = 0; + + tpd = &txq->tpd[txq->write_idx]; + tpd->len = first_tpd->len; + tpd->vlan_tag = first_tpd->vlan_tag; + tpd->word1 = first_tpd->word1; + } + maplen = skb_headlen(skb); dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen, DMA_TO_DEVICE); @@ -1082,9 +1321,9 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, struct alx_priv *alx = netdev_priv(netdev); struct alx_tx_queue *txq = &alx->txq; struct alx_txd *first; - int tpdreq = skb_shinfo(skb)->nr_frags + 1; + int tso; - if (alx_tpd_avail(alx) < tpdreq) { + if (alx_tpd_avail(alx) < alx_tpd_req(skb)) { netif_stop_queue(alx->dev); goto drop; } @@ -1092,7 +1331,10 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb, first = &txq->tpd[txq->write_idx]; memset(first, 0, sizeof(*first)); - if (alx_tx_csum(skb, first)) + tso = alx_tso(skb, first); + if (tso < 0) + goto drop; + else if (!tso && alx_tx_csum(skb, first)) goto drop; if (alx_map_tx_skb(alx, skb) < 0) @@ -1172,7 +1414,10 @@ static void alx_poll_controller(struct net_device *netdev) { struct alx_priv *alx = netdev_priv(netdev); - if (alx->msi) + if (alx->flags & ALX_FLAG_USING_MSIX) { + alx_intr_msix_misc(0, alx); + alx_intr_msix_ring(0, alx); + } else if (alx->flags & ALX_FLAG_USING_MSI) alx_intr_msi(0, alx); else alx_intr_legacy(0, alx); @@ -1351,7 +1596,10 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } - netdev->hw_features = NETIF_F_SG | NETIF_F_HW_CSUM; + netdev->hw_features = NETIF_F_SG | + NETIF_F_HW_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6; if (alx_get_perm_macaddr(hw, hw->perm_addr)) { dev_warn(&pdev->dev, diff --git a/drivers/net/ethernet/broadcom/b44.c b/drivers/net/ethernet/broadcom/b44.c index 74f0a37c4eb6..17aa33c5567d 100644 --- a/drivers/net/ethernet/broadcom/b44.c +++ b/drivers/net/ethernet/broadcom/b44.c @@ -1486,7 +1486,7 @@ static int b44_open(struct net_device *dev) b44_enable_ints(bp); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_start(bp->phydev); + phy_start(dev->phydev); netif_start_queue(dev); out: @@ -1651,7 +1651,7 @@ static int b44_close(struct net_device *dev) netif_stop_queue(dev); if (bp->flags & B44_FLAG_EXTERNAL_PHY) - phy_stop(bp->phydev); + phy_stop(dev->phydev); napi_disable(&bp->napi); @@ -1832,90 +1832,100 @@ static int b44_nway_reset(struct net_device *dev) return r; } -static int b44_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); + u32 supported, advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - return phy_ethtool_gset(bp->phydev, cmd); + BUG_ON(!dev->phydev); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } - cmd->supported = (SUPPORTED_Autoneg); - cmd->supported |= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_MII); + supported = (SUPPORTED_Autoneg); + supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_MII); - cmd->advertising = 0; + advertising = 0; if (bp->flags & B44_FLAG_ADV_10HALF) - cmd->advertising |= ADVERTISED_10baseT_Half; + advertising |= ADVERTISED_10baseT_Half; if (bp->flags & B44_FLAG_ADV_10FULL) - cmd->advertising |= ADVERTISED_10baseT_Full; + advertising |= ADVERTISED_10baseT_Full; if (bp->flags & B44_FLAG_ADV_100HALF) - cmd->advertising |= ADVERTISED_100baseT_Half; + advertising |= ADVERTISED_100baseT_Half; if (bp->flags & B44_FLAG_ADV_100FULL) - cmd->advertising |= ADVERTISED_100baseT_Full; - cmd->advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; - ethtool_cmd_speed_set(cmd, ((bp->flags & B44_FLAG_100_BASE_T) ? - SPEED_100 : SPEED_10)); - cmd->duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? + advertising |= ADVERTISED_100baseT_Full; + advertising |= ADVERTISED_Pause | ADVERTISED_Asym_Pause; + cmd->base.speed = (bp->flags & B44_FLAG_100_BASE_T) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (bp->flags & B44_FLAG_FULL_DUPLEX) ? DUPLEX_FULL : DUPLEX_HALF; - cmd->port = 0; - cmd->phy_address = bp->phy_addr; - cmd->transceiver = (bp->flags & B44_FLAG_EXTERNAL_PHY) ? - XCVR_EXTERNAL : XCVR_INTERNAL; - cmd->autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? + cmd->base.port = 0; + cmd->base.phy_address = bp->phy_addr; + cmd->base.autoneg = (bp->flags & B44_FLAG_FORCE_LINK) ? AUTONEG_DISABLE : AUTONEG_ENABLE; - if (cmd->autoneg == AUTONEG_ENABLE) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg == AUTONEG_ENABLE) + advertising |= ADVERTISED_Autoneg; + + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + if (!netif_running(dev)){ - ethtool_cmd_speed_set(cmd, 0); - cmd->duplex = 0xff; + cmd->base.speed = 0; + cmd->base.duplex = 0xff; } - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + return 0; } -static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int b44_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct b44 *bp = netdev_priv(dev); u32 speed; int ret; + u32 advertising; if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); + BUG_ON(!dev->phydev); spin_lock_irq(&bp->lock); if (netif_running(dev)) b44_setup_phy(bp); - ret = phy_ethtool_sset(bp->phydev, cmd); + ret = phy_ethtool_ksettings_set(dev->phydev, cmd); spin_unlock_irq(&bp->lock); return ret; } - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; + + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); /* We do not support gigabit. */ - if (cmd->autoneg == AUTONEG_ENABLE) { - if (cmd->advertising & + if (cmd->base.autoneg == AUTONEG_ENABLE) { + if (advertising & (ADVERTISED_1000baseT_Half | ADVERTISED_1000baseT_Full)) return -EINVAL; } else if ((speed != SPEED_100 && speed != SPEED_10) || - (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) { + (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) { return -EINVAL; } spin_lock_irq(&bp->lock); - if (cmd->autoneg == AUTONEG_ENABLE) { + if (cmd->base.autoneg == AUTONEG_ENABLE) { bp->flags &= ~(B44_FLAG_FORCE_LINK | B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX | @@ -1923,19 +1933,19 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); - if (cmd->advertising == 0) { + if (advertising == 0) { bp->flags |= (B44_FLAG_ADV_10HALF | B44_FLAG_ADV_10FULL | B44_FLAG_ADV_100HALF | B44_FLAG_ADV_100FULL); } else { - if (cmd->advertising & ADVERTISED_10baseT_Half) + if (advertising & ADVERTISED_10baseT_Half) bp->flags |= B44_FLAG_ADV_10HALF; - if (cmd->advertising & ADVERTISED_10baseT_Full) + if (advertising & ADVERTISED_10baseT_Full) bp->flags |= B44_FLAG_ADV_10FULL; - if (cmd->advertising & ADVERTISED_100baseT_Half) + if (advertising & ADVERTISED_100baseT_Half) bp->flags |= B44_FLAG_ADV_100HALF; - if (cmd->advertising & ADVERTISED_100baseT_Full) + if (advertising & ADVERTISED_100baseT_Full) bp->flags |= B44_FLAG_ADV_100FULL; } } else { @@ -1943,7 +1953,7 @@ static int b44_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) bp->flags &= ~(B44_FLAG_100_BASE_T | B44_FLAG_FULL_DUPLEX); if (speed == SPEED_100) bp->flags |= B44_FLAG_100_BASE_T; - if (cmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) bp->flags |= B44_FLAG_FULL_DUPLEX; } @@ -2110,8 +2120,6 @@ static int b44_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) static const struct ethtool_ops b44_ethtool_ops = { .get_drvinfo = b44_get_drvinfo, - .get_settings = b44_get_settings, - .set_settings = b44_set_settings, .nway_reset = b44_nway_reset, .get_link = ethtool_op_get_link, .get_wol = b44_get_wol, @@ -2125,6 +2133,8 @@ static const struct ethtool_ops b44_ethtool_ops = { .get_strings = b44_get_strings, .get_sset_count = b44_get_sset_count, .get_ethtool_stats = b44_get_ethtool_stats, + .get_link_ksettings = b44_get_link_ksettings, + .set_link_ksettings = b44_set_link_ksettings, }; static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) @@ -2137,8 +2147,8 @@ static int b44_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) spin_lock_irq(&bp->lock); if (bp->flags & B44_FLAG_EXTERNAL_PHY) { - BUG_ON(!bp->phydev); - err = phy_mii_ioctl(bp->phydev, ifr, cmd); + BUG_ON(!dev->phydev); + err = phy_mii_ioctl(dev->phydev, ifr, cmd); } else { err = generic_mii_ioctl(&bp->mii_if, if_mii(ifr), cmd, NULL); } @@ -2206,7 +2216,7 @@ static const struct net_device_ops b44_netdev_ops = { static void b44_adjust_link(struct net_device *dev) { struct b44 *bp = netdev_priv(dev); - struct phy_device *phydev = bp->phydev; + struct phy_device *phydev = dev->phydev; bool status_changed = 0; BUG_ON(!phydev); @@ -2303,7 +2313,6 @@ static int b44_register_phy_one(struct b44 *bp) SUPPORTED_MII); phydev->advertising = phydev->supported; - bp->phydev = phydev; bp->old_link = 0; bp->phy_addr = phydev->mdio.addr; @@ -2323,9 +2332,10 @@ err_out: static void b44_unregister_phy_one(struct b44 *bp) { + struct net_device *dev = bp->dev; struct mii_bus *mii_bus = bp->mii_bus; - phy_disconnect(bp->phydev); + phy_disconnect(dev->phydev); mdiobus_unregister(mii_bus); mdiobus_free(mii_bus); } diff --git a/drivers/net/ethernet/broadcom/b44.h b/drivers/net/ethernet/broadcom/b44.h index 65d88d7c5581..89d2cf341163 100644 --- a/drivers/net/ethernet/broadcom/b44.h +++ b/drivers/net/ethernet/broadcom/b44.h @@ -404,7 +404,6 @@ struct b44 { u32 tx_pending; u8 phy_addr; u8 force_copybreak; - struct phy_device *phydev; struct mii_bus *mii_bus; int old_link; struct mii_if_info mii_if; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.c b/drivers/net/ethernet/broadcom/bcm63xx_enet.c index 6c8bc5fadac7..ae364c74baf3 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.c +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.c @@ -791,7 +791,7 @@ static void bcm_enet_adjust_phy_link(struct net_device *dev) int status_changed; priv = netdev_priv(dev); - phydev = priv->phydev; + phydev = dev->phydev; status_changed = 0; if (priv->old_link != phydev->link) { @@ -913,7 +913,6 @@ static int bcm_enet_open(struct net_device *dev) priv->old_link = 0; priv->old_duplex = -1; priv->old_pause = -1; - priv->phydev = phydev; } /* mask all interrupts and request them */ @@ -1085,7 +1084,7 @@ static int bcm_enet_open(struct net_device *dev) ENETDMAC_IRMASK, priv->tx_chan); if (priv->has_phy) - phy_start(priv->phydev); + phy_start(phydev); else bcm_enet_adjust_link(dev); @@ -1127,7 +1126,7 @@ out_freeirq: free_irq(dev->irq, dev); out_phy_disconnect: - phy_disconnect(priv->phydev); + phy_disconnect(phydev); return ret; } @@ -1190,7 +1189,7 @@ static int bcm_enet_stop(struct net_device *dev) netif_stop_queue(dev); napi_disable(&priv->napi); if (priv->has_phy) - phy_stop(priv->phydev); + phy_stop(dev->phydev); del_timer_sync(&priv->rx_timeout); /* mask all interrupts */ @@ -1234,10 +1233,8 @@ static int bcm_enet_stop(struct net_device *dev) free_irq(dev->irq, dev); /* release phy */ - if (priv->has_phy) { - phy_disconnect(priv->phydev); - priv->phydev = NULL; - } + if (priv->has_phy) + phy_disconnect(dev->phydev); return 0; } @@ -1437,64 +1434,68 @@ static int bcm_enet_nway_reset(struct net_device *dev) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return genphy_restart_aneg(priv->phydev); + return genphy_restart_aneg(dev->phydev); } return -EOPNOTSUPP; } -static int bcm_enet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; + u32 supported, advertising; priv = netdev_priv(dev); - cmd->maxrxpkt = 0; - cmd->maxtxpkt = 0; - if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_gset(priv->phydev, cmd); + return phy_ethtool_ksettings_get(dev->phydev, cmd); } else { - cmd->autoneg = 0; - ethtool_cmd_speed_set(cmd, ((priv->force_speed_100) - ? SPEED_100 : SPEED_10)); - cmd->duplex = (priv->force_duplex_full) ? + cmd->base.autoneg = 0; + cmd->base.speed = (priv->force_speed_100) ? + SPEED_100 : SPEED_10; + cmd->base.duplex = (priv->force_duplex_full) ? DUPLEX_FULL : DUPLEX_HALF; - cmd->supported = ADVERTISED_10baseT_Half | + supported = ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full | ADVERTISED_100baseT_Half | ADVERTISED_100baseT_Full; - cmd->advertising = 0; - cmd->port = PORT_MII; - cmd->transceiver = XCVR_EXTERNAL; + advertising = 0; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.supported, supported); + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.advertising, advertising); + cmd->base.port = PORT_MII; } return 0; } -static int bcm_enet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcm_enet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct bcm_enet_priv *priv; priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_ethtool_sset(priv->phydev, cmd); + return phy_ethtool_ksettings_set(dev->phydev, cmd); } else { - if (cmd->autoneg || - (cmd->speed != SPEED_100 && cmd->speed != SPEED_10) || - cmd->port != PORT_MII) + if (cmd->base.autoneg || + (cmd->base.speed != SPEED_100 && + cmd->base.speed != SPEED_10) || + cmd->base.port != PORT_MII) return -EINVAL; - priv->force_speed_100 = (cmd->speed == SPEED_100) ? 1 : 0; - priv->force_duplex_full = (cmd->duplex == DUPLEX_FULL) ? 1 : 0; + priv->force_speed_100 = + (cmd->base.speed == SPEED_100) ? 1 : 0; + priv->force_duplex_full = + (cmd->base.duplex == DUPLEX_FULL) ? 1 : 0; if (netif_running(dev)) bcm_enet_adjust_link(dev); @@ -1588,14 +1589,14 @@ static const struct ethtool_ops bcm_enet_ethtool_ops = { .get_sset_count = bcm_enet_get_sset_count, .get_ethtool_stats = bcm_enet_get_ethtool_stats, .nway_reset = bcm_enet_nway_reset, - .get_settings = bcm_enet_get_settings, - .set_settings = bcm_enet_set_settings, .get_drvinfo = bcm_enet_get_drvinfo, .get_link = ethtool_op_get_link, .get_ringparam = bcm_enet_get_ringparam, .set_ringparam = bcm_enet_set_ringparam, .get_pauseparam = bcm_enet_get_pauseparam, .set_pauseparam = bcm_enet_set_pauseparam, + .get_link_ksettings = bcm_enet_get_link_ksettings, + .set_link_ksettings = bcm_enet_set_link_ksettings, }; static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) @@ -1604,9 +1605,9 @@ static int bcm_enet_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) priv = netdev_priv(dev); if (priv->has_phy) { - if (!priv->phydev) + if (!dev->phydev) return -ENODEV; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } else { struct mii_if_info mii; diff --git a/drivers/net/ethernet/broadcom/bcm63xx_enet.h b/drivers/net/ethernet/broadcom/bcm63xx_enet.h index f55af4310085..0a1b7b2e55bd 100644 --- a/drivers/net/ethernet/broadcom/bcm63xx_enet.h +++ b/drivers/net/ethernet/broadcom/bcm63xx_enet.h @@ -290,7 +290,6 @@ struct bcm_enet_priv { /* used when a phy is connected (phylib used) */ struct mii_bus *mii_bus; - struct phy_device *phydev; int old_link; int old_duplex; int old_pause; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.c b/drivers/net/ethernet/broadcom/bcmsysport.c index b2d30863caeb..c3354b9941d1 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.c +++ b/drivers/net/ethernet/broadcom/bcmsysport.c @@ -58,8 +58,8 @@ BCM_SYSPORT_IO_MACRO(topctrl, SYS_PORT_TOPCTRL_OFFSET); static inline void intrl2_##which##_mask_clear(struct bcm_sysport_priv *priv, \ u32 mask) \ { \ - intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ priv->irq##which##_mask &= ~(mask); \ + intrl2_##which##_writel(priv, mask, INTRL2_CPU_MASK_CLEAR); \ } \ static inline void intrl2_##which##_mask_set(struct bcm_sysport_priv *priv, \ u32 mask) \ @@ -1692,7 +1692,7 @@ static int bcm_sysport_stop(struct net_device *dev) return 0; } -static struct ethtool_ops bcm_sysport_ethtool_ops = { +static const struct ethtool_ops bcm_sysport_ethtool_ops = { .get_drvinfo = bcm_sysport_get_drvinfo, .get_msglevel = bcm_sysport_get_msglvl, .set_msglevel = bcm_sysport_set_msglvl, diff --git a/drivers/net/ethernet/broadcom/bgmac-bcma.c b/drivers/net/ethernet/broadcom/bgmac-bcma.c index 625235db644f..c16ec3a51876 100644 --- a/drivers/net/ethernet/broadcom/bgmac-bcma.c +++ b/drivers/net/ethernet/broadcom/bgmac-bcma.c @@ -92,6 +92,7 @@ MODULE_DEVICE_TABLE(bcma, bgmac_bcma_tbl); /* http://bcm-v4.sipsolutions.net/mac-gbit/gmac/chipattach */ static int bgmac_probe(struct bcma_device *core) { + struct bcma_chipinfo *ci = &core->bus->chipinfo; struct ssb_sprom *sprom = &core->bus->sprom; struct mii_bus *mii_bus; struct bgmac *bgmac; @@ -157,7 +158,8 @@ static int bgmac_probe(struct bcma_device *core) dev_info(bgmac->dev, "Found PHY addr: %d%s\n", bgmac->phyaddr, bgmac->phyaddr == BGMAC_PHY_NOREGS ? " (NOREGS)" : ""); - if (!bgmac_is_bcm4707_family(core)) { + if (!bgmac_is_bcm4707_family(core) && + !(ci->id == BCMA_CHIP_ID_BCM53573 && core->core_unit == 1)) { mii_bus = bcma_mdio_mii_register(core, bgmac->phyaddr); if (IS_ERR(mii_bus)) { err = PTR_ERR(mii_bus); @@ -230,6 +232,21 @@ static int bgmac_probe(struct bcma_device *core) bgmac->feature_flags |= BGMAC_FEAT_NO_RESET; bgmac->feature_flags |= BGMAC_FEAT_FORCE_SPEED_2500; break; + case BCMA_CHIP_ID_BCM53573: + bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; + bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK; + if (ci->pkg == BCMA_PKG_ID_BCM47189) + bgmac->feature_flags |= BGMAC_FEAT_IOST_ATTACHED; + if (core->core_unit == 0) { + bgmac->feature_flags |= BGMAC_FEAT_CC4_IF_SW_TYPE; + if (ci->pkg == BCMA_PKG_ID_BCM47189) + bgmac->feature_flags |= + BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII; + } else if (core->core_unit == 1) { + bgmac->feature_flags |= BGMAC_FEAT_IRQ_ID_OOB_6; + bgmac->feature_flags |= BGMAC_FEAT_CC7_IF_TYPE_RGMII; + } + break; default: bgmac->feature_flags |= BGMAC_FEAT_CLKCTLST; bgmac->feature_flags |= BGMAC_FEAT_SET_RXQ_CLK; diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index c4751ece76f6..6ea0e5ff1e44 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -932,7 +932,8 @@ static void bgmac_chip_reset(struct bgmac *bgmac) et_swtype <<= 4; sw_type = et_swtype; } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_EPHYRMII) { - sw_type = BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII; + sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RMII | + BGMAC_CHIPCTL_1_SW_TYPE_EPHYRMII; } else if (bgmac->feature_flags & BGMAC_FEAT_SW_TYPE_RGMII) { sw_type = BGMAC_CHIPCTL_1_IF_TYPE_RGMII | BGMAC_CHIPCTL_1_SW_TYPE_RGMII; @@ -940,6 +941,27 @@ static void bgmac_chip_reset(struct bgmac *bgmac) bgmac_cco_ctl_maskset(bgmac, 1, ~(BGMAC_CHIPCTL_1_IF_TYPE_MASK | BGMAC_CHIPCTL_1_SW_TYPE_MASK), sw_type); + } else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE) { + u32 sw_type = BGMAC_CHIPCTL_4_IF_TYPE_MII | + BGMAC_CHIPCTL_4_SW_TYPE_EPHY; + u8 et_swtype = 0; + char buf[4]; + + if (bcm47xx_nvram_getenv("et_swtype", buf, sizeof(buf)) > 0) { + if (kstrtou8(buf, 0, &et_swtype)) + dev_err(bgmac->dev, "Failed to parse et_swtype (%s)\n", + buf); + sw_type = (et_swtype & 0x0f) << 12; + } else if (bgmac->feature_flags & BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII) { + sw_type = BGMAC_CHIPCTL_4_IF_TYPE_RGMII | + BGMAC_CHIPCTL_4_SW_TYPE_RGMII; + } + bgmac_cco_ctl_maskset(bgmac, 4, ~(BGMAC_CHIPCTL_4_IF_TYPE_MASK | + BGMAC_CHIPCTL_4_SW_TYPE_MASK), + sw_type); + } else if (bgmac->feature_flags & BGMAC_FEAT_CC7_IF_TYPE_RGMII) { + bgmac_cco_ctl_maskset(bgmac, 7, ~BGMAC_CHIPCTL_7_IF_TYPE_MASK, + BGMAC_CHIPCTL_7_IF_TYPE_RGMII); } if (iost & BGMAC_BCMA_IOST_ATTACHED && !bgmac->has_robosw) @@ -1467,6 +1489,10 @@ int bgmac_enet_probe(struct bgmac *info) */ bgmac_clk_enable(bgmac, 0); + /* This seems to be fixing IRQ by assigning OOB #6 to the core */ + if (bgmac->feature_flags & BGMAC_FEAT_IRQ_ID_OOB_6) + bgmac_idm_write(bgmac, BCMA_OOB_SEL_OUT_A30, 0x86); + bgmac_chip_reset(bgmac); err = bgmac_dma_alloc(bgmac); diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h index 24a250267b88..80836b4c9f38 100644 --- a/drivers/net/ethernet/broadcom/bgmac.h +++ b/drivers/net/ethernet/broadcom/bgmac.h @@ -369,6 +369,21 @@ #define BGMAC_CHIPCTL_1_SW_TYPE_RGMII 0x000000C0 #define BGMAC_CHIPCTL_1_RXC_DLL_BYPASS 0x00010000 +#define BGMAC_CHIPCTL_4_IF_TYPE_MASK 0x00003000 +#define BGMAC_CHIPCTL_4_IF_TYPE_RMII 0x00000000 +#define BGMAC_CHIPCTL_4_IF_TYPE_MII 0x00001000 +#define BGMAC_CHIPCTL_4_IF_TYPE_RGMII 0x00002000 +#define BGMAC_CHIPCTL_4_SW_TYPE_MASK 0x0000C000 +#define BGMAC_CHIPCTL_4_SW_TYPE_EPHY 0x00000000 +#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYMII 0x00004000 +#define BGMAC_CHIPCTL_4_SW_TYPE_EPHYRMII 0x00008000 +#define BGMAC_CHIPCTL_4_SW_TYPE_RGMII 0x0000C000 + +#define BGMAC_CHIPCTL_7_IF_TYPE_MASK 0x000000C0 +#define BGMAC_CHIPCTL_7_IF_TYPE_RMII 0x00000000 +#define BGMAC_CHIPCTL_7_IF_TYPE_MII 0x00000040 +#define BGMAC_CHIPCTL_7_IF_TYPE_RGMII 0x00000080 + #define BGMAC_WEIGHT 64 #define ETHER_MAX_LEN 1518 @@ -390,6 +405,10 @@ #define BGMAC_FEAT_NO_CLR_MIB BIT(13) #define BGMAC_FEAT_FORCE_SPEED_2500 BIT(14) #define BGMAC_FEAT_CMDCFG_SR_REV4 BIT(15) +#define BGMAC_FEAT_IRQ_ID_OOB_6 BIT(16) +#define BGMAC_FEAT_CC4_IF_SW_TYPE BIT(17) +#define BGMAC_FEAT_CC4_IF_SW_TYPE_RGMII BIT(18) +#define BGMAC_FEAT_CC7_IF_TYPE_RGMII BIT(19) struct bgmac_slot_info { union { diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 505ceaf451e2..27f11a5d5fe2 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -50,7 +50,7 @@ #include #include -#if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) +#if IS_ENABLED(CONFIG_CNIC) #define BCM_CNIC 1 #include "cnic_if.h" #endif diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h index 0e68fadecfdb..243cb9748d35 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h @@ -492,7 +492,8 @@ int __bnx2x_setup_tc(struct net_device *dev, u32 handle, __be16 proto, int bnx2x_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi); int bnx2x_set_vf_mac(struct net_device *dev, int queue, u8 *mac); -int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos); +int bnx2x_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto); /* select_queue callback */ u16 bnx2x_select_queue(struct net_device *dev, struct sk_buff *skb, diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c index fa3386bb14f7..20fe6a8c35c1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c @@ -12563,39 +12563,62 @@ static int bnx2x_close(struct net_device *dev) return 0; } -static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, - struct bnx2x_mcast_ramrod_params *p) +struct bnx2x_mcast_list_elem_group { - int mc_count = netdev_mc_count(bp->dev); - struct bnx2x_mcast_list_elem *mc_mac = - kcalloc(mc_count, sizeof(*mc_mac), GFP_ATOMIC); - struct netdev_hw_addr *ha; + struct list_head mcast_group_link; + struct bnx2x_mcast_list_elem mcast_elems[]; +}; - if (!mc_mac) - return -ENOMEM; +#define MCAST_ELEMS_PER_PG \ + ((PAGE_SIZE - sizeof(struct bnx2x_mcast_list_elem_group)) / \ + sizeof(struct bnx2x_mcast_list_elem)) - INIT_LIST_HEAD(&p->mcast_list); +static void bnx2x_free_mcast_macs_list(struct list_head *mcast_group_list) +{ + struct bnx2x_mcast_list_elem_group *current_mcast_group; - netdev_for_each_mc_addr(ha, bp->dev) { - mc_mac->mac = bnx2x_mc_addr(ha); - list_add_tail(&mc_mac->link, &p->mcast_list); - mc_mac++; + while (!list_empty(mcast_group_list)) { + current_mcast_group = list_first_entry(mcast_group_list, + struct bnx2x_mcast_list_elem_group, + mcast_group_link); + list_del(¤t_mcast_group->mcast_group_link); + free_page((unsigned long)current_mcast_group); } - - p->mcast_list_len = mc_count; - - return 0; } -static void bnx2x_free_mcast_macs_list( - struct bnx2x_mcast_ramrod_params *p) +static int bnx2x_init_mcast_macs_list(struct bnx2x *bp, + struct bnx2x_mcast_ramrod_params *p, + struct list_head *mcast_group_list) { - struct bnx2x_mcast_list_elem *mc_mac = - list_first_entry(&p->mcast_list, struct bnx2x_mcast_list_elem, - link); + struct bnx2x_mcast_list_elem *mc_mac; + struct netdev_hw_addr *ha; + struct bnx2x_mcast_list_elem_group *current_mcast_group = NULL; + int mc_count = netdev_mc_count(bp->dev); + int offset = 0; - WARN_ON(!mc_mac); - kfree(mc_mac); + INIT_LIST_HEAD(&p->mcast_list); + netdev_for_each_mc_addr(ha, bp->dev) { + if (!offset) { + current_mcast_group = + (struct bnx2x_mcast_list_elem_group *) + __get_free_page(GFP_ATOMIC); + if (!current_mcast_group) { + bnx2x_free_mcast_macs_list(mcast_group_list); + BNX2X_ERR("Failed to allocate mc MAC list\n"); + return -ENOMEM; + } + list_add(¤t_mcast_group->mcast_group_link, + mcast_group_list); + } + mc_mac = ¤t_mcast_group->mcast_elems[offset]; + mc_mac->mac = bnx2x_mc_addr(ha); + list_add_tail(&mc_mac->link, &p->mcast_list); + offset++; + if (offset == MCAST_ELEMS_PER_PG) + offset = 0; + } + p->mcast_list_len = mc_count; + return 0; } /** @@ -12643,8 +12666,9 @@ static int bnx2x_set_uc_list(struct bnx2x *bp) BNX2X_UC_LIST_MAC, &ramrod_flags); } -static int bnx2x_set_mc_list(struct bnx2x *bp) +static int bnx2x_set_mc_list_e1x(struct bnx2x *bp) { + LIST_HEAD(mcast_group_list); struct net_device *dev = bp->dev; struct bnx2x_mcast_ramrod_params rparam = {NULL}; int rc = 0; @@ -12660,12 +12684,9 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) /* then, configure a new MACs list */ if (netdev_mc_count(dev)) { - rc = bnx2x_init_mcast_macs_list(bp, &rparam); - if (rc) { - BNX2X_ERR("Failed to create multicast MACs list: %d\n", - rc); + rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list); + if (rc) return rc; - } /* Now add the new MACs */ rc = bnx2x_config_mcast(bp, &rparam, @@ -12674,7 +12695,44 @@ static int bnx2x_set_mc_list(struct bnx2x *bp) BNX2X_ERR("Failed to set a new multicast configuration: %d\n", rc); - bnx2x_free_mcast_macs_list(&rparam); + bnx2x_free_mcast_macs_list(&mcast_group_list); + } + + return rc; +} + +static int bnx2x_set_mc_list(struct bnx2x *bp) +{ + LIST_HEAD(mcast_group_list); + struct bnx2x_mcast_ramrod_params rparam = {NULL}; + struct net_device *dev = bp->dev; + int rc = 0; + + /* On older adapters, we need to flush and re-add filters */ + if (CHIP_IS_E1x(bp)) + return bnx2x_set_mc_list_e1x(bp); + + rparam.mcast_obj = &bp->mcast_obj; + + if (netdev_mc_count(dev)) { + rc = bnx2x_init_mcast_macs_list(bp, &rparam, &mcast_group_list); + if (rc) + return rc; + + /* Override the curently configured set of mc filters */ + rc = bnx2x_config_mcast(bp, &rparam, + BNX2X_MCAST_CMD_SET); + if (rc < 0) + BNX2X_ERR("Failed to set a new multicast configuration: %d\n", + rc); + + bnx2x_free_mcast_macs_list(&mcast_group_list); + } else { + /* If no mc addresses are required, flush the configuration */ + rc = bnx2x_config_mcast(bp, &rparam, BNX2X_MCAST_CMD_DEL); + if (rc) + BNX2X_ERR("Failed to clear multicast configuration %d\n", + rc); } return rc; @@ -13214,13 +13272,22 @@ static int bnx2x_init_dev(struct bnx2x *bp, struct pci_dev *pdev, NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO | NETIF_F_RXHASH | NETIF_F_HW_VLAN_CTAG_TX; if (!chip_is_e1x) { - dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL | - NETIF_F_GSO_IPXIP4; + dev->hw_features |= NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_IPXIP4 | + NETIF_F_GSO_UDP_TUNNEL | + NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO_ECN | NETIF_F_TSO6 | NETIF_F_GSO_IPXIP4 | - NETIF_F_GSO_GRE | NETIF_F_GSO_UDP_TUNNEL; + NETIF_F_GSO_GRE | NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_UDP_TUNNEL_CSUM | + NETIF_F_GSO_PARTIAL; + + dev->gso_partial_features = NETIF_F_GSO_GRE_CSUM | + NETIF_F_GSO_UDP_TUNNEL_CSUM; } dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c index ff702a707a91..cea6bdcde33f 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c @@ -2600,8 +2600,29 @@ struct bnx2x_mcast_mac_elem { u8 pad[2]; /* For a natural alignment of the following buffer */ }; +struct bnx2x_mcast_bin_elem { + struct list_head link; + int bin; + int type; /* BNX2X_MCAST_CMD_SET_{ADD, DEL} */ +}; + +union bnx2x_mcast_elem { + struct bnx2x_mcast_bin_elem bin_elem; + struct bnx2x_mcast_mac_elem mac_elem; +}; + +struct bnx2x_mcast_elem_group { + struct list_head mcast_group_link; + union bnx2x_mcast_elem mcast_elems[]; +}; + +#define MCAST_MAC_ELEMS_PER_PG \ + ((PAGE_SIZE - sizeof(struct bnx2x_mcast_elem_group)) / \ + sizeof(union bnx2x_mcast_elem)) + struct bnx2x_pending_mcast_cmd { struct list_head link; + struct list_head group_head; int type; /* BNX2X_MCAST_CMD_X */ union { struct list_head macs_head; @@ -2609,6 +2630,11 @@ struct bnx2x_pending_mcast_cmd { int next_bin; /* Needed for RESTORE flow with aprox match */ } data; + bool set_convert; /* in case type == BNX2X_MCAST_CMD_SET, this is set + * when macs_head had been converted to a list of + * bnx2x_mcast_bin_elem. + */ + bool done; /* set to true, when the command has been handled, * practically used in 57712 handling only, where one pending * command may be handled in a few operations. As long as for @@ -2627,53 +2653,93 @@ static int bnx2x_mcast_wait(struct bnx2x *bp, return 0; } +static void bnx2x_free_groups(struct list_head *mcast_group_list) +{ + struct bnx2x_mcast_elem_group *current_mcast_group; + + while (!list_empty(mcast_group_list)) { + current_mcast_group = list_first_entry(mcast_group_list, + struct bnx2x_mcast_elem_group, + mcast_group_link); + list_del(¤t_mcast_group->mcast_group_link); + free_page((unsigned long)current_mcast_group); + } +} + static int bnx2x_mcast_enqueue_cmd(struct bnx2x *bp, struct bnx2x_mcast_obj *o, struct bnx2x_mcast_ramrod_params *p, enum bnx2x_mcast_cmd cmd) { - int total_sz; struct bnx2x_pending_mcast_cmd *new_cmd; - struct bnx2x_mcast_mac_elem *cur_mac = NULL; struct bnx2x_mcast_list_elem *pos; - int macs_list_len = ((cmd == BNX2X_MCAST_CMD_ADD) ? - p->mcast_list_len : 0); + struct bnx2x_mcast_elem_group *elem_group; + struct bnx2x_mcast_mac_elem *mac_elem; + int total_elems = 0, macs_list_len = 0, offset = 0; + + /* When adding MACs we'll need to store their values */ + if (cmd == BNX2X_MCAST_CMD_ADD || cmd == BNX2X_MCAST_CMD_SET) + macs_list_len = p->mcast_list_len; /* If the command is empty ("handle pending commands only"), break */ if (!p->mcast_list_len) return 0; - total_sz = sizeof(*new_cmd) + - macs_list_len * sizeof(struct bnx2x_mcast_mac_elem); - /* Add mcast is called under spin_lock, thus calling with GFP_ATOMIC */ - new_cmd = kzalloc(total_sz, GFP_ATOMIC); - + new_cmd = kzalloc(sizeof(*new_cmd), GFP_ATOMIC); if (!new_cmd) return -ENOMEM; + INIT_LIST_HEAD(&new_cmd->data.macs_head); + INIT_LIST_HEAD(&new_cmd->group_head); + new_cmd->type = cmd; + new_cmd->done = false; + DP(BNX2X_MSG_SP, "About to enqueue a new %d command. macs_list_len=%d\n", cmd, macs_list_len); - INIT_LIST_HEAD(&new_cmd->data.macs_head); - - new_cmd->type = cmd; - new_cmd->done = false; - switch (cmd) { case BNX2X_MCAST_CMD_ADD: - cur_mac = (struct bnx2x_mcast_mac_elem *) - ((u8 *)new_cmd + sizeof(*new_cmd)); - - /* Push the MACs of the current command into the pending command - * MACs list: FIFO + case BNX2X_MCAST_CMD_SET: + /* For a set command, we need to allocate sufficient memory for + * all the bins, since we can't analyze at this point how much + * memory would be required. */ - list_for_each_entry(pos, &p->mcast_list, link) { - memcpy(cur_mac->mac, pos->mac, ETH_ALEN); - list_add_tail(&cur_mac->link, &new_cmd->data.macs_head); - cur_mac++; + total_elems = macs_list_len; + if (cmd == BNX2X_MCAST_CMD_SET) { + if (total_elems < BNX2X_MCAST_BINS_NUM) + total_elems = BNX2X_MCAST_BINS_NUM; + } + while (total_elems > 0) { + elem_group = (struct bnx2x_mcast_elem_group *) + __get_free_page(GFP_ATOMIC | __GFP_ZERO); + if (!elem_group) { + bnx2x_free_groups(&new_cmd->group_head); + kfree(new_cmd); + return -ENOMEM; + } + total_elems -= MCAST_MAC_ELEMS_PER_PG; + list_add_tail(&elem_group->mcast_group_link, + &new_cmd->group_head); + } + elem_group = list_first_entry(&new_cmd->group_head, + struct bnx2x_mcast_elem_group, + mcast_group_link); + list_for_each_entry(pos, &p->mcast_list, link) { + mac_elem = &elem_group->mcast_elems[offset].mac_elem; + memcpy(mac_elem->mac, pos->mac, ETH_ALEN); + /* Push the MACs of the current command into the pending + * command MACs list: FIFO + */ + list_add_tail(&mac_elem->link, + &new_cmd->data.macs_head); + offset++; + if (offset == MCAST_MAC_ELEMS_PER_PG) { + offset = 0; + elem_group = list_next_entry(elem_group, + mcast_group_link); + } } - break; case BNX2X_MCAST_CMD_DEL: @@ -2771,7 +2837,8 @@ static void bnx2x_mcast_set_one_rule_e2(struct bnx2x *bp, u8 rx_tx_add_flag = bnx2x_mcast_get_rx_tx_flag(o); int bin; - if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE)) + if ((cmd == BNX2X_MCAST_CMD_ADD) || (cmd == BNX2X_MCAST_CMD_RESTORE) || + (cmd == BNX2X_MCAST_CMD_SET_ADD)) rx_tx_add_flag |= ETH_MULTICAST_RULES_CMD_IS_ADD; data->rules[idx].cmd_general_data |= rx_tx_add_flag; @@ -2797,6 +2864,16 @@ static void bnx2x_mcast_set_one_rule_e2(struct bnx2x *bp, bin = cfg_data->bin; break; + case BNX2X_MCAST_CMD_SET_ADD: + bin = cfg_data->bin; + BIT_VEC64_SET_BIT(o->registry.aprox_match.vec, bin); + break; + + case BNX2X_MCAST_CMD_SET_DEL: + bin = cfg_data->bin; + BIT_VEC64_CLEAR_BIT(o->registry.aprox_match.vec, bin); + break; + default: BNX2X_ERR("Unknown command: %d\n", cmd); return; @@ -2932,6 +3009,110 @@ static inline void bnx2x_mcast_hdl_pending_restore_e2(struct bnx2x *bp, cmd_pos->data.next_bin++; } +static void +bnx2x_mcast_hdl_pending_set_e2_convert(struct bnx2x *bp, + struct bnx2x_mcast_obj *o, + struct bnx2x_pending_mcast_cmd *cmd_pos) +{ + u64 cur[BNX2X_MCAST_VEC_SZ], req[BNX2X_MCAST_VEC_SZ]; + struct bnx2x_mcast_mac_elem *pmac_pos, *pmac_pos_n; + struct bnx2x_mcast_bin_elem *p_item; + struct bnx2x_mcast_elem_group *elem_group; + int cnt = 0, mac_cnt = 0, offset = 0, i; + + memset(req, 0, sizeof(u64) * BNX2X_MCAST_VEC_SZ); + memcpy(cur, o->registry.aprox_match.vec, + sizeof(u64) * BNX2X_MCAST_VEC_SZ); + + /* Fill `current' with the required set of bins to configure */ + list_for_each_entry_safe(pmac_pos, pmac_pos_n, &cmd_pos->data.macs_head, + link) { + int bin = bnx2x_mcast_bin_from_mac(pmac_pos->mac); + + DP(BNX2X_MSG_SP, "Set contains %pM mcast MAC\n", + pmac_pos->mac); + + BIT_VEC64_SET_BIT(req, bin); + list_del(&pmac_pos->link); + mac_cnt++; + } + + /* We no longer have use for the MACs; Need to re-use memory for + * a list that will be used to configure bins. + */ + cmd_pos->set_convert = true; + INIT_LIST_HEAD(&cmd_pos->data.macs_head); + elem_group = list_first_entry(&cmd_pos->group_head, + struct bnx2x_mcast_elem_group, + mcast_group_link); + for (i = 0; i < BNX2X_MCAST_BINS_NUM; i++) { + bool b_current = !!BIT_VEC64_TEST_BIT(cur, i); + bool b_required = !!BIT_VEC64_TEST_BIT(req, i); + + if (b_current == b_required) + continue; + + p_item = &elem_group->mcast_elems[offset].bin_elem; + p_item->bin = i; + p_item->type = b_required ? BNX2X_MCAST_CMD_SET_ADD + : BNX2X_MCAST_CMD_SET_DEL; + list_add_tail(&p_item->link , &cmd_pos->data.macs_head); + cnt++; + offset++; + if (offset == MCAST_MAC_ELEMS_PER_PG) { + offset = 0; + elem_group = list_next_entry(elem_group, + mcast_group_link); + } + } + + /* We now definitely know how many commands are hiding here. + * Also need to correct the disruption we've added to guarantee this + * would be enqueued. + */ + o->total_pending_num -= (o->max_cmd_len + mac_cnt); + o->total_pending_num += cnt; + + DP(BNX2X_MSG_SP, "o->total_pending_num=%d\n", o->total_pending_num); +} + +static void +bnx2x_mcast_hdl_pending_set_e2(struct bnx2x *bp, + struct bnx2x_mcast_obj *o, + struct bnx2x_pending_mcast_cmd *cmd_pos, + int *cnt) +{ + union bnx2x_mcast_config_data cfg_data = {NULL}; + struct bnx2x_mcast_bin_elem *p_item, *p_item_n; + + /* This is actually a 2-part scheme - it starts by converting the MACs + * into a list of bins to be added/removed, and correcting the numbers + * on the object. this is now allowed, as we're now sure that all + * previous configured requests have already applied. + * The second part is actually adding rules for the newly introduced + * entries [like all the rest of the hdl_pending functions]. + */ + if (!cmd_pos->set_convert) + bnx2x_mcast_hdl_pending_set_e2_convert(bp, o, cmd_pos); + + list_for_each_entry_safe(p_item, p_item_n, &cmd_pos->data.macs_head, + link) { + cfg_data.bin = (u8)p_item->bin; + o->set_one_rule(bp, o, *cnt, &cfg_data, p_item->type); + (*cnt)++; + + list_del(&p_item->link); + + /* Break if we reached the maximum number of rules. */ + if (*cnt >= o->max_cmd_len) + break; + } + + /* if no more MACs to configure - we are done */ + if (list_empty(&cmd_pos->data.macs_head)) + cmd_pos->done = true; +} + static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p) { @@ -2955,6 +3136,10 @@ static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp, &cnt); break; + case BNX2X_MCAST_CMD_SET: + bnx2x_mcast_hdl_pending_set_e2(bp, o, cmd_pos, &cnt); + break; + default: BNX2X_ERR("Unknown command: %d\n", cmd_pos->type); return -EINVAL; @@ -2965,6 +3150,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e2(struct bnx2x *bp, */ if (cmd_pos->done) { list_del(&cmd_pos->link); + bnx2x_free_groups(&cmd_pos->group_head); kfree(cmd_pos); } @@ -3095,6 +3281,19 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp, o->set_registry_size(o, reg_sz + p->mcast_list_len); break; + case BNX2X_MCAST_CMD_SET: + /* We can only learn how many commands would actually be used + * when this is being configured. So for now, simply guarantee + * the command will be enqueued [to refrain from adding logic + * that handles this and THEN learns it needs several ramrods]. + * Just like for ADD/Cont, the mcast_list_len might be an over + * estimation; or even more so, since we don't take into + * account the possibility of removal of existing bins. + */ + o->set_registry_size(o, reg_sz + p->mcast_list_len); + o->total_pending_num += o->max_cmd_len; + break; + default: BNX2X_ERR("Unknown command: %d\n", cmd); return -EINVAL; @@ -3108,12 +3307,16 @@ static int bnx2x_mcast_validate_e2(struct bnx2x *bp, static void bnx2x_mcast_revert_e2(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_bins) + int old_num_bins, + enum bnx2x_mcast_cmd cmd) { struct bnx2x_mcast_obj *o = p->mcast_obj; o->set_registry_size(o, old_num_bins); o->total_pending_num -= p->mcast_list_len; + + if (cmd == BNX2X_MCAST_CMD_SET) + o->total_pending_num -= o->max_cmd_len; } /** @@ -3223,9 +3426,11 @@ static int bnx2x_mcast_setup_e2(struct bnx2x *bp, bnx2x_mcast_refresh_registry_e2(bp, o); /* If CLEAR_ONLY was requested - don't send a ramrod and clear - * RAMROD_PENDING status immediately. + * RAMROD_PENDING status immediately. due to the SET option, it's also + * possible that after evaluating the differences there's no need for + * a ramrod. In that case, we can skip it as well. */ - if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags)) { + if (test_bit(RAMROD_DRV_CLR_ONLY, &p->ramrod_flags) || !cnt) { raw->clear_pending(raw); return 0; } else { @@ -3253,6 +3458,11 @@ static int bnx2x_mcast_validate_e1h(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, enum bnx2x_mcast_cmd cmd) { + if (cmd == BNX2X_MCAST_CMD_SET) { + BNX2X_ERR("Can't use `set' command on e1h!\n"); + return -EINVAL; + } + /* Mark, that there is a work to do */ if ((cmd == BNX2X_MCAST_CMD_DEL) || (cmd == BNX2X_MCAST_CMD_RESTORE)) p->mcast_list_len = 1; @@ -3262,7 +3472,8 @@ static int bnx2x_mcast_validate_e1h(struct bnx2x *bp, static void bnx2x_mcast_revert_e1h(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_bins) + int old_num_bins, + enum bnx2x_mcast_cmd cmd) { /* Do nothing */ } @@ -3372,6 +3583,11 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp, struct bnx2x_mcast_obj *o = p->mcast_obj; int reg_sz = o->get_registry_size(o); + if (cmd == BNX2X_MCAST_CMD_SET) { + BNX2X_ERR("Can't use `set' command on e1!\n"); + return -EINVAL; + } + switch (cmd) { /* DEL command deletes all currently configured MACs */ case BNX2X_MCAST_CMD_DEL: @@ -3422,7 +3638,8 @@ static int bnx2x_mcast_validate_e1(struct bnx2x *bp, static void bnx2x_mcast_revert_e1(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_macs) + int old_num_macs, + enum bnx2x_mcast_cmd cmd) { struct bnx2x_mcast_obj *o = p->mcast_obj; @@ -3572,6 +3789,7 @@ static inline int bnx2x_mcast_handle_pending_cmds_e1( } list_del(&cmd_pos->link); + bnx2x_free_groups(&cmd_pos->group_head); kfree(cmd_pos); return cnt; @@ -3816,7 +4034,7 @@ error_exit2: r->clear_pending(r); error_exit1: - o->revert(bp, p, old_reg_size); + o->revert(bp, p, old_reg_size, cmd); return rc; } diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index 4048fc594cce..0bf2fd470819 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -536,6 +536,15 @@ enum bnx2x_mcast_cmd { BNX2X_MCAST_CMD_CONT, BNX2X_MCAST_CMD_DEL, BNX2X_MCAST_CMD_RESTORE, + + /* Following this, multicast configuration should equal to approx + * the set of MACs provided [i.e., remove all else]. + * The two sub-commands are used internally to decide whether a given + * bin is to be added or removed + */ + BNX2X_MCAST_CMD_SET, + BNX2X_MCAST_CMD_SET_ADD, + BNX2X_MCAST_CMD_SET_DEL, }; struct bnx2x_mcast_obj { @@ -635,7 +644,8 @@ struct bnx2x_mcast_obj { */ void (*revert)(struct bnx2x *bp, struct bnx2x_mcast_ramrod_params *p, - int old_num_bins); + int old_num_bins, + enum bnx2x_mcast_cmd cmd); int (*get_registry_size)(struct bnx2x_mcast_obj *o); void (*set_registry_size)(struct bnx2x_mcast_obj *o, int n); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c index 632daff117d3..3f77d0863543 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c @@ -573,17 +573,6 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf, } } - /* clear existing mcasts */ - mcast.mcast_list_len = vf->mcast_list_len; - vf->mcast_list_len = mc_num; - rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL); - if (rc) { - BNX2X_ERR("Failed to remove multicasts\n"); - kfree(mc); - return rc; - } - - /* update mcast list on the ramrod params */ if (mc_num) { INIT_LIST_HEAD(&mcast.mcast_list); for (i = 0; i < mc_num; i++) { @@ -594,12 +583,18 @@ int bnx2x_vf_mcast(struct bnx2x *bp, struct bnx2x_virtf *vf, /* add new mcasts */ mcast.mcast_list_len = mc_num; - rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_ADD); + rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_SET); if (rc) - BNX2X_ERR("Faled to add multicasts\n"); - kfree(mc); + BNX2X_ERR("Faled to set multicasts\n"); + } else { + /* clear existing mcasts */ + rc = bnx2x_config_mcast(bp, &mcast, BNX2X_MCAST_CMD_DEL); + if (rc) + BNX2X_ERR("Failed to remove multicasts\n"); } + kfree(mc); + return rc; } @@ -1583,7 +1578,6 @@ int bnx2x_iov_nic_init(struct bnx2x *bp) * It needs to be initialized here so that it can be safely * handled by a subsequent FLR flow. */ - vf->mcast_list_len = 0; bnx2x_init_mcast_obj(bp, &vf->mcast_obj, 0xFF, 0xFF, 0xFF, 0xFF, bnx2x_vf_sp(bp, vf, mcast_rdata), @@ -2527,7 +2521,8 @@ void bnx2x_pf_set_vfs_vlan(struct bnx2x *bp) for_each_vf(bp, vfidx) { bulletin = BP_VF_BULLETIN(bp, vfidx); if (bulletin->valid_bitmap & (1 << VLAN_VALID)) - bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0); + bnx2x_set_vf_vlan(bp->dev, vfidx, bulletin->vlan, 0, + htons(ETH_P_8021Q)); } } @@ -2787,7 +2782,8 @@ static int bnx2x_set_vf_vlan_filter(struct bnx2x *bp, struct bnx2x_virtf *vf, return 0; } -int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) +int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos, + __be16 vlan_proto) { struct pf_vf_bulletin_content *bulletin = NULL; struct bnx2x *bp = netdev_priv(dev); @@ -2802,6 +2798,9 @@ int bnx2x_set_vf_vlan(struct net_device *dev, int vfidx, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP(BNX2X_MSG_IOV, "configuring VF %d with VLAN %d qos %d\n", vfidx, vlan, 0); diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h index 670a581ffabc..7a6d406f4c11 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.h @@ -195,7 +195,6 @@ struct bnx2x_virtf { int leading_rss; /* MCAST object */ - int mcast_list_len; struct bnx2x_mcast_obj mcast_obj; /* RSS configuration object */ diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 228c964e709a..a9f9f3738022 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -93,50 +94,49 @@ enum board_idx { BCM57404_NPAR, BCM57406_NPAR, BCM57407_SFP, + BCM57407_NPAR, BCM57414_NPAR, BCM57416_NPAR, - BCM57304_VF, - BCM57404_VF, - BCM57414_VF, - BCM57314_VF, + NETXTREME_E_VF, + NETXTREME_C_VF, }; /* indexed by enum above */ static const struct { char *name; } board_info[] = { - { "Broadcom BCM57301 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57302 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57304 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57301 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57302 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57304 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, { "Broadcom BCM57417 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM58700 Nitro 4-port 1Gb/2.5Gb/10Gb Ethernet" }, - { "Broadcom BCM57311 NetXtreme-C Single-port 10Gb Ethernet" }, - { "Broadcom BCM57312 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57402 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57404 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57406 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM58700 Nitro 1Gb/2.5Gb/10Gb Ethernet" }, + { "Broadcom BCM57311 NetXtreme-C 10Gb Ethernet" }, + { "Broadcom BCM57312 NetXtreme-C 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57402 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57404 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57406 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57402 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57412 NetXtreme-E Dual-port 10Gb Ethernet" }, - { "Broadcom BCM57414 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10GBase-T Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10GBase-T Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57412 NetXtreme-E 10Gb Ethernet" }, + { "Broadcom BCM57414 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10GBase-T Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10GBase-T Ethernet" }, { "Broadcom BCM57412 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57314 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, - { "Broadcom BCM57417 NetXtreme-E Dual-port 10Gb/25Gb Ethernet" }, - { "Broadcom BCM57416 NetXtreme-E Dual-port 10Gb Ethernet" }, + { "Broadcom BCM57314 NetXtreme-C 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM57417 NetXtreme-E 10Gb/25Gb Ethernet" }, + { "Broadcom BCM57416 NetXtreme-E 10Gb Ethernet" }, { "Broadcom BCM57404 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57406 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57407 NetXtreme-E Dual-port 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E 25Gb Ethernet" }, + { "Broadcom BCM57407 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57414 NetXtreme-E Ethernet Partition" }, { "Broadcom BCM57416 NetXtreme-E Ethernet Partition" }, - { "Broadcom BCM57304 NetXtreme-C Ethernet Virtual Function" }, - { "Broadcom BCM57404 NetXtreme-E Ethernet Virtual Function" }, - { "Broadcom BCM57414 NetXtreme-E Ethernet Virtual Function" }, - { "Broadcom BCM57314 NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-E Ethernet Virtual Function" }, + { "Broadcom NetXtreme-C Ethernet Virtual Function" }, }; static const struct pci_device_id bnxt_pci_tbl[] = { + { PCI_VDEVICE(BROADCOM, 0x16c0), .driver_data = BCM57417_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 }, { PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 }, { PCI_VDEVICE(BROADCOM, 0x16ca), .driver_data = BCM57304 }, @@ -160,13 +160,19 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16e7), .driver_data = BCM57404_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e8), .driver_data = BCM57406_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16e9), .driver_data = BCM57407_SFP }, + { PCI_VDEVICE(BROADCOM, 0x16ea), .driver_data = BCM57407_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16eb), .driver_data = BCM57412_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ec), .driver_data = BCM57414_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ed), .driver_data = BCM57414_NPAR }, { PCI_VDEVICE(BROADCOM, 0x16ee), .driver_data = BCM57416_NPAR }, + { PCI_VDEVICE(BROADCOM, 0x16ef), .driver_data = BCM57416_NPAR }, #ifdef CONFIG_BNXT_SRIOV - { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = BCM57304_VF }, - { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = BCM57404_VF }, - { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = BCM57414_VF }, - { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = BCM57314_VF }, + { PCI_VDEVICE(BROADCOM, 0x16c1), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16cb), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16d3), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16dc), .driver_data = NETXTREME_E_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e1), .driver_data = NETXTREME_C_VF }, + { PCI_VDEVICE(BROADCOM, 0x16e5), .driver_data = NETXTREME_C_VF }, #endif { 0 } }; @@ -189,8 +195,7 @@ static const u16 bnxt_async_events_arr[] = { static bool bnxt_vf_pciid(enum board_idx idx) { - return (idx == BCM57304_VF || idx == BCM57404_VF || - idx == BCM57314_VF || idx == BCM57414_VF); + return (idx == NETXTREME_C_VF || idx == NETXTREME_E_VF); } #define DB_CP_REARM_FLAGS (DB_KEY_CP | DB_IDX_VALID) @@ -3419,10 +3424,10 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1); if (set_rss) { - vnic->hash_type = BNXT_RSS_HASH_TYPE_FLAG_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 | - BNXT_RSS_HASH_TYPE_FLAG_IPV6 | - BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6; + vnic->hash_type = VNIC_RSS_CFG_REQ_HASH_TYPE_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV4 | + VNIC_RSS_CFG_REQ_HASH_TYPE_IPV6 | + VNIC_RSS_CFG_REQ_HASH_TYPE_TCP_IPV6; req.hash_type = cpu_to_le32(vnic->hash_type); @@ -4156,6 +4161,11 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) if (rc) goto hwrm_func_qcaps_exit; + bp->tx_push_thresh = 0; + if (resp->flags & + cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) + bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; + if (BNXT_PF(bp)) { struct bnxt_pf_info *pf = &bp->pf; @@ -4187,12 +4197,6 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) struct bnxt_vf_info *vf = &bp->vf; vf->fw_fid = le16_to_cpu(resp->fid); - memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); - if (is_valid_ether_addr(vf->mac_addr)) - /* overwrite netdev dev_adr with admin VF MAC */ - memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); - else - random_ether_addr(bp->dev->dev_addr); vf->max_rsscos_ctxs = le16_to_cpu(resp->max_rsscos_ctx); vf->max_cp_rings = le16_to_cpu(resp->max_cmpl_rings); @@ -4204,14 +4208,21 @@ int bnxt_hwrm_func_qcaps(struct bnxt *bp) vf->max_l2_ctxs = le16_to_cpu(resp->max_l2_ctxs); vf->max_vnics = le16_to_cpu(resp->max_vnics); vf->max_stat_ctxs = le16_to_cpu(resp->max_stat_ctx); + + memcpy(vf->mac_addr, resp->mac_address, ETH_ALEN); + mutex_unlock(&bp->hwrm_cmd_lock); + + if (is_valid_ether_addr(vf->mac_addr)) { + /* overwrite netdev dev_adr with admin VF MAC */ + memcpy(bp->dev->dev_addr, vf->mac_addr, ETH_ALEN); + } else { + random_ether_addr(bp->dev->dev_addr); + rc = bnxt_approve_mac(bp, bp->dev->dev_addr); + } + return rc; #endif } - bp->tx_push_thresh = 0; - if (resp->flags & - cpu_to_le32(FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED)) - bp->tx_push_thresh = BNXT_TX_PUSH_THRESH; - hwrm_func_qcaps_exit: mutex_unlock(&bp->hwrm_cmd_lock); return rc; @@ -4249,6 +4260,9 @@ static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp) if (bp->max_tc > BNXT_MAX_QUEUE) bp->max_tc = BNXT_MAX_QUEUE; + if (resp->queue_cfg_info & QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG) + bp->max_tc = 1; + qptr = &resp->queue_id0; for (i = 0; i < bp->max_tc; i++) { bp->q_info[i].queue_id = *qptr++; @@ -4307,6 +4321,31 @@ hwrm_ver_get_exit: return rc; } +int bnxt_hwrm_fw_set_time(struct bnxt *bp) +{ +#if IS_ENABLED(CONFIG_RTC_LIB) + struct hwrm_fw_set_time_input req = {0}; + struct rtc_time tm; + struct timeval tv; + + if (bp->hwrm_spec_code < 0x10400) + return -EOPNOTSUPP; + + do_gettimeofday(&tv); + rtc_time_to_tm(tv.tv_sec, &tm); + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_SET_TIME, -1, -1); + req.year = cpu_to_le16(1900 + tm.tm_year); + req.month = 1 + tm.tm_mon; + req.day = tm.tm_mday; + req.hour = tm.tm_hour; + req.minute = tm.tm_min; + req.second = tm.tm_sec; + return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); +#else + return -EOPNOTSUPP; +#endif +} + static int bnxt_hwrm_port_qstats(struct bnxt *bp) { int rc; @@ -6804,6 +6843,8 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) if (rc) goto init_err; + bnxt_hwrm_fw_set_time(bp); + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 23e04a6142fb..51b164a0e844 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -11,10 +11,10 @@ #define BNXT_H #define DRV_MODULE_NAME "bnxt_en" -#define DRV_MODULE_VERSION "1.3.0" +#define DRV_MODULE_VERSION "1.5.0" #define DRV_VER_MAJ 1 -#define DRV_VER_MIN 3 +#define DRV_VER_MIN 5 #define DRV_VER_UPD 0 struct tx_bd { @@ -106,11 +106,11 @@ struct tx_cmp { #define CMP_TYPE_REMOTE_DRIVER_REQ 34 #define CMP_TYPE_REMOTE_DRIVER_RESP 36 #define CMP_TYPE_ERROR_STATUS 48 - #define CMPL_BASE_TYPE_STAT_EJECT (0x1aUL << 0) - #define CMPL_BASE_TYPE_HWRM_DONE (0x20UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_REQ (0x22UL << 0) - #define CMPL_BASE_TYPE_HWRM_FWD_RESP (0x24UL << 0) - #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define CMPL_BASE_TYPE_STAT_EJECT 0x1aUL + #define CMPL_BASE_TYPE_HWRM_DONE 0x20UL + #define CMPL_BASE_TYPE_HWRM_FWD_REQ 0x22UL + #define CMPL_BASE_TYPE_HWRM_FWD_RESP 0x24UL + #define CMPL_BASE_TYPE_HWRM_ASYNC_EVENT 0x2eUL #define TX_CMP_FLAGS_ERROR (1 << 6) #define TX_CMP_FLAGS_PUSH (1 << 7) @@ -389,11 +389,6 @@ struct rx_tpa_end_cmp_ext { #define INVALID_HW_RING_ID ((u16)-1) -#define BNXT_RSS_HASH_TYPE_FLAG_IPV4 0x01 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV4 0x02 -#define BNXT_RSS_HASH_TYPE_FLAG_IPV6 0x04 -#define BNXT_RSS_HASH_TYPE_FLAG_TCP_IPV6 0x08 - /* The hardware supports certain page sizes. Use the supported page sizes * to allocate the rings. */ @@ -418,7 +413,7 @@ struct rx_tpa_end_cmp_ext { #define BNXT_RX_PAGE_SIZE (1 << BNXT_RX_PAGE_SHIFT) -#define BNXT_MIN_PKT_SIZE 45 +#define BNXT_MIN_PKT_SIZE 52 #define BNXT_NUM_TESTS(bp) 0 @@ -1225,6 +1220,7 @@ int bnxt_hwrm_set_coal(struct bnxt *); int bnxt_hwrm_func_qcaps(struct bnxt *); int bnxt_hwrm_set_pause(struct bnxt *); int bnxt_hwrm_set_link_setting(struct bnxt *, bool, bool); +int bnxt_hwrm_fw_set_time(struct bnxt *); int bnxt_open_nic(struct bnxt *, bool, bool); int bnxt_close_nic(struct bnxt *, bool, bool); int bnxt_get_max_rings(struct bnxt *, int *, int *, bool); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index b83e17403d6c..a7e04ff4eaed 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -21,6 +21,8 @@ #include "bnxt_nvm_defs.h" /* NVRAM content constant and structure defs */ #include "bnxt_fw_hdr.h" /* Firmware hdr constant and structure defs */ #define FLASH_NVRAM_TIMEOUT ((HWRM_CMD_TIMEOUT) * 100) +#define FLASH_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) +#define INSTALL_PACKAGE_TIMEOUT ((HWRM_CMD_TIMEOUT) * 200) static char *bnxt_get_pkgver(struct net_device *dev, char *buf, size_t buflen); @@ -346,7 +348,7 @@ static void bnxt_get_channels(struct net_device *dev, int max_rx_rings, max_tx_rings, tcs; bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, true); - channel->max_combined = max_rx_rings; + channel->max_combined = max_t(int, max_rx_rings, max_tx_rings); if (bnxt_get_max_rings(bp, &max_rx_rings, &max_tx_rings, false)) { max_rx_rings = 0; @@ -404,8 +406,8 @@ static int bnxt_set_channels(struct net_device *dev, if (tcs > 1) max_tx_rings /= tcs; - if (sh && (channel->combined_count > max_rx_rings || - channel->combined_count > max_tx_rings)) + if (sh && + channel->combined_count > max_t(int, max_rx_rings, max_tx_rings)) return -ENOMEM; if (!sh && (channel->rx_count > max_rx_rings || @@ -428,8 +430,10 @@ static int bnxt_set_channels(struct net_device *dev, if (sh) { bp->flags |= BNXT_FLAG_SHARED_RINGS; - bp->rx_nr_rings = channel->combined_count; - bp->tx_nr_rings_per_tc = channel->combined_count; + bp->rx_nr_rings = min_t(int, channel->combined_count, + max_rx_rings); + bp->tx_nr_rings_per_tc = min_t(int, channel->combined_count, + max_tx_rings); } else { bp->flags &= ~BNXT_FLAG_SHARED_RINGS; bp->rx_nr_rings = channel->rx_count; @@ -1028,6 +1032,10 @@ static u32 bnxt_get_link(struct net_device *dev) return bp->link_info.link_up; } +static int bnxt_find_nvram_item(struct net_device *dev, u16 type, u16 ordinal, + u16 ext, u16 *index, u32 *item_length, + u32 *data_length); + static int bnxt_flash_nvram(struct net_device *dev, u16 dir_type, u16 dir_ordinal, @@ -1179,7 +1187,6 @@ static int bnxt_flash_firmware(struct net_device *dev, (unsigned long)calculated_crc); return -EINVAL; } - /* TODO: Validate digital signature (RSA-encrypted SHA-256 hash) here */ rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw_data, fw_size); if (rc == 0) /* Firmware update successful */ @@ -1188,6 +1195,57 @@ static int bnxt_flash_firmware(struct net_device *dev, return rc; } +static int bnxt_flash_microcode(struct net_device *dev, + u16 dir_type, + const u8 *fw_data, + size_t fw_size) +{ + struct bnxt_ucode_trailer *trailer; + u32 calculated_crc; + u32 stored_crc; + int rc = 0; + + if (fw_size < sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode file size: %u\n", + (unsigned int)fw_size); + return -EINVAL; + } + trailer = (struct bnxt_ucode_trailer *)(fw_data + (fw_size - + sizeof(*trailer))); + if (trailer->sig != cpu_to_le32(BNXT_UCODE_TRAILER_SIGNATURE)) { + netdev_err(dev, "Invalid microcode trailer signature: %08X\n", + le32_to_cpu(trailer->sig)); + return -EINVAL; + } + if (le16_to_cpu(trailer->dir_type) != dir_type) { + netdev_err(dev, "Expected microcode type: %d, read: %d\n", + dir_type, le16_to_cpu(trailer->dir_type)); + return -EINVAL; + } + if (le16_to_cpu(trailer->trailer_length) < + sizeof(struct bnxt_ucode_trailer)) { + netdev_err(dev, "Invalid microcode trailer length: %d\n", + le16_to_cpu(trailer->trailer_length)); + return -EINVAL; + } + + /* Confirm the CRC32 checksum of the file: */ + stored_crc = le32_to_cpu(*(__le32 *)(fw_data + fw_size - + sizeof(stored_crc))); + calculated_crc = ~crc32(~0, fw_data, fw_size - sizeof(stored_crc)); + if (calculated_crc != stored_crc) { + netdev_err(dev, + "CRC32 (%08lX) does not match calculated: %08lX\n", + (unsigned long)stored_crc, + (unsigned long)calculated_crc); + return -EINVAL; + } + rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, + 0, 0, fw_data, fw_size); + + return rc; +} + static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) { switch (dir_type) { @@ -1206,7 +1264,7 @@ static bool bnxt_dir_type_is_ape_bin_format(u16 dir_type) return false; } -static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) +static bool bnxt_dir_type_is_other_exec_format(u16 dir_type) { switch (dir_type) { case BNX_DIR_TYPE_AVS: @@ -1227,7 +1285,7 @@ static bool bnxt_dir_type_is_unprotected_exec_format(u16 dir_type) static bool bnxt_dir_type_is_executable(u16 dir_type) { return bnxt_dir_type_is_ape_bin_format(dir_type) || - bnxt_dir_type_is_unprotected_exec_format(dir_type); + bnxt_dir_type_is_other_exec_format(dir_type); } static int bnxt_flash_firmware_from_file(struct net_device *dev, @@ -1237,10 +1295,6 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, const struct firmware *fw; int rc; - if (dir_type != BNX_DIR_TYPE_UPDATE && - bnxt_dir_type_is_executable(dir_type) == false) - return -EINVAL; - rc = request_firmware(&fw, filename, &dev->dev); if (rc != 0) { netdev_err(dev, "Error %d requesting firmware file: %s\n", @@ -1249,6 +1303,8 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } if (bnxt_dir_type_is_ape_bin_format(dir_type) == true) rc = bnxt_flash_firmware(dev, dir_type, fw->data, fw->size); + else if (bnxt_dir_type_is_other_exec_format(dir_type) == true) + rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size); else rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST, 0, 0, fw->data, fw->size); @@ -1257,10 +1313,83 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev, } static int bnxt_flash_package_from_file(struct net_device *dev, - char *filename) + char *filename, u32 install_type) { - netdev_err(dev, "packages are not yet supported\n"); - return -EINVAL; + struct bnxt *bp = netdev_priv(dev); + struct hwrm_nvm_install_update_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_nvm_install_update_input install = {0}; + const struct firmware *fw; + u32 item_len; + u16 index; + int rc; + + bnxt_hwrm_fw_set_time(bp); + + if (bnxt_find_nvram_item(dev, BNX_DIR_TYPE_UPDATE, + BNX_DIR_ORDINAL_FIRST, BNX_DIR_EXT_NONE, + &index, &item_len, NULL) != 0) { + netdev_err(dev, "PKG update area not created in nvram\n"); + return -ENOBUFS; + } + + rc = request_firmware(&fw, filename, &dev->dev); + if (rc != 0) { + netdev_err(dev, "PKG error %d requesting file: %s\n", + rc, filename); + return rc; + } + + if (fw->size > item_len) { + netdev_err(dev, "PKG insufficient update area in nvram: %lu", + (unsigned long)fw->size); + rc = -EFBIG; + } else { + dma_addr_t dma_handle; + u8 *kmem; + struct hwrm_nvm_modify_input modify = {0}; + + bnxt_hwrm_cmd_hdr_init(bp, &modify, HWRM_NVM_MODIFY, -1, -1); + + modify.dir_idx = cpu_to_le16(index); + modify.len = cpu_to_le32(fw->size); + + kmem = dma_alloc_coherent(&bp->pdev->dev, fw->size, + &dma_handle, GFP_KERNEL); + if (!kmem) { + netdev_err(dev, + "dma_alloc_coherent failure, length = %u\n", + (unsigned int)fw->size); + rc = -ENOMEM; + } else { + memcpy(kmem, fw->data, fw->size); + modify.host_src_addr = cpu_to_le64(dma_handle); + + rc = hwrm_send_message(bp, &modify, sizeof(modify), + FLASH_PACKAGE_TIMEOUT); + dma_free_coherent(&bp->pdev->dev, fw->size, kmem, + dma_handle); + } + } + release_firmware(fw); + if (rc) + return rc; + + if ((install_type & 0xffff) == 0) + install_type >>= 16; + bnxt_hwrm_cmd_hdr_init(bp, &install, HWRM_NVM_INSTALL_UPDATE, -1, -1); + install.install_type = cpu_to_le32(install_type); + + rc = hwrm_send_message(bp, &install, sizeof(install), + INSTALL_PACKAGE_TIMEOUT); + if (rc) + return -EOPNOTSUPP; + + if (resp->result) { + netdev_err(dev, "PKG install error = %d, problem_item = %d\n", + (s8)resp->result, (int)resp->problem_item); + return -ENOPKG; + } + return 0; } static int bnxt_flash_device(struct net_device *dev, @@ -1271,8 +1400,10 @@ static int bnxt_flash_device(struct net_device *dev, return -EINVAL; } - if (flash->region == ETHTOOL_FLASH_ALL_REGIONS) - return bnxt_flash_package_from_file(dev, flash->data); + if (flash->region == ETHTOOL_FLASH_ALL_REGIONS || + flash->region > 0xffff) + return bnxt_flash_package_from_file(dev, flash->data, + flash->region); return bnxt_flash_firmware_from_file(dev, flash->region, flash->data); } @@ -1516,7 +1647,7 @@ static int bnxt_set_eeprom(struct net_device *dev, /* Create or re-write an NVM item: */ if (bnxt_dir_type_is_executable(type) == true) - return -EINVAL; + return -EOPNOTSUPP; ext = eeprom->magic & 0xffff; ordinal = eeprom->offset >> 16; attr = eeprom->offset & 0xffff; @@ -1718,6 +1849,25 @@ static int bnxt_get_module_eeprom(struct net_device *dev, return rc; } +static int bnxt_nway_reset(struct net_device *dev) +{ + int rc = 0; + + struct bnxt *bp = netdev_priv(dev); + struct bnxt_link_info *link_info = &bp->link_info; + + if (!BNXT_SINGLE_PF(bp)) + return -EOPNOTSUPP; + + if (!(link_info->autoneg & BNXT_AUTONEG_SPEED)) + return -EINVAL; + + if (netif_running(dev)) + rc = bnxt_hwrm_set_link_setting(bp, true, false); + + return rc; +} + const struct ethtool_ops bnxt_ethtool_ops = { .get_link_ksettings = bnxt_get_link_ksettings, .set_link_ksettings = bnxt_set_link_ksettings, @@ -1750,4 +1900,5 @@ const struct ethtool_ops bnxt_ethtool_ops = { .set_eee = bnxt_set_eee, .get_module_info = bnxt_get_module_info, .get_module_eeprom = bnxt_get_module_eeprom, + .nway_reset = bnxt_nway_reset }; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h index 82bf44ab811b..cad30ddc6936 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_fw_hdr.h @@ -11,6 +11,7 @@ #define __BNXT_FW_HDR_H__ #define BNXT_FIRMWARE_BIN_SIGNATURE 0x1a4d4342 /* "BCM"+0x1a */ +#define BNXT_UCODE_TRAILER_SIGNATURE 0x726c7254 /* "Trlr" */ enum SUPPORTED_FAMILY { DEVICE_5702_3_4_FAMILY, /* 0 - Denali, Vinson, K2 */ @@ -85,7 +86,7 @@ enum SUPPORTED_MEDIA { struct bnxt_fw_header { __le32 signature; /* constains the constant value of - * BNXT_Firmware_Bin_Signatures + * BNXT_FIRMWARE_BIN_SIGNATURE */ u8 flags; /* reserved for ChiMP use */ u8 code_type; /* enum SUPPORTED_CODE */ @@ -102,4 +103,17 @@ struct bnxt_fw_header { u8 major_ver; }; +/* Microcode and pre-boot software/firmware trailer: */ +struct bnxt_ucode_trailer { + u8 rsa_sig[256]; + __le16 flags; + u8 version_format; + u8 version_length; + u8 version[16]; + __le16 dir_type; + __le16 trailer_length; + __le32 sig; /* BNXT_UCODE_TRAILER_SIGNATURE */ + __le32 chksum; /* CRC-32 */ +}; + #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h index 517567f6d651..04a96cc3498a 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h @@ -39,7 +39,7 @@ struct eject_cmpl { __le16 type; #define EJECT_CMPL_TYPE_MASK 0x3fUL #define EJECT_CMPL_TYPE_SFT 0 - #define EJECT_CMPL_TYPE_STAT_EJECT (0x1aUL << 0) + #define EJECT_CMPL_TYPE_STAT_EJECT 0x1aUL __le16 len; __le32 opaque; __le32 v; @@ -52,7 +52,7 @@ struct hwrm_cmpl { __le16 type; #define HWRM_CMPL_TYPE_MASK 0x3fUL #define HWRM_CMPL_TYPE_SFT 0 - #define HWRM_CMPL_TYPE_HWRM_DONE (0x20UL << 0) + #define HWRM_CMPL_TYPE_HWRM_DONE 0x20UL __le16 sequence_id; __le32 unused_1; __le32 v; @@ -65,7 +65,7 @@ struct hwrm_fwd_req_cmpl { __le16 req_len_type; #define HWRM_FWD_REQ_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_REQ_CMPL_TYPE_SFT 0 - #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ (0x22UL << 0) + #define HWRM_FWD_REQ_CMPL_TYPE_HWRM_FWD_REQ 0x22UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_MASK 0xffc0UL #define HWRM_FWD_REQ_CMPL_REQ_LEN_SFT 6 __le16 source_id; @@ -81,7 +81,7 @@ struct hwrm_fwd_resp_cmpl { __le16 type; #define HWRM_FWD_RESP_CMPL_TYPE_MASK 0x3fUL #define HWRM_FWD_RESP_CMPL_TYPE_SFT 0 - #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP (0x24UL << 0) + #define HWRM_FWD_RESP_CMPL_TYPE_HWRM_FWD_RESP 0x24UL __le16 source_id; __le16 resp_len; __le16 unused_1; @@ -96,25 +96,26 @@ struct hwrm_async_event_cmpl { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE (0x7UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR (0x30UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_MTU_CHANGE 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CHANGE 0x2UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PORT_PHY_CFG_CHANGE 0x7UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_DRVR_LOAD 0x11UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_FUNC_FLR_PROC_CMPLT 0x12UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_UNLOAD 0x20UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_DRVR_LOAD 0x21UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_FLR 0x30UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_VF_CFG_CHANGE 0x33UL + #define HWRM_ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_V 0x1UL @@ -130,9 +131,9 @@ struct hwrm_async_event_cmpl_link_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE (0x0UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_EVENT_ID_LINK_STATUS_CHANGE 0x0UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_STATUS_CHANGE_V 0x1UL @@ -156,9 +157,9 @@ struct hwrm_async_event_cmpl_link_mtu_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE (0x1UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_EVENT_ID_LINK_MTU_CHANGE 0x1UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_MTU_CHANGE_V 0x1UL @@ -176,9 +177,9 @@ struct hwrm_async_event_cmpl_link_speed_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_ID_LINK_SPEED_CHANGE 0x2UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_V 0x1UL @@ -200,8 +201,7 @@ struct hwrm_async_event_cmpl_link_speed_change { #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_40GB (0x190UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_50GB (0x1f4UL << 1) #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB (0x3e8UL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB (0xffffUL << 1) - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_10MB + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_LAST HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_NEW_LINK_SPEED_100MBPS_100GB #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_MASK 0xffff0000UL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CHANGE_EVENT_DATA1_PORT_ID_SFT 16 }; @@ -211,9 +211,9 @@ struct hwrm_async_event_cmpl_dcb_config_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE (0x3UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_EVENT_ID_DCB_CONFIG_CHANGE 0x3UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_DCB_CONFIG_CHANGE_V 0x1UL @@ -231,9 +231,9 @@ struct hwrm_async_event_cmpl_port_conn_not_allowed { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED (0x4UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_EVENT_ID_PORT_CONN_NOT_ALLOWED 0x4UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PORT_CONN_NOT_ALLOWED_V 0x1UL @@ -258,9 +258,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_not_allowed { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED (0x5UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_EVENT_ID_LINK_SPEED_CFG_NOT_ALLOWED 0x5UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_NOT_ALLOWED_V 0x1UL @@ -278,9 +278,9 @@ struct hwrm_async_event_cmpl_link_speed_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE (0x6UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_EVENT_ID_LINK_SPEED_CFG_CHANGE 0x6UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_LINK_SPEED_CFG_CHANGE_V 0x1UL @@ -300,9 +300,9 @@ struct hwrm_async_event_cmpl_func_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD (0x10UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_EVENT_ID_FUNC_DRVR_UNLOAD 0x10UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_UNLOAD_V 0x1UL @@ -320,9 +320,9 @@ struct hwrm_async_event_cmpl_func_drvr_load { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD (0x11UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_EVENT_ID_FUNC_DRVR_LOAD 0x11UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_FUNC_DRVR_LOAD_V 0x1UL @@ -340,9 +340,9 @@ struct hwrm_async_event_cmpl_pf_drvr_unload { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD (0x20UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_EVENT_ID_PF_DRVR_UNLOAD 0x20UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_UNLOAD_V 0x1UL @@ -362,9 +362,9 @@ struct hwrm_async_event_cmpl_pf_drvr_load { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD (0x21UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_EVENT_ID_PF_DRVR_LOAD 0x21UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_DRVR_LOAD_V 0x1UL @@ -384,9 +384,9 @@ struct hwrm_async_event_cmpl_vf_flr { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR (0x30UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_EVENT_ID_VF_FLR 0x30UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_FLR_V 0x1UL @@ -404,9 +404,9 @@ struct hwrm_async_event_cmpl_vf_mac_addr_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE (0x31UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_EVENT_ID_VF_MAC_ADDR_CHANGE 0x31UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_MAC_ADDR_CHANGE_V 0x1UL @@ -424,9 +424,9 @@ struct hwrm_async_event_cmpl_pf_vf_comm_status_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE (0x32UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_EVENT_ID_PF_VF_COMM_STATUS_CHANGE 0x32UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_PF_VF_COMM_STATUS_CHANGE_V 0x1UL @@ -443,9 +443,9 @@ struct hwrm_async_event_cmpl_vf_cfg_change { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE (0x33UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_EVENT_ID_VF_CFG_CHANGE 0x33UL __le32 event_data2; u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_VF_CFG_CHANGE_V 0x1UL @@ -465,15 +465,15 @@ struct hwrm_async_event_cmpl_hwrm_error { __le16 type; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_MASK 0x3fUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT (0x2eUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_TYPE_HWRM_ASYNC_EVENT 0x2eUL __le16 event_id; - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR (0xffUL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_ID_HWRM_ERROR 0xffUL __le32 event_data2; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_MASK 0xffUL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_SFT 0 - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING (0x0UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL (0x1UL << 0) - #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL (0x2UL << 0) + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_WARNING 0x0UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_NONFATAL 0x1UL + #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL 0x2UL #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_LAST HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA2_SEVERITY_FATAL u8 opaque_v; #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_V 0x1UL @@ -485,12 +485,12 @@ struct hwrm_async_event_cmpl_hwrm_error { #define HWRM_ASYNC_EVENT_CMPL_HWRM_ERROR_EVENT_DATA1_TIMESTAMP 0x1UL }; -/* HW Resource Manager Specification 1.3.0 */ +/* HW Resource Manager Specification 1.5.1 */ #define HWRM_VERSION_MAJOR 1 -#define HWRM_VERSION_MINOR 3 -#define HWRM_VERSION_UPDATE 0 +#define HWRM_VERSION_MINOR 5 +#define HWRM_VERSION_UPDATE 1 -#define HWRM_VERSION_STR "1.3.0" +#define HWRM_VERSION_STR "1.5.1" /* * Following is the signature for HWRM message field that indicates not * applicable (All F's). Need to cast it the size of the field if needed. @@ -556,8 +556,8 @@ struct cmd_nums { #define HWRM_QUEUE_QPORTCFG (0x30UL) #define HWRM_QUEUE_QCFG (0x31UL) #define HWRM_QUEUE_CFG (0x32UL) - #define HWRM_QUEUE_BUFFERS_QCFG (0x33UL) - #define HWRM_QUEUE_BUFFERS_CFG (0x34UL) + #define RESERVED2 (0x33UL) + #define RESERVED3 (0x34UL) #define HWRM_QUEUE_PFCENABLE_QCFG (0x35UL) #define HWRM_QUEUE_PFCENABLE_CFG (0x36UL) #define HWRM_QUEUE_PRI2COS_QCFG (0x37UL) @@ -574,6 +574,7 @@ struct cmd_nums { #define HWRM_VNIC_RSS_QCFG (0x47UL) #define HWRM_VNIC_PLCMODES_CFG (0x48UL) #define HWRM_VNIC_PLCMODES_QCFG (0x49UL) + #define HWRM_VNIC_QCAPS (0x4aUL) #define HWRM_RING_ALLOC (0x50UL) #define HWRM_RING_FREE (0x51UL) #define HWRM_RING_CMPL_RING_QAGGINT_PARAMS (0x52UL) @@ -581,13 +582,15 @@ struct cmd_nums { #define HWRM_RING_RESET (0x5eUL) #define HWRM_RING_GRP_ALLOC (0x60UL) #define HWRM_RING_GRP_FREE (0x61UL) + #define RESERVED5 (0x64UL) + #define RESERVED6 (0x65UL) #define HWRM_VNIC_RSS_COS_LB_CTX_ALLOC (0x70UL) #define HWRM_VNIC_RSS_COS_LB_CTX_FREE (0x71UL) #define HWRM_CFA_L2_FILTER_ALLOC (0x90UL) #define HWRM_CFA_L2_FILTER_FREE (0x91UL) #define HWRM_CFA_L2_FILTER_CFG (0x92UL) #define HWRM_CFA_L2_SET_RX_MASK (0x93UL) - #define RESERVED3 (0x94UL) + #define RESERVED4 (0x94UL) #define HWRM_CFA_TUNNEL_FILTER_ALLOC (0x95UL) #define HWRM_CFA_TUNNEL_FILTER_FREE (0x96UL) #define HWRM_CFA_ENCAP_RECORD_ALLOC (0x97UL) @@ -607,6 +610,8 @@ struct cmd_nums { #define HWRM_STAT_CTX_CLR_STATS (0xb3UL) #define HWRM_FW_RESET (0xc0UL) #define HWRM_FW_QSTATUS (0xc1UL) + #define HWRM_FW_SET_TIME (0xc8UL) + #define HWRM_FW_GET_TIME (0xc9UL) #define HWRM_EXEC_FWD_RESP (0xd0UL) #define HWRM_REJECT_FWD_RESP (0xd1UL) #define HWRM_FWD_RESP (0xd2UL) @@ -615,11 +620,13 @@ struct cmd_nums { #define HWRM_WOL_FILTER_ALLOC (0xf0UL) #define HWRM_WOL_FILTER_FREE (0xf1UL) #define HWRM_WOL_FILTER_QCFG (0xf2UL) + #define HWRM_WOL_REASON_QCFG (0xf3UL) #define HWRM_DBG_READ_DIRECT (0xff10UL) #define HWRM_DBG_READ_INDIRECT (0xff11UL) #define HWRM_DBG_WRITE_DIRECT (0xff12UL) #define HWRM_DBG_WRITE_INDIRECT (0xff13UL) #define HWRM_DBG_DUMP (0xff14UL) + #define HWRM_NVM_INSTALL_UPDATE (0xfff3UL) #define HWRM_NVM_MODIFY (0xfff4UL) #define HWRM_NVM_VERIFY_UPDATE (0xfff5UL) #define HWRM_NVM_GET_DEV_INFO (0xfff6UL) @@ -824,7 +831,9 @@ struct hwrm_ver_get_output { u8 netctrl_fw_min; u8 netctrl_fw_bld; u8 netctrl_fw_rsvd; - __le32 reserved1; + __le32 dev_caps_cfg; + #define VER_GET_RESP_DEV_CAPS_CFG_SECURE_FW_UPD_SUPPORTED 0x1UL + #define VER_GET_RESP_DEV_CAPS_CFG_FW_DCBX_AGENT_SUPPORTED 0x2UL u8 roce_fw_maj; u8 roce_fw_min; u8 roce_fw_bld; @@ -839,9 +848,9 @@ struct hwrm_ver_get_output { u8 chip_metal; u8 chip_bond_id; u8 chip_platform_type; - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC (0x0UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA (0x1UL << 0) - #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM (0x2UL << 0) + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_ASIC 0x0UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_FPGA 0x1UL + #define VER_GET_RESP_CHIP_PLATFORM_TYPE_PALLADIUM 0x2UL __le16 max_req_win_len; __le16 max_resp_len; __le16 def_req_timeout; @@ -863,10 +872,10 @@ struct hwrm_func_reset_input { #define FUNC_RESET_REQ_ENABLES_VF_ID_VALID 0x1UL __le16 vf_id; u8 func_reset_level; - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL (0x0UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME (0x1UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN (0x2UL << 0) - #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF (0x3UL << 0) + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETALL 0x0UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETME 0x1UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETCHILDREN 0x2UL + #define FUNC_RESET_REQ_FUNC_RESET_LEVEL_RESETVF 0x3UL u8 unused_0; }; @@ -1028,6 +1037,10 @@ struct hwrm_func_qcaps_output { #define FUNC_QCAPS_RESP_FLAGS_ROCE_V2_SUPPORTED 0x10UL #define FUNC_QCAPS_RESP_FLAGS_WOL_MAGICPKT_SUPPORTED 0x20UL #define FUNC_QCAPS_RESP_FLAGS_WOL_BMP_SUPPORTED 0x40UL + #define FUNC_QCAPS_RESP_FLAGS_TX_RING_RL_SUPPORTED 0x80UL + #define FUNC_QCAPS_RESP_FLAGS_TX_BW_CFG_SUPPORTED 0x100UL + #define FUNC_QCAPS_RESP_FLAGS_VF_TX_RING_RL_SUPPORTED 0x200UL + #define FUNC_QCAPS_RESP_FLAGS_VF_BW_CFG_SUPPORTED 0x400UL u8 mac_address[6]; __le16 max_rsscos_ctx; __le16 max_cmpl_rings; @@ -1047,9 +1060,8 @@ struct hwrm_func_qcaps_output { __le32 max_mcast_filters; __le32 max_flow_id; __le32 max_hw_ring_grps; + __le16 max_sp_tx_rings; u8 unused_0; - u8 unused_1; - u8 unused_2; u8 valid; }; @@ -1077,6 +1089,7 @@ struct hwrm_func_qcfg_output { __le16 flags; #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_MAGICPKT_ENABLED 0x1UL #define FUNC_QCFG_RESP_FLAGS_OOB_WOL_BMP_ENABLED 0x2UL + #define FUNC_QCFG_RESP_FLAGS_FW_DCBX_AGENT_ENABLED 0x4UL u8 mac_address[6]; __le16 pci_id; __le16 alloc_rsscos_ctx; @@ -1089,29 +1102,46 @@ struct hwrm_func_qcfg_output { __le16 mru; __le16 stat_ctx_id; u8 port_partition_type; - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF (0x0UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS (0x1UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 (0x2UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 (0x3UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 (0x4UL << 0) - #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN (0xffUL << 0) + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_SPF 0x0UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_MPFS 0x1UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_0 0x2UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR1_5 0x3UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_NPAR2_0 0x4UL + #define FUNC_QCFG_RESP_PORT_PARTITION_TYPE_UNKNOWN 0xffUL u8 unused_0; __le16 dflt_vnic_id; u8 unused_1; u8 unused_2; __le32 min_bw; + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MIN_BW_RSVD 0x10000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MIN_BW_BW_VALUE_UNIT_INVALID __le32 max_bw; + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_QCFG_RESP_MAX_BW_RSVD 0x10000000UL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_LAST FUNC_QCFG_RESP_MAX_BW_BW_VALUE_UNIT_INVALID u8 evb_mode; - #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB (0x0UL << 0) - #define FUNC_QCFG_RESP_EVB_MODE_VEB (0x1UL << 0) - #define FUNC_QCFG_RESP_EVB_MODE_VEPA (0x2UL << 0) + #define FUNC_QCFG_RESP_EVB_MODE_NO_EVB 0x0UL + #define FUNC_QCFG_RESP_EVB_MODE_VEB 0x1UL + #define FUNC_QCFG_RESP_EVB_MODE_VEPA 0x2UL u8 unused_3; - __le16 unused_4; + __le16 alloc_vfs; __le32 alloc_mcast_filters; __le32 alloc_hw_ring_grps; - u8 unused_5; - u8 unused_6; - u8 unused_7; + __le16 alloc_sp_tx_rings; + u8 unused_4; u8 valid; }; @@ -1171,18 +1201,36 @@ struct hwrm_func_cfg_input { __le16 dflt_vlan; __be32 dflt_ip_addr[4]; __le32 min_bw; + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MIN_BW_RSVD 0x10000000UL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MIN_BW_BW_VALUE_UNIT_INVALID __le32 max_bw; + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_SFT 0 + #define FUNC_CFG_REQ_MAX_BW_RSVD 0x10000000UL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_LAST FUNC_CFG_REQ_MAX_BW_BW_VALUE_UNIT_INVALID __le16 async_event_cr; u8 vlan_antispoof_mode; - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK (0x0UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN (0x1UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE (0x2UL << 0) - #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN (0x3UL << 0) + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_NOCHECK 0x0UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_VALIDATE_VLAN 0x1UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_IF_VLANDNE 0x2UL + #define FUNC_CFG_REQ_VLAN_ANTISPOOF_MODE_INSERT_OR_OVERRIDE_VLAN 0x3UL u8 allowed_vlan_pris; u8 evb_mode; - #define FUNC_CFG_REQ_EVB_MODE_NO_EVB (0x0UL << 0) - #define FUNC_CFG_REQ_EVB_MODE_VEB (0x1UL << 0) - #define FUNC_CFG_REQ_EVB_MODE_VEPA (0x2UL << 0) + #define FUNC_CFG_REQ_EVB_MODE_NO_EVB 0x0UL + #define FUNC_CFG_REQ_EVB_MODE_VEB 0x1UL + #define FUNC_CFG_REQ_EVB_MODE_VEPA 0x2UL u8 unused_2; __le16 num_mcast_filters; }; @@ -1341,16 +1389,16 @@ struct hwrm_func_drv_rgtr_input { #define FUNC_DRV_RGTR_REQ_ENABLES_VF_REQ_FWD 0x8UL #define FUNC_DRV_RGTR_REQ_ENABLES_ASYNC_EVENT_FWD 0x10UL __le16 os_type; - #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN (0x0UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER (0x1UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS (0xeUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS (0x12UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS (0x1dUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX (0x24UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD (0x2aUL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI (0x68UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 (0x73UL << 0) - #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 (0x74UL << 0) + #define FUNC_DRV_RGTR_REQ_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_RGTR_REQ_OS_TYPE_WIN2012R2 0x74UL u8 ver_maj; u8 ver_min; u8 ver_upd; @@ -1415,13 +1463,13 @@ struct hwrm_func_buf_rgtr_input { __le16 vf_id; __le16 req_buf_num_pages; __le16 req_buf_page_size; - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B (0x4UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K (0xcUL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K (0xdUL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K (0x10UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M (0x15UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M (0x16UL << 0) - #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G (0x1eUL << 0) + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_16B 0x4UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4K 0xcUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_8K 0xdUL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_64K 0x10UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_2M 0x15UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_4M 0x16UL + #define FUNC_BUF_RGTR_REQ_REQ_BUF_PAGE_SIZE_1G 0x1eUL __le16 req_buf_len; __le16 resp_buf_len; u8 unused_0; @@ -1473,16 +1521,16 @@ struct hwrm_func_drv_qver_output { __le16 seq_id; __le16 resp_len; __le16 os_type; - #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN (0x0UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER (0x1UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS (0xeUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS (0x12UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS (0x1dUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX (0x24UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD (0x2aUL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI (0x68UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 (0x73UL << 0) - #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 (0x74UL << 0) + #define FUNC_DRV_QVER_RESP_OS_TYPE_UNKNOWN 0x0UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_OTHER 0x1UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_MSDOS 0xeUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WINDOWS 0x12UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_SOLARIS 0x1dUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_LINUX 0x24UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_FREEBSD 0x2aUL + #define FUNC_DRV_QVER_RESP_OS_TYPE_ESXI 0x68UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN864 0x73UL + #define FUNC_DRV_QVER_RESP_OS_TYPE_WIN2012R2 0x74UL u8 ver_maj; u8 ver_min; u8 ver_upd; @@ -1528,44 +1576,44 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_ENABLES_TX_LPI_TIMER 0x400UL __le16 port_id; __le16 force_link_speed; - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_FORCE_LINK_SPEED_10MB 0xffffUL u8 auto_mode; - #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_CFG_REQ_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_CFG_REQ_AUTO_MODE_SPEED_MASK 0x4UL u8 auto_duplex; - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF (0x0UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH (0x2UL << 0) + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_HALF 0x0UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_FULL 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_DUPLEX_BOTH 0x2UL u8 auto_pause; #define PORT_PHY_CFG_REQ_AUTO_PAUSE_TX 0x1UL #define PORT_PHY_CFG_REQ_AUTO_PAUSE_RX 0x2UL #define PORT_PHY_CFG_REQ_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL u8 unused_0; __le16 auto_link_speed; - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_10MB 0xffffUL __le16 auto_link_speed_mask; #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_100MB 0x2UL @@ -1582,12 +1630,12 @@ struct hwrm_port_phy_cfg_input { #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL #define PORT_PHY_CFG_REQ_AUTO_LINK_SPEED_MASK_10MB 0x2000UL u8 wirespeed; - #define PORT_PHY_CFG_REQ_WIRESPEED_OFF (0x0UL << 0) - #define PORT_PHY_CFG_REQ_WIRESPEED_ON (0x1UL << 0) + #define PORT_PHY_CFG_REQ_WIRESPEED_OFF 0x0UL + #define PORT_PHY_CFG_REQ_WIRESPEED_ON 0x1UL u8 lpbk; - #define PORT_PHY_CFG_REQ_LPBK_NONE (0x0UL << 0) - #define PORT_PHY_CFG_REQ_LPBK_LOCAL (0x1UL << 0) - #define PORT_PHY_CFG_REQ_LPBK_REMOTE (0x2UL << 0) + #define PORT_PHY_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_PHY_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_PHY_CFG_REQ_LPBK_REMOTE 0x2UL u8 force_pause; #define PORT_PHY_CFG_REQ_FORCE_PAUSE_TX 0x1UL #define PORT_PHY_CFG_REQ_FORCE_PAUSE_RX 0x2UL @@ -1641,25 +1689,25 @@ struct hwrm_port_phy_qcfg_output { __le16 seq_id; __le16 resp_len; u8 link; - #define PORT_PHY_QCFG_RESP_LINK_NO_LINK (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SIGNAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_LINK (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_LINK_NO_LINK 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_SIGNAL 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_LINK 0x2UL u8 unused_0; __le16 link_speed; - #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_LINK_SPEED_10MB 0xffffUL u8 duplex; - #define PORT_PHY_QCFG_RESP_DUPLEX_HALF (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_DUPLEX_FULL (0x1UL << 0) + #define PORT_PHY_QCFG_RESP_DUPLEX_HALF 0x0UL + #define PORT_PHY_QCFG_RESP_DUPLEX_FULL 0x1UL u8 pause; #define PORT_PHY_QCFG_RESP_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_PAUSE_RX 0x2UL @@ -1679,39 +1727,39 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_SUPPORT_SPEEDS_10MB 0x2000UL __le16 force_link_speed; - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_FORCE_LINK_SPEED_10MB 0xffffUL u8 auto_mode; - #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_QCFG_RESP_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_AUTO_MODE_SPEED_MASK 0x4UL u8 auto_pause; #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_RX 0x2UL #define PORT_PHY_QCFG_RESP_AUTO_PAUSE_AUTONEG_PAUSE 0x4UL __le16 auto_link_speed; - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB (0xaUL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB (0x14UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB (0x19UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB (0x64UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB (0xc8UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB (0xfaUL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB (0x190UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB (0x1f4UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB (0x3e8UL << 0) - #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB (0xffffUL << 0) + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100MB 0x1UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_1GB 0xaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2GB 0x14UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_2_5GB 0x19UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10GB 0x64UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_20GB 0xc8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_25GB 0xfaUL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_40GB 0x190UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_50GB 0x1f4UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_100GB 0x3e8UL + #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_10MB 0xffffUL __le16 auto_link_speed_mask; #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MBHD 0x1UL #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_100MB 0x2UL @@ -1728,46 +1776,46 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_AUTO_LINK_SPEED_MASK_10MB 0x2000UL u8 wirespeed; - #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_WIRESPEED_ON (0x1UL << 0) + #define PORT_PHY_QCFG_RESP_WIRESPEED_OFF 0x0UL + #define PORT_PHY_QCFG_RESP_WIRESPEED_ON 0x1UL u8 lpbk; - #define PORT_PHY_QCFG_RESP_LPBK_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LPBK_LOCAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LPBK_REMOTE (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_LPBK_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_PHY_QCFG_RESP_LPBK_REMOTE 0x2UL u8 force_pause; #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_FORCE_PAUSE_RX 0x2UL u8 module_status; - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED (0x4UL << 0) - #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE (0xffUL << 0) + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_DISABLETX 0x1UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_WARNINGMSG 0x2UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_PWRDOWN 0x3UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTINSERTED 0x4UL + #define PORT_PHY_QCFG_RESP_MODULE_STATUS_NOTAPPLICABLE 0xffUL __le32 preemphasis; u8 phy_maj; u8 phy_min; u8 phy_bld; u8 phy_type; - #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR (0x4UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 (0x5UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX (0x6UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR (0x7UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET (0x8UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE (0x9UL << 0) - #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY (0xaUL << 0) + #define PORT_PHY_QCFG_RESP_PHY_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASECR 0x1UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR4 0x2UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASELR 0x3UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASESR 0x4UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR2 0x5UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKX 0x6UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASEKR 0x7UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASET 0x8UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_BASETE 0x9UL + #define PORT_PHY_QCFG_RESP_PHY_TYPE_SGMIIEXTPHY 0xaUL u8 media_type; - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE (0x3UL << 0) + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_UNKNOWN 0x0UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_TP 0x1UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_DAC 0x2UL + #define PORT_PHY_QCFG_RESP_MEDIA_TYPE_FIBRE 0x3UL u8 xcvr_pkg_type; - #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL (0x2UL << 0) + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_INTERNAL 0x1UL + #define PORT_PHY_QCFG_RESP_XCVR_PKG_TYPE_XCVR_EXTERNAL 0x2UL u8 eee_config_phy_addr; #define PORT_PHY_QCFG_RESP_PHY_ADDR_MASK 0x1fUL #define PORT_PHY_QCFG_RESP_PHY_ADDR_SFT 0 @@ -1796,11 +1844,11 @@ struct hwrm_port_phy_qcfg_output { #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MBHD 0x1000UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_SPEEDS_10MB 0x2000UL u8 link_partner_adv_auto_mode; - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE (0x0UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS (0x1UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED (0x2UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW (0x3UL << 0) - #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK (0x4UL << 0) + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_NONE 0x0UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ALL_SPEEDS 0x1UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_SPEED 0x2UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_ONE_OR_BELOW 0x3UL + #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_AUTO_MODE_SPEED_MASK 0x4UL u8 link_partner_adv_pause; #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_TX 0x1UL #define PORT_PHY_QCFG_RESP_LINK_PARTNER_ADV_PAUSE_RX 0x2UL @@ -1859,7 +1907,7 @@ struct hwrm_port_mac_cfg_input { __le64 resp_addr; __le32 flags; #define PORT_MAC_CFG_REQ_FLAGS_MATCH_LINK 0x1UL - #define PORT_MAC_CFG_REQ_FLAGS_COS_ASSIGNMENT_ENABLE 0x2UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_ENABLE 0x2UL #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_ENABLE 0x4UL #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_ENABLE 0x8UL #define PORT_MAC_CFG_REQ_FLAGS_PTP_RX_TS_CAPTURE_ENABLE 0x10UL @@ -1868,28 +1916,50 @@ struct hwrm_port_mac_cfg_input { #define PORT_MAC_CFG_REQ_FLAGS_PTP_TX_TS_CAPTURE_DISABLE 0x80UL #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_ENABLE 0x100UL #define PORT_MAC_CFG_REQ_FLAGS_OOB_WOL_DISABLE 0x200UL + #define PORT_MAC_CFG_REQ_FLAGS_VLAN_PRI2COS_DISABLE 0x400UL + #define PORT_MAC_CFG_REQ_FLAGS_TUNNEL_PRI2COS_DISABLE 0x800UL + #define PORT_MAC_CFG_REQ_FLAGS_IP_DSCP2COS_DISABLE 0x1000UL __le32 enables; #define PORT_MAC_CFG_REQ_ENABLES_IPG 0x1UL #define PORT_MAC_CFG_REQ_ENABLES_LPBK 0x2UL - #define PORT_MAC_CFG_REQ_ENABLES_IVLAN_PRI2COS_MAP_PRI 0x4UL - #define PORT_MAC_CFG_REQ_ENABLES_LCOS_MAP_PRI 0x8UL + #define PORT_MAC_CFG_REQ_ENABLES_VLAN_PRI2COS_MAP_PRI 0x4UL + #define PORT_MAC_CFG_REQ_ENABLES_RESERVED1 0x8UL #define PORT_MAC_CFG_REQ_ENABLES_TUNNEL_PRI2COS_MAP_PRI 0x10UL #define PORT_MAC_CFG_REQ_ENABLES_DSCP2COS_MAP_PRI 0x20UL #define PORT_MAC_CFG_REQ_ENABLES_RX_TS_CAPTURE_PTP_MSG_TYPE 0x40UL #define PORT_MAC_CFG_REQ_ENABLES_TX_TS_CAPTURE_PTP_MSG_TYPE 0x80UL + #define PORT_MAC_CFG_REQ_ENABLES_COS_FIELD_CFG 0x100UL __le16 port_id; u8 ipg; u8 lpbk; - #define PORT_MAC_CFG_REQ_LPBK_NONE (0x0UL << 0) - #define PORT_MAC_CFG_REQ_LPBK_LOCAL (0x1UL << 0) - #define PORT_MAC_CFG_REQ_LPBK_REMOTE (0x2UL << 0) - u8 ivlan_pri2cos_map_pri; - u8 lcos_map_pri; + #define PORT_MAC_CFG_REQ_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_REQ_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_REQ_LPBK_REMOTE 0x2UL + u8 vlan_pri2cos_map_pri; + u8 reserved1; u8 tunnel_pri2cos_map_pri; u8 dscp2pri_map_pri; __le16 rx_ts_capture_ptp_msg_type; __le16 tx_ts_capture_ptp_msg_type; - __le32 unused_0; + u8 cos_field_cfg; + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_RSVD1 0x1UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_MASK 0x6UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_SFT 1 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_INNERMOST (0x0UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTER (0x1UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_OUTERMOST (0x2UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 1) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_VLAN_PRI_SEL_UNSPECIFIED + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_MASK 0x18UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_SFT 3 + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_INNERMOST (0x0UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTER (0x1UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_OUTERMOST (0x2UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED (0x3UL << 3) + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_LAST PORT_MAC_CFG_REQ_COS_FIELD_CFG_T_VLAN_PRI_SEL_UNSPECIFIED + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_MASK 0xe0UL + #define PORT_MAC_CFG_REQ_COS_FIELD_CFG_DEFAULT_COS_SFT 5 + u8 unused_0[3]; }; /* Output (16 bytes) */ @@ -1902,9 +1972,9 @@ struct hwrm_port_mac_cfg_output { __le16 mtu; u8 ipg; u8 lpbk; - #define PORT_MAC_CFG_RESP_LPBK_NONE (0x0UL << 0) - #define PORT_MAC_CFG_RESP_LPBK_LOCAL (0x1UL << 0) - #define PORT_MAC_CFG_RESP_LPBK_REMOTE (0x2UL << 0) + #define PORT_MAC_CFG_RESP_LPBK_NONE 0x0UL + #define PORT_MAC_CFG_RESP_LPBK_LOCAL 0x1UL + #define PORT_MAC_CFG_RESP_LPBK_REMOTE 0x2UL u8 unused_0; u8 valid; }; @@ -2163,8 +2233,8 @@ struct hwrm_queue_qportcfg_input { __le64 resp_addr; __le32 flags; #define QUEUE_QPORTCFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX (0x1UL << 0) + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX 0x1UL #define QUEUE_QPORTCFG_REQ_FLAGS_PATH_LAST QUEUE_QPORTCFG_REQ_FLAGS_PATH_RX __le16 port_id; __le16 unused_0; @@ -2179,50 +2249,51 @@ struct hwrm_queue_qportcfg_output { u8 max_configurable_queues; u8 max_configurable_lossless_queues; u8 queue_cfg_allowed; - u8 queue_buffers_cfg_allowed; + u8 queue_cfg_info; + #define QUEUE_QPORTCFG_RESP_QUEUE_CFG_INFO_ASYM_CFG 0x1UL u8 queue_pfcenable_cfg_allowed; u8 queue_pri2cos_cfg_allowed; u8 queue_cos2bw_cfg_allowed; u8 queue_id0; u8 queue_id0_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID0_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id1; u8 queue_id1_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID1_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id2; u8 queue_id2_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID2_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id3; u8 queue_id3_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID3_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id4; u8 queue_id4_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID4_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id5; u8 queue_id5_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID5_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id6; u8 queue_id6_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID6_SERVICE_PROFILE_UNKNOWN 0xffUL u8 queue_id7; u8 queue_id7_service_profile; - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_QPORTCFG_RESP_QUEUE_ID7_SERVICE_PROFILE_UNKNOWN 0xffUL u8 valid; }; @@ -2235,19 +2306,21 @@ struct hwrm_queue_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define QUEUE_CFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_RX + #define QUEUE_CFG_REQ_FLAGS_PATH_MASK 0x3UL + #define QUEUE_CFG_REQ_FLAGS_PATH_SFT 0 + #define QUEUE_CFG_REQ_FLAGS_PATH_TX 0x0UL + #define QUEUE_CFG_REQ_FLAGS_PATH_RX 0x1UL + #define QUEUE_CFG_REQ_FLAGS_PATH_BIDIR 0x2UL + #define QUEUE_CFG_REQ_FLAGS_PATH_LAST QUEUE_CFG_REQ_FLAGS_PATH_BIDIR __le32 enables; #define QUEUE_CFG_REQ_ENABLES_DFLT_LEN 0x1UL #define QUEUE_CFG_REQ_ENABLES_SERVICE_PROFILE 0x2UL __le32 queue_id; __le32 dflt_len; u8 service_profile; - #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY (0x0UL << 0) - #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS (0x1UL << 0) - #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN (0xffUL << 0) + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSY 0x0UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_LOSSLESS 0x1UL + #define QUEUE_CFG_REQ_SERVICE_PROFILE_UNKNOWN 0xffUL u8 unused_0[7]; }; @@ -2264,50 +2337,6 @@ struct hwrm_queue_cfg_output { u8 valid; }; -/* hwrm_queue_buffers_cfg */ -/* Input (56 bytes) */ -struct hwrm_queue_buffers_cfg_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le32 flags; - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH 0x1UL - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_LAST QUEUE_BUFFERS_CFG_REQ_FLAGS_PATH_RX - __le32 enables; - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_RESERVED 0x1UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_SHARED 0x2UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XOFF 0x4UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_XON 0x8UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_FULL 0x10UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_NOTFULL 0x20UL - #define QUEUE_BUFFERS_CFG_REQ_ENABLES_MAX 0x40UL - __le32 queue_id; - __le32 reserved; - __le32 shared; - __le32 xoff; - __le32 xon; - __le32 full; - __le32 notfull; - __le32 max; -}; - -/* Output (16 bytes) */ -struct hwrm_queue_buffers_cfg_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - __le32 unused_0; - u8 unused_1; - u8 unused_2; - u8 unused_3; - u8 valid; -}; - /* hwrm_queue_pfcenable_cfg */ /* Input (24 bytes) */ struct hwrm_queue_pfcenable_cfg_input { @@ -2351,12 +2380,22 @@ struct hwrm_queue_pri2cos_cfg_input { __le16 target_id; __le64 resp_addr; __le32 flags; - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_MASK 0x3UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_SFT 0 #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_TX (0x0UL << 0) #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX (0x1UL << 0) - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_RX - #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR (0x2UL << 0) + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_LAST QUEUE_PRI2COS_CFG_REQ_FLAGS_PATH_BIDIR + #define QUEUE_PRI2COS_CFG_REQ_FLAGS_IVLAN 0x4UL __le32 enables; + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI0_COS_QUEUE_ID 0x1UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI1_COS_QUEUE_ID 0x2UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI2_COS_QUEUE_ID 0x4UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI3_COS_QUEUE_ID 0x8UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI4_COS_QUEUE_ID 0x10UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI5_COS_QUEUE_ID 0x20UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI6_COS_QUEUE_ID 0x40UL + #define QUEUE_PRI2COS_CFG_REQ_ENABLES_PRI7_COS_QUEUE_ID 0x80UL u8 port_id; u8 pri0_cos_queue_id; u8 pri1_cos_queue_id; @@ -2404,82 +2443,226 @@ struct hwrm_queue_cos2bw_cfg_input { u8 queue_id0; u8 unused_0; __le32 queue_id0_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id0_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id0_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID0_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id0_pri_lvl; u8 queue_id0_bw_weight; u8 queue_id1; __le32 queue_id1_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id1_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id1_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID1_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id1_pri_lvl; u8 queue_id1_bw_weight; u8 queue_id2; __le32 queue_id2_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id2_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id2_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID2_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id2_pri_lvl; u8 queue_id2_bw_weight; u8 queue_id3; __le32 queue_id3_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id3_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id3_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID3_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id3_pri_lvl; u8 queue_id3_bw_weight; u8 queue_id4; __le32 queue_id4_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id4_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id4_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID4_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id4_pri_lvl; u8 queue_id4_bw_weight; u8 queue_id5; __le32 queue_id5_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id5_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id5_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID5_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id5_pri_lvl; u8 queue_id5_bw_weight; u8 queue_id6; __le32 queue_id6_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id6_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id6_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID6_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id6_pri_lvl; u8 queue_id6_bw_weight; u8 queue_id7; __le32 queue_id7_min_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MIN_BW_BW_VALUE_UNIT_INVALID __le32 queue_id7_max_bw; + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_SFT 0 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_RSVD 0x10000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_LAST QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_MAX_BW_BW_VALUE_UNIT_INVALID u8 queue_id7_tsa_assign; - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP (0x0UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS (0x1UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST (0x2UL << 0) - #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST (0xffUL << 0) + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_SP 0x0UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_ETS 0x1UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_FIRST 0x2UL + #define QUEUE_COS2BW_CFG_REQ_QUEUE_ID7_TSA_ASSIGN_RESERVED_LAST 0xffUL u8 queue_id7_pri_lvl; u8 queue_id7_bw_weight; u8 unused_1[5]; @@ -2563,6 +2746,7 @@ struct hwrm_vnic_cfg_input { #define VNIC_CFG_REQ_FLAGS_BD_STALL_MODE 0x4UL #define VNIC_CFG_REQ_FLAGS_ROCE_DUAL_VNIC_MODE 0x8UL #define VNIC_CFG_REQ_FLAGS_ROCE_ONLY_VNIC_MODE 0x10UL + #define VNIC_CFG_REQ_FLAGS_RSS_DFLT_CR_MODE 0x20UL __le32 enables; #define VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP 0x1UL #define VNIC_CFG_REQ_ENABLES_RSS_RULE 0x2UL @@ -2615,18 +2799,18 @@ struct hwrm_vnic_tpa_cfg_input { #define VNIC_TPA_CFG_REQ_ENABLES_MIN_AGG_LEN 0x8UL __le16 vnic_id; __le16 max_agg_segs; - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 (0x0UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 (0x1UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 (0x2UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 (0x3UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX (0x1fUL << 0) + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGG_SEGS_MAX 0x1fUL __le16 max_aggs; - #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 (0x0UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 (0x1UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 (0x2UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 (0x3UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 (0x4UL << 0) - #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX (0x7UL << 0) + #define VNIC_TPA_CFG_REQ_MAX_AGGS_1 0x0UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_2 0x1UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_4 0x2UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_8 0x3UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_16 0x4UL + #define VNIC_TPA_CFG_REQ_MAX_AGGS_MAX 0x7UL u8 unused_0; u8 unused_1; __le32 max_agg_timer; @@ -2780,15 +2964,15 @@ struct hwrm_ring_alloc_input { __le64 resp_addr; __le32 enables; #define RING_ALLOC_REQ_ENABLES_RESERVED1 0x1UL - #define RING_ALLOC_REQ_ENABLES_RESERVED2 0x2UL + #define RING_ALLOC_REQ_ENABLES_RING_ARB_CFG 0x2UL #define RING_ALLOC_REQ_ENABLES_RESERVED3 0x4UL #define RING_ALLOC_REQ_ENABLES_STAT_CTX_ID_VALID 0x8UL #define RING_ALLOC_REQ_ENABLES_RESERVED4 0x10UL #define RING_ALLOC_REQ_ENABLES_MAX_BW_VALID 0x20UL u8 ring_type; - #define RING_ALLOC_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_ALLOC_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_ALLOC_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_ALLOC_REQ_RING_TYPE_CMPL 0x0UL + #define RING_ALLOC_REQ_RING_TYPE_TX 0x1UL + #define RING_ALLOC_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 unused_1; __le64 page_tbl_addr; @@ -2804,18 +2988,36 @@ struct hwrm_ring_alloc_input { u8 unused_4; u8 unused_5; __le32 reserved1; - __le16 reserved2; + __le16 ring_arb_cfg; + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_MASK 0xfUL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SFT 0 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_SP (0x1UL << 0) + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ (0x2UL << 0) + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_LAST RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_WFQ + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_MASK 0xf0UL + #define RING_ALLOC_REQ_RING_ARB_CFG_RSVD_SFT 4 + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_MASK 0xff00UL + #define RING_ALLOC_REQ_RING_ARB_CFG_ARB_POLICY_PARAM_SFT 8 u8 unused_6; u8 unused_7; __le32 reserved3; __le32 stat_ctx_id; __le32 reserved4; __le32 max_bw; + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_MASK 0xfffffffUL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_SFT 0 + #define RING_ALLOC_REQ_MAX_BW_RSVD 0x10000000UL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MASK 0xe0000000UL + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_SFT 29 + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_MBPS (0x0UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_PERCENT1_100 (0x1UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID (0x7UL << 29) + #define RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_LAST RING_ALLOC_REQ_MAX_BW_BW_VALUE_UNIT_INVALID u8 int_mode; - #define RING_ALLOC_REQ_INT_MODE_LEGACY (0x0UL << 0) - #define RING_ALLOC_REQ_INT_MODE_RSVD (0x1UL << 0) - #define RING_ALLOC_REQ_INT_MODE_MSIX (0x2UL << 0) - #define RING_ALLOC_REQ_INT_MODE_POLL (0x3UL << 0) + #define RING_ALLOC_REQ_INT_MODE_LEGACY 0x0UL + #define RING_ALLOC_REQ_INT_MODE_RSVD 0x1UL + #define RING_ALLOC_REQ_INT_MODE_MSIX 0x2UL + #define RING_ALLOC_REQ_INT_MODE_POLL 0x3UL u8 unused_8[3]; }; @@ -2842,9 +3044,9 @@ struct hwrm_ring_free_input { __le16 target_id; __le64 resp_addr; u8 ring_type; - #define RING_FREE_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_FREE_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_FREE_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_FREE_REQ_RING_TYPE_CMPL 0x0UL + #define RING_FREE_REQ_RING_TYPE_TX 0x1UL + #define RING_FREE_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 ring_id; __le32 unused_1; @@ -2942,9 +3144,9 @@ struct hwrm_ring_reset_input { __le16 target_id; __le64 resp_addr; u8 ring_type; - #define RING_RESET_REQ_RING_TYPE_CMPL (0x0UL << 0) - #define RING_RESET_REQ_RING_TYPE_TX (0x1UL << 0) - #define RING_RESET_REQ_RING_TYPE_RX (0x2UL << 0) + #define RING_RESET_REQ_RING_TYPE_CMPL 0x0UL + #define RING_RESET_REQ_RING_TYPE_TX 0x1UL + #define RING_RESET_REQ_RING_TYPE_RX 0x2UL u8 unused_0; __le16 ring_id; __le32 unused_1; @@ -3068,36 +3270,36 @@ struct hwrm_cfa_l2_filter_alloc_input { __le16 t_l2_ivlan; __le16 t_l2_ivlan_mask; u8 src_type; - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG (0x4UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE (0x5UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO (0x6UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG (0x7UL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_NPORT 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_PF 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VF 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_VNIC 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_KONG 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_APE 0x5UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_BONO 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_SRC_TYPE_TANG 0x7UL u8 unused_6; __le32 src_id; u8 tunnel_type; - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_L2_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_7; __le16 dst_id; __le16 mirror_vnic_id; u8 pri_hint; - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER (0x1UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER (0x2UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX (0x3UL << 0) - #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN (0x4UL << 0) + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_ABOVE_FILTER 0x1UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_BELOW_FILTER 0x2UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MAX 0x3UL + #define CFA_L2_FILTER_ALLOC_REQ_PRI_HINT_MIN 0x4UL u8 unused_8; __le32 unused_9; __le64 l2_filter_id_hint; @@ -3246,16 +3448,16 @@ struct hwrm_cfa_tunnel_filter_alloc_input { u8 l3_addr_type; u8 t_l3_addr_type; u8 tunnel_type; - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_TUNNEL_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 unused_0; __le32 vni; __le32 dst_vnic_id; @@ -3311,14 +3513,14 @@ struct hwrm_cfa_encap_record_alloc_input { __le32 flags; #define CFA_ENCAP_RECORD_ALLOC_REQ_FLAGS_LOOPBACK 0x1UL u8 encap_type; - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN (0x1UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE (0x2UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE (0x3UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP (0x4UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE (0x5UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS (0x6UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN (0x7UL << 0) - #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE (0x8UL << 0) + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VXLAN 0x1UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_NVGRE 0x2UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_L2GRE 0x3UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPIP 0x4UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_GENEVE 0x5UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_MPLS 0x6UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_VLAN 0x7UL + #define CFA_ENCAP_RECORD_ALLOC_REQ_ENCAP_TYPE_IPGRE 0x8UL u8 unused_0; __le16 unused_1; __le32 encap_data[16]; @@ -3397,32 +3599,32 @@ struct hwrm_cfa_ntuple_filter_alloc_input { u8 src_macaddr[6]; __be16 ethertype; u8 ip_addr_type; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 (0x4UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 (0x6UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV4 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_ADDR_TYPE_IPV6 0x6UL u8 ip_protocol; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP (0x6UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP (0x11UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UNKNOWN 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_UDP 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_IP_PROTOCOL_TCP 0x11UL __le16 dst_id; __le16 mirror_vnic_id; u8 tunnel_type; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE (0x2UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE (0x3UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP (0x4UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS (0x6UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT (0x7UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE (0x8UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL (0xffUL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NONTUNNEL 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_NVGRE 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_L2GRE 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPIP 0x4UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_MPLS 0x6UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_STT 0x7UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_IPGRE 0x8UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_TUNNEL_TYPE_ANYTUNNEL 0xffUL u8 pri_hint; - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER (0x0UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE (0x1UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW (0x2UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST (0x3UL << 0) - #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST (0x4UL << 0) + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_NO_PREFER 0x0UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_ABOVE 0x1UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_BELOW 0x2UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_HIGHEST 0x3UL + #define CFA_NTUPLE_FILTER_ALLOC_REQ_PRI_HINT_LOWEST 0x4UL __be32 src_ipaddr[4]; __be32 src_ipaddr_mask[4]; __be32 dst_ipaddr[4]; @@ -3511,8 +3713,8 @@ struct hwrm_tunnel_dst_port_query_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_QUERY_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0[7]; }; @@ -3539,8 +3741,8 @@ struct hwrm_tunnel_dst_port_alloc_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_ALLOC_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0; __be16 tunnel_dst_port_val; __le32 unused_1; @@ -3570,8 +3772,8 @@ struct hwrm_tunnel_dst_port_free_input { __le16 target_id; __le64 resp_addr; u8 tunnel_type; - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN (0x1UL << 0) - #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE (0x5UL << 0) + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_VXLAN 0x1UL + #define TUNNEL_DST_PORT_FREE_REQ_TUNNEL_TYPE_GENEVE 0x5UL u8 unused_0; __le16 tunnel_dst_port_id; __le32 unused_1; @@ -3720,15 +3922,15 @@ struct hwrm_fw_reset_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL << 0) - #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_RESET_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 selfrst_status; - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST 0x2UL __le16 unused_0[3]; }; @@ -3739,9 +3941,9 @@ struct hwrm_fw_reset_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_RESET_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3759,11 +3961,11 @@ struct hwrm_fw_qstatus_input { __le16 target_id; __le64 resp_addr; u8 embedded_proc_type; - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT (0x0UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT (0x1UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL (0x2UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE (0x3UL << 0) - #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD (0x4UL << 0) + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_BOOT 0x0UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_MGMT 0x1UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_NETCTRL 0x2UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_ROCE 0x3UL + #define FW_QSTATUS_REQ_EMBEDDED_PROC_TYPE_RSVD 0x4UL u8 unused_0[7]; }; @@ -3774,9 +3976,9 @@ struct hwrm_fw_qstatus_output { __le16 seq_id; __le16 resp_len; u8 selfrst_status; - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE (0x0UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP (0x1UL << 0) - #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST (0x2UL << 0) + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTNONE 0x0UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTASAP 0x1UL + #define FW_QSTATUS_RESP_SELFRST_STATUS_SELFRSTPCIERST 0x2UL u8 unused_0; __le16 unused_1; u8 unused_2; @@ -3785,6 +3987,42 @@ struct hwrm_fw_qstatus_output { u8 valid; }; +/* hwrm_fw_set_time */ +/* Input (32 bytes) */ +struct hwrm_fw_set_time_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le16 year; + #define FW_SET_TIME_REQ_YEAR_UNKNOWN 0x0UL + u8 month; + u8 day; + u8 hour; + u8 minute; + u8 second; + u8 unused_0; + __le16 millisecond; + __le16 zone; + #define FW_SET_TIME_REQ_ZONE_UTC 0x0UL + #define FW_SET_TIME_REQ_ZONE_UNKNOWN 0xffffUL + __le32 unused_1; +}; + +/* Output (16 bytes) */ +struct hwrm_fw_set_time_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le32 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + /* hwrm_exec_fwd_resp */ /* Input (128 bytes) */ struct hwrm_exec_fwd_resp_input { @@ -3921,32 +4159,6 @@ struct hwrm_temp_monitor_query_output { u8 valid; }; -/* hwrm_nvm_raw_write_blk */ -/* Input (32 bytes) */ -struct hwrm_nvm_raw_write_blk_input { - __le16 req_type; - __le16 cmpl_ring; - __le16 seq_id; - __le16 target_id; - __le64 resp_addr; - __le64 host_src_addr; - __le32 dest_addr; - __le32 len; -}; - -/* Output (16 bytes) */ -struct hwrm_nvm_raw_write_blk_output { - __le16 error_code; - __le16 req_type; - __le16 seq_id; - __le16 resp_len; - __le32 unused_0; - u8 unused_1; - u8 unused_2; - u8 unused_3; - u8 valid; -}; - /* hwrm_nvm_read */ /* Input (40 bytes) */ struct hwrm_nvm_read_input { @@ -4132,9 +4344,9 @@ struct hwrm_nvm_find_dir_entry_input { u8 opt_ordinal; #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_MASK 0x3UL #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_SFT 0 - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ (0x0UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE (0x1UL << 0) - #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT (0x2UL << 0) + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_EQ 0x0UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GE 0x1UL + #define NVM_FIND_DIR_ENTRY_REQ_OPT_ORDINAL_GT 0x2UL u8 unused_1[3]; }; @@ -4266,4 +4478,41 @@ struct hwrm_nvm_verify_update_output { u8 valid; }; +/* hwrm_nvm_install_update */ +/* Input (24 bytes) */ +struct hwrm_nvm_install_update_input { + __le16 req_type; + __le16 cmpl_ring; + __le16 seq_id; + __le16 target_id; + __le64 resp_addr; + __le32 install_type; + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_NORMAL 0x0UL + #define NVM_INSTALL_UPDATE_REQ_INSTALL_TYPE_ALL 0xffffffffUL + __le32 unused_0; +}; + +/* Output (24 bytes) */ +struct hwrm_nvm_install_update_output { + __le16 error_code; + __le16 req_type; + __le16 seq_id; + __le16 resp_len; + __le64 installed_items; + u8 result; + #define NVM_INSTALL_UPDATE_RESP_RESULT_SUCCESS 0x0UL + u8 problem_item; + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_PROBLEM_ITEM_PACKAGE 0xffUL + u8 reset_required; + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_NONE 0x0UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_PCI 0x1UL + #define NVM_INSTALL_UPDATE_RESP_RESET_REQUIRED_POWER 0x2UL + u8 unused_0; + u8 unused_1; + u8 unused_2; + u8 unused_3; + u8 valid; +}; + #endif diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c index 50d2007a2640..ec6cd18842c3 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c @@ -19,6 +19,45 @@ #include "bnxt_ethtool.h" #ifdef CONFIG_BNXT_SRIOV +static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, + struct bnxt_vf_info *vf, u16 event_id) +{ + struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; + struct hwrm_fwd_async_event_cmpl_input req = {0}; + struct hwrm_async_event_cmpl *async_cmpl; + int rc = 0; + + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); + if (vf) + req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); + else + /* broadcast this async event to all VFs */ + req.encap_async_event_target_id = cpu_to_le16(0xffff); + async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; + async_cmpl->type = + cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); + async_cmpl->event_id = cpu_to_le16(event_id); + + mutex_lock(&bp->hwrm_cmd_lock); + rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); + + if (rc) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n", + rc); + goto fwd_async_event_cmpl_exit; + } + + if (resp->error_code) { + netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", + resp->error_code); + rc = -1; + } + +fwd_async_event_cmpl_exit: + mutex_unlock(&bp->hwrm_cmd_lock); + return rc; +} + static int bnxt_vf_ndo_prep(struct bnxt *bp, int vf_id) { if (!test_bit(BNXT_STATE_OPEN, &bp->state)) { @@ -135,7 +174,8 @@ int bnxt_set_vf_mac(struct net_device *dev, int vf_id, u8 *mac) return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); } -int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) +int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos, + __be16 vlan_proto) { struct hwrm_func_cfg_input req = {0}; struct bnxt *bp = netdev_priv(dev); @@ -146,6 +186,9 @@ int bnxt_set_vf_vlan(struct net_device *dev, int vf_id, u16 vlan_id, u8 qos) if (bp->hwrm_spec_code < 0x10201) return -ENOTSUPP; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + rc = bnxt_vf_ndo_prep(bp, vf_id); if (rc) return rc; @@ -243,8 +286,9 @@ int bnxt_set_vf_link_state(struct net_device *dev, int vf_id, int link) rc = -EINVAL; break; } - /* CHIMP TODO: send msg to VF to update new link state */ - + if (vf->flags & (BNXT_VF_LINK_UP | BNXT_VF_LINK_FORCED)) + rc = bnxt_hwrm_fwd_async_event_cmpl(bp, vf, + HWRM_ASYNC_EVENT_CMPL_EVENT_ID_LINK_STATUS_CHANGE); return rc; } @@ -525,46 +569,6 @@ err_out1: return rc; } -static int bnxt_hwrm_fwd_async_event_cmpl(struct bnxt *bp, - struct bnxt_vf_info *vf, - u16 event_id) -{ - int rc = 0; - struct hwrm_fwd_async_event_cmpl_input req = {0}; - struct hwrm_fwd_async_event_cmpl_output *resp = bp->hwrm_cmd_resp_addr; - struct hwrm_async_event_cmpl *async_cmpl; - - bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FWD_ASYNC_EVENT_CMPL, -1, -1); - if (vf) - req.encap_async_event_target_id = cpu_to_le16(vf->fw_fid); - else - /* broadcast this async event to all VFs */ - req.encap_async_event_target_id = cpu_to_le16(0xffff); - async_cmpl = (struct hwrm_async_event_cmpl *)req.encap_async_event_cmpl; - async_cmpl->type = - cpu_to_le16(HWRM_ASYNC_EVENT_CMPL_TYPE_HWRM_ASYNC_EVENT); - async_cmpl->event_id = cpu_to_le16(event_id); - - mutex_lock(&bp->hwrm_cmd_lock); - rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - - if (rc) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl failed. rc:%d\n", - rc); - goto fwd_async_event_cmpl_exit; - } - - if (resp->error_code) { - netdev_err(bp->dev, "hwrm_fwd_async_event_cmpl error %d\n", - resp->error_code); - rc = -1; - } - -fwd_async_event_cmpl_exit: - mutex_unlock(&bp->hwrm_cmd_lock); - return rc; -} - void bnxt_sriov_disable(struct bnxt *bp) { u16 num_vfs = pci_num_vf(bp->pdev); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h index 0392670ab49c..1ab72e4820af 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.h @@ -12,7 +12,7 @@ int bnxt_get_vf_config(struct net_device *, int, struct ifla_vf_info *); int bnxt_set_vf_mac(struct net_device *, int, u8 *); -int bnxt_set_vf_vlan(struct net_device *, int, u16, u8); +int bnxt_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int bnxt_set_vf_bw(struct net_device *, int, int, int); int bnxt_set_vf_link_state(struct net_device *, int, int); int bnxt_set_vf_spoofchk(struct net_device *, int, bool); diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c index 541456398dfb..4464bc5db934 100644 --- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c +++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c @@ -450,8 +450,8 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv, genet_dma_ring_regs[r]); } -static int bcmgenet_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -461,11 +461,11 @@ static int bcmgenet_get_settings(struct net_device *dev, if (!priv->phydev) return -ENODEV; - return phy_ethtool_gset(priv->phydev, cmd); + return phy_ethtool_ksettings_get(priv->phydev, cmd); } -static int bcmgenet_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int bcmgenet_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct bcmgenet_priv *priv = netdev_priv(dev); @@ -475,7 +475,7 @@ static int bcmgenet_set_settings(struct net_device *dev, if (!priv->phydev) return -ENODEV; - return phy_ethtool_sset(priv->phydev, cmd); + return phy_ethtool_ksettings_set(priv->phydev, cmd); } static int bcmgenet_set_rx_csum(struct net_device *dev, @@ -979,12 +979,10 @@ static int bcmgenet_nway_reset(struct net_device *dev) } /* standard ethtool support functions. */ -static struct ethtool_ops bcmgenet_ethtool_ops = { +static const struct ethtool_ops bcmgenet_ethtool_ops = { .get_strings = bcmgenet_get_strings, .get_sset_count = bcmgenet_get_sset_count, .get_ethtool_stats = bcmgenet_get_ethtool_stats, - .get_settings = bcmgenet_get_settings, - .set_settings = bcmgenet_set_settings, .get_drvinfo = bcmgenet_get_drvinfo, .get_link = ethtool_op_get_link, .get_msglevel = bcmgenet_get_msglevel, @@ -996,6 +994,8 @@ static struct ethtool_ops bcmgenet_ethtool_ops = { .nway_reset = bcmgenet_nway_reset, .get_coalesce = bcmgenet_get_coalesce, .set_coalesce = bcmgenet_set_coalesce, + .get_link_ksettings = bcmgenet_get_link_ksettings, + .set_link_ksettings = bcmgenet_set_link_ksettings, }; /* Power down the unimac, based on mode. */ @@ -2669,128 +2669,6 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl) bcmgenet_tdma_writel(priv, reg, DMA_CTRL); } -static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv, - u32 f_index) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - return !!(reg & (1 << (f_index % 32))); -} - -static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg |= (1 << (f_index % 32)); - bcmgenet_hfb_reg_writel(priv, reg, offset); -} - -static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv, - u32 f_index, u32 rx_queue) -{ - u32 offset; - u32 reg; - - offset = f_index / 8; - reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset); - reg &= ~(0xF << (4 * (f_index % 8))); - reg |= ((rx_queue & 0xF) << (4 * (f_index % 8))); - bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset); -} - -static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv, - u32 f_index, u32 f_length) -{ - u32 offset; - u32 reg; - - offset = HFB_FLT_LEN_V3PLUS + - ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) * - sizeof(u32); - reg = bcmgenet_hfb_reg_readl(priv, offset); - reg &= ~(0xFF << (8 * (f_index % 4))); - reg |= ((f_length & 0xFF) << (8 * (f_index % 4))); - bcmgenet_hfb_reg_writel(priv, reg, offset); -} - -static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv) -{ - u32 f_index; - - for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++) - if (!bcmgenet_hfb_is_filter_enabled(priv, f_index)) - return f_index; - - return -ENOMEM; -} - -/* bcmgenet_hfb_add_filter - * - * Add new filter to Hardware Filter Block to match and direct Rx traffic to - * desired Rx queue. - * - * f_data is an array of unsigned 32-bit integers where each 32-bit integer - * provides filter data for 2 bytes (4 nibbles) of Rx frame: - * - * bits 31:20 - unused - * bit 19 - nibble 0 match enable - * bit 18 - nibble 1 match enable - * bit 17 - nibble 2 match enable - * bit 16 - nibble 3 match enable - * bits 15:12 - nibble 0 data - * bits 11:8 - nibble 1 data - * bits 7:4 - nibble 2 data - * bits 3:0 - nibble 3 data - * - * Example: - * In order to match: - * - Ethernet frame type = 0x0800 (IP) - * - IP version field = 4 - * - IP protocol field = 0x11 (UDP) - * - * The following filter is needed: - * u32 hfb_filter_ipv4_udp[] = { - * Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000, - * Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000, - * Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011, - * }; - * - * To add the filter to HFB and direct the traffic to Rx queue 0, call: - * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp, - * ARRAY_SIZE(hfb_filter_ipv4_udp), 0); - */ -int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data, - u32 f_length, u32 rx_queue) -{ - int f_index; - u32 i; - - f_index = bcmgenet_hfb_find_unused_filter(priv); - if (f_index < 0) - return -ENOMEM; - - if (f_length > priv->hw_params->hfb_filter_size) - return -EINVAL; - - for (i = 0; i < f_length; i++) - bcmgenet_hfb_writel(priv, f_data[i], - (f_index * priv->hw_params->hfb_filter_size + i) * - sizeof(u32)); - - bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length); - bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue); - bcmgenet_hfb_enable_filter(priv, f_index); - bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL); - - return 0; -} - /* bcmgenet_hfb_clear * * Clear Hardware Filter Block and disable all filtering. diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index ea967df4b202..a927a730da10 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -12079,95 +12079,107 @@ static int tg3_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, return ret; } -static int tg3_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int tg3_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { struct tg3 *tp = netdev_priv(dev); + u32 supported, advertising; if (tg3_flag(tp, USE_PHYLIB)) { struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); - return phy_ethtool_gset(phydev, cmd); + return phy_ethtool_ksettings_get(phydev, cmd); } - cmd->supported = (SUPPORTED_Autoneg); + supported = (SUPPORTED_Autoneg); if (!(tp->phy_flags & TG3_PHYFLG_10_100_ONLY)) - cmd->supported |= (SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full); + supported |= (SUPPORTED_1000baseT_Half | + SUPPORTED_1000baseT_Full); if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { - cmd->supported |= (SUPPORTED_100baseT_Half | - SUPPORTED_100baseT_Full | - SUPPORTED_10baseT_Half | - SUPPORTED_10baseT_Full | - SUPPORTED_TP); - cmd->port = PORT_TP; + supported |= (SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_TP); + cmd->base.port = PORT_TP; } else { - cmd->supported |= SUPPORTED_FIBRE; - cmd->port = PORT_FIBRE; + supported |= SUPPORTED_FIBRE; + cmd->base.port = PORT_FIBRE; } + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); - cmd->advertising = tp->link_config.advertising; + advertising = tp->link_config.advertising; if (tg3_flag(tp, PAUSE_AUTONEG)) { if (tp->link_config.flowctrl & FLOW_CTRL_RX) { if (tp->link_config.flowctrl & FLOW_CTRL_TX) { - cmd->advertising |= ADVERTISED_Pause; + advertising |= ADVERTISED_Pause; } else { - cmd->advertising |= ADVERTISED_Pause | - ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; } } else if (tp->link_config.flowctrl & FLOW_CTRL_TX) { - cmd->advertising |= ADVERTISED_Asym_Pause; + advertising |= ADVERTISED_Asym_Pause; } } + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + if (netif_running(dev) && tp->link_up) { - ethtool_cmd_speed_set(cmd, tp->link_config.active_speed); - cmd->duplex = tp->link_config.active_duplex; - cmd->lp_advertising = tp->link_config.rmt_adv; + cmd->base.speed = tp->link_config.active_speed; + cmd->base.duplex = tp->link_config.active_duplex; + ethtool_convert_legacy_u32_to_link_mode( + cmd->link_modes.lp_advertising, + tp->link_config.rmt_adv); + if (!(tp->phy_flags & TG3_PHYFLG_ANY_SERDES)) { if (tp->phy_flags & TG3_PHYFLG_MDIX_STATE) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } } else { - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.speed = SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; } - cmd->phy_address = tp->phy_addr; - cmd->transceiver = XCVR_INTERNAL; - cmd->autoneg = tp->link_config.autoneg; - cmd->maxtxpkt = 0; - cmd->maxrxpkt = 0; + cmd->base.phy_address = tp->phy_addr; + cmd->base.autoneg = tp->link_config.autoneg; return 0; } -static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int tg3_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct tg3 *tp = netdev_priv(dev); - u32 speed = ethtool_cmd_speed(cmd); + u32 speed = cmd->base.speed; + u32 advertising; if (tg3_flag(tp, USE_PHYLIB)) { struct phy_device *phydev; if (!(tp->phy_flags & TG3_PHYFLG_IS_CONNECTED)) return -EAGAIN; phydev = mdiobus_get_phy(tp->mdio_bus, tp->phy_addr); - return phy_ethtool_sset(phydev, cmd); + return phy_ethtool_ksettings_set(phydev, cmd); } - if (cmd->autoneg != AUTONEG_ENABLE && - cmd->autoneg != AUTONEG_DISABLE) + if (cmd->base.autoneg != AUTONEG_ENABLE && + cmd->base.autoneg != AUTONEG_DISABLE) return -EINVAL; - if (cmd->autoneg == AUTONEG_DISABLE && - cmd->duplex != DUPLEX_FULL && - cmd->duplex != DUPLEX_HALF) + if (cmd->base.autoneg == AUTONEG_DISABLE && + cmd->base.duplex != DUPLEX_FULL && + cmd->base.duplex != DUPLEX_HALF) return -EINVAL; - if (cmd->autoneg == AUTONEG_ENABLE) { + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); + + if (cmd->base.autoneg == AUTONEG_ENABLE) { u32 mask = ADVERTISED_Autoneg | ADVERTISED_Pause | ADVERTISED_Asym_Pause; @@ -12185,7 +12197,7 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) else mask |= ADVERTISED_FIBRE; - if (cmd->advertising & ~mask) + if (advertising & ~mask) return -EINVAL; mask &= (ADVERTISED_1000baseT_Half | @@ -12195,13 +12207,13 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) ADVERTISED_10baseT_Half | ADVERTISED_10baseT_Full); - cmd->advertising &= mask; + advertising &= mask; } else { if (tp->phy_flags & TG3_PHYFLG_ANY_SERDES) { if (speed != SPEED_1000) return -EINVAL; - if (cmd->duplex != DUPLEX_FULL) + if (cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else { if (speed != SPEED_100 && @@ -12212,16 +12224,16 @@ static int tg3_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) tg3_full_lock(tp, 0); - tp->link_config.autoneg = cmd->autoneg; - if (cmd->autoneg == AUTONEG_ENABLE) { - tp->link_config.advertising = (cmd->advertising | + tp->link_config.autoneg = cmd->base.autoneg; + if (cmd->base.autoneg == AUTONEG_ENABLE) { + tp->link_config.advertising = (advertising | ADVERTISED_Autoneg); tp->link_config.speed = SPEED_UNKNOWN; tp->link_config.duplex = DUPLEX_UNKNOWN; } else { tp->link_config.advertising = 0; tp->link_config.speed = speed; - tp->link_config.duplex = cmd->duplex; + tp->link_config.duplex = cmd->base.duplex; } tp->phy_flags |= TG3_PHYFLG_USER_CONFIGURED; @@ -14094,8 +14106,6 @@ static int tg3_get_eee(struct net_device *dev, struct ethtool_eee *edata) } static const struct ethtool_ops tg3_ethtool_ops = { - .get_settings = tg3_get_settings, - .set_settings = tg3_set_settings, .get_drvinfo = tg3_get_drvinfo, .get_regs_len = tg3_get_regs_len, .get_regs = tg3_get_regs, @@ -14128,6 +14138,8 @@ static const struct ethtool_ops tg3_ethtool_ops = { .get_ts_info = tg3_get_ts_info, .get_eee = tg3_get_eee, .set_eee = tg3_set_eee, + .get_link_ksettings = tg3_get_link_ksettings, + .set_link_ksettings = tg3_set_link_ksettings, }; static struct rtnl_link_stats64 *tg3_get_stats64(struct net_device *dev, diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 771cc267f217..f9df4b5ae90e 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -54,9 +54,7 @@ MODULE_PARM_DESC(bna_debugfs_enable, "Enables debugfs feature, default=1," * Global variables */ static u32 bnad_rxqs_per_cq = 2; -static u32 bna_id; -static struct mutex bnad_list_mutex; -static LIST_HEAD(bnad_list); +static atomic_t bna_id; static const u8 bnad_bcast_addr[] __aligned(2) = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; @@ -76,23 +74,6 @@ do { \ (_res_info)->res_u.mem_info.len = (_size); \ } while (0) -static void -bnad_add_to_list(struct bnad *bnad) -{ - mutex_lock(&bnad_list_mutex); - list_add_tail(&bnad->list_entry, &bnad_list); - bnad->id = bna_id++; - mutex_unlock(&bnad_list_mutex); -} - -static void -bnad_remove_from_list(struct bnad *bnad) -{ - mutex_lock(&bnad_list_mutex); - list_del(&bnad->list_entry); - mutex_unlock(&bnad_list_mutex); -} - /* * Reinitialize completions in CQ, once Rx is taken down */ @@ -3573,14 +3554,12 @@ bnad_lock_init(struct bnad *bnad) { spin_lock_init(&bnad->bna_lock); mutex_init(&bnad->conf_mutex); - mutex_init(&bnad_list_mutex); } static void bnad_lock_uninit(struct bnad *bnad) { mutex_destroy(&bnad->conf_mutex); - mutex_destroy(&bnad_list_mutex); } /* PCI Initialization */ @@ -3653,7 +3632,7 @@ bnad_pci_probe(struct pci_dev *pdev, } bnad = netdev_priv(netdev); bnad_lock_init(bnad); - bnad_add_to_list(bnad); + bnad->id = atomic_inc_return(&bna_id) - 1; mutex_lock(&bnad->conf_mutex); /* @@ -3807,7 +3786,6 @@ pci_uninit: bnad_pci_uninit(pdev); unlock_mutex: mutex_unlock(&bnad->conf_mutex); - bnad_remove_from_list(bnad); bnad_lock_uninit(bnad); free_netdev(netdev); return err; @@ -3845,7 +3823,6 @@ bnad_pci_remove(struct pci_dev *pdev) bnad_disable_msix(bnad); bnad_pci_uninit(pdev); mutex_unlock(&bnad->conf_mutex); - bnad_remove_from_list(bnad); bnad_lock_uninit(bnad); /* Remove the debugfs node for this bnad */ kfree(bnad->regdata); diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h index f4ed816b93ee..46f7b842b39c 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.h +++ b/drivers/net/ethernet/brocade/bna/bnad.h @@ -288,7 +288,6 @@ struct bnad_rx_unmap_q { struct bnad { struct net_device *netdev; u32 id; - struct list_head list_entry; /* Data path */ struct bnad_tx_info tx_info[BNAD_MAX_TX]; diff --git a/drivers/net/ethernet/cadence/macb.c b/drivers/net/ethernet/cadence/macb.c index d954a97b0b0b..63144bb413d1 100644 --- a/drivers/net/ethernet/cadence/macb.c +++ b/drivers/net/ethernet/cadence/macb.c @@ -541,6 +541,14 @@ static void macb_tx_unmap(struct macb *bp, struct macb_tx_skb *tx_skb) } } +static inline void macb_set_addr(struct macb_dma_desc *desc, dma_addr_t addr) +{ + desc->addr = (u32)addr; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + desc->addrh = (u32)(addr >> 32); +#endif +} + static void macb_tx_error_task(struct work_struct *work) { struct macb_queue *queue = container_of(work, struct macb_queue, @@ -621,14 +629,17 @@ static void macb_tx_error_task(struct work_struct *work) /* Set end of TX queue */ desc = macb_tx_desc(queue, 0); - desc->addr = 0; + macb_set_addr(desc, 0); desc->ctrl = MACB_BIT(TX_USED); /* Make descriptor updates visible to hardware */ wmb(); /* Reinitialize the TX desc queue */ - queue_writel(queue, TBQP, queue->tx_ring_dma); + queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32)); +#endif /* Make TX ring reflect state of hardware */ queue->tx_head = 0; queue->tx_tail = 0; @@ -750,7 +761,7 @@ static void gem_rx_refill(struct macb *bp) if (entry == RX_RING_SIZE - 1) paddr |= MACB_BIT(RX_WRAP); - bp->rx_ring[entry].addr = paddr; + macb_set_addr(&(bp->rx_ring[entry]), paddr); bp->rx_ring[entry].ctrl = 0; /* properly align Ethernet header */ @@ -798,7 +809,9 @@ static int gem_rx(struct macb *bp, int budget) int count = 0; while (count < budget) { - u32 addr, ctrl; + u32 ctrl; + dma_addr_t addr; + bool rxused; entry = macb_rx_ring_wrap(bp->rx_tail); desc = &bp->rx_ring[entry]; @@ -806,10 +819,14 @@ static int gem_rx(struct macb *bp, int budget) /* Make hw descriptor updates visible to CPU */ rmb(); - addr = desc->addr; + rxused = (desc->addr & MACB_BIT(RX_USED)) ? true : false; + addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + addr |= ((u64)(desc->addrh) << 32); +#endif ctrl = desc->ctrl; - if (!(addr & MACB_BIT(RX_USED))) + if (!rxused) break; bp->rx_tail++; @@ -835,7 +852,6 @@ static int gem_rx(struct macb *bp, int budget) netdev_vdbg(bp->dev, "gem_rx %u (len %u)\n", entry, len); skb_put(skb, len); - addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, addr)); dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, DMA_FROM_DEVICE); @@ -1299,7 +1315,7 @@ static unsigned int macb_tx_map(struct macb *bp, ctrl |= MACB_BIT(TX_WRAP); /* Set TX buffer descriptor */ - desc->addr = tx_skb->mapping; + macb_set_addr(desc, tx_skb->mapping); /* desc->addr must be visible to hardware before clearing * 'TX_USED' bit in desc->ctrl. */ @@ -1382,7 +1398,7 @@ static int macb_start_xmit(struct sk_buff *skb, struct net_device *dev) if (macb_clear_csum(skb)) { dev_kfree_skb_any(skb); - return NETDEV_TX_OK; + goto unlock; } /* Map socket buffer for DMA transfer */ @@ -1445,6 +1461,9 @@ static void gem_free_rx_buffers(struct macb *bp) desc = &bp->rx_ring[i]; addr = MACB_BF(RX_WADDR, MACB_BFEXT(RX_WADDR, desc->addr)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + addr |= ((u64)(desc->addrh) << 32); +#endif dma_unmap_single(&bp->pdev->dev, addr, bp->rx_buffer_size, DMA_FROM_DEVICE); dev_kfree_skb_any(skb); @@ -1570,7 +1589,7 @@ static void gem_init_rings(struct macb *bp) for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { for (i = 0; i < TX_RING_SIZE; i++) { - queue->tx_ring[i].addr = 0; + macb_set_addr(&(queue->tx_ring[i]), 0); queue->tx_ring[i].ctrl = MACB_BIT(TX_USED); } queue->tx_ring[TX_RING_SIZE - 1].ctrl |= MACB_BIT(TX_WRAP); @@ -1717,6 +1736,10 @@ static void macb_configure_dma(struct macb *bp) dmacfg |= GEM_BIT(TXCOEN); else dmacfg &= ~GEM_BIT(TXCOEN); + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + dmacfg |= GEM_BIT(ADDR64); +#endif netdev_dbg(bp->dev, "Cadence configure DMA with 0x%08x\n", dmacfg); gem_writel(bp, DMACFG, dmacfg); @@ -1762,9 +1785,15 @@ static void macb_init_hw(struct macb *bp) macb_configure_dma(bp); /* Initialize TX and RX buffers */ - macb_writel(bp, RBQP, bp->rx_ring_dma); + macb_writel(bp, RBQP, (u32)(bp->rx_ring_dma)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + macb_writel(bp, RBQPH, (u32)(bp->rx_ring_dma >> 32)); +#endif for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { - queue_writel(queue, TBQP, queue->tx_ring_dma); + queue_writel(queue, TBQP, (u32)(queue->tx_ring_dma)); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue_writel(queue, TBQPH, (u32)(queue->tx_ring_dma >> 32)); +#endif /* Enable interrupts */ queue_writel(queue, IER, @@ -2326,7 +2355,8 @@ static void macb_probe_queues(void __iomem *mem, } static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk) + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk) { int err; @@ -2348,6 +2378,10 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, if (IS_ERR(*tx_clk)) *tx_clk = NULL; + *rx_clk = devm_clk_get(&pdev->dev, "rx_clk"); + if (IS_ERR(*rx_clk)) + *rx_clk = NULL; + err = clk_prepare_enable(*pclk); if (err) { dev_err(&pdev->dev, "failed to enable pclk (%u)\n", err); @@ -2366,8 +2400,17 @@ static int macb_clk_init(struct platform_device *pdev, struct clk **pclk, goto err_disable_hclk; } + err = clk_prepare_enable(*rx_clk); + if (err) { + dev_err(&pdev->dev, "failed to enable rx_clk (%u)\n", err); + goto err_disable_txclk; + } + return 0; +err_disable_txclk: + clk_disable_unprepare(*tx_clk); + err_disable_hclk: clk_disable_unprepare(*hclk); @@ -2402,6 +2445,9 @@ static int macb_init(struct platform_device *pdev) queue->IDR = GEM_IDR(hw_q - 1); queue->IMR = GEM_IMR(hw_q - 1); queue->TBQP = GEM_TBQP(hw_q - 1); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue->TBQPH = GEM_TBQPH(hw_q -1); +#endif } else { /* queue0 uses legacy registers */ queue->ISR = MACB_ISR; @@ -2409,6 +2455,9 @@ static int macb_init(struct platform_device *pdev) queue->IDR = MACB_IDR; queue->IMR = MACB_IMR; queue->TBQP = MACB_TBQP; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + queue->TBQPH = MACB_TBQPH; +#endif } /* get irq: here we use the linux queue index, not the hardware @@ -2751,12 +2800,14 @@ static const struct net_device_ops at91ether_netdev_ops = { }; static int at91ether_clk_init(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk) + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk) { int err; *hclk = NULL; *tx_clk = NULL; + *rx_clk = NULL; *pclk = devm_clk_get(&pdev->dev, "ether_clk"); if (IS_ERR(*pclk)) @@ -2880,13 +2931,13 @@ MODULE_DEVICE_TABLE(of, macb_dt_ids); static int macb_probe(struct platform_device *pdev) { int (*clk_init)(struct platform_device *, struct clk **, - struct clk **, struct clk **) + struct clk **, struct clk **, struct clk **) = macb_clk_init; int (*init)(struct platform_device *) = macb_init; struct device_node *np = pdev->dev.of_node; struct device_node *phy_node; const struct macb_config *macb_config = NULL; - struct clk *pclk, *hclk = NULL, *tx_clk = NULL; + struct clk *pclk, *hclk = NULL, *tx_clk = NULL, *rx_clk = NULL; unsigned int queue_mask, num_queues; struct macb_platform_data *pdata; bool native_io; @@ -2914,7 +2965,7 @@ static int macb_probe(struct platform_device *pdev) } } - err = clk_init(pdev, &pclk, &hclk, &tx_clk); + err = clk_init(pdev, &pclk, &hclk, &tx_clk, &rx_clk); if (err) return err; @@ -2950,6 +3001,7 @@ static int macb_probe(struct platform_device *pdev) bp->pclk = pclk; bp->hclk = hclk; bp->tx_clk = tx_clk; + bp->rx_clk = rx_clk; if (macb_config) bp->jumbo_max_len = macb_config->jumbo_max_len; @@ -2958,6 +3010,11 @@ static int macb_probe(struct platform_device *pdev) bp->wol |= MACB_WOL_HAS_MAGIC_PACKET; device_init_wakeup(&pdev->dev, bp->wol & MACB_WOL_HAS_MAGIC_PACKET); +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + if (GEM_BFEXT(DBWDEF, gem_readl(bp, DCFG1)) > GEM_DBW32) + dma_set_mask(&pdev->dev, DMA_BIT_MASK(44)); +#endif + spin_lock_init(&bp->lock); /* setup capabilities */ @@ -2968,7 +3025,7 @@ static int macb_probe(struct platform_device *pdev) dev->irq = platform_get_irq(pdev, 0); if (dev->irq < 0) { err = dev->irq; - goto err_disable_clocks; + goto err_out_free_netdev; } mac = of_get_mac_address(np); @@ -3043,6 +3100,7 @@ err_disable_clocks: clk_disable_unprepare(tx_clk); clk_disable_unprepare(hclk); clk_disable_unprepare(pclk); + clk_disable_unprepare(rx_clk); return err; } @@ -3069,6 +3127,7 @@ static int macb_remove(struct platform_device *pdev) clk_disable_unprepare(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); + clk_disable_unprepare(bp->rx_clk); free_netdev(dev); } @@ -3092,6 +3151,7 @@ static int __maybe_unused macb_suspend(struct device *dev) clk_disable_unprepare(bp->tx_clk); clk_disable_unprepare(bp->hclk); clk_disable_unprepare(bp->pclk); + clk_disable_unprepare(bp->rx_clk); } return 0; @@ -3111,6 +3171,7 @@ static int __maybe_unused macb_resume(struct device *dev) clk_prepare_enable(bp->pclk); clk_prepare_enable(bp->hclk); clk_prepare_enable(bp->tx_clk); + clk_prepare_enable(bp->rx_clk); } netif_device_attach(netdev); diff --git a/drivers/net/ethernet/cadence/macb.h b/drivers/net/ethernet/cadence/macb.h index b6fcf10621b6..8bed4b52fef5 100644 --- a/drivers/net/ethernet/cadence/macb.h +++ b/drivers/net/ethernet/cadence/macb.h @@ -66,6 +66,8 @@ #define MACB_USRIO 0x00c0 #define MACB_WOL 0x00c4 #define MACB_MID 0x00fc +#define MACB_TBQPH 0x04C8 +#define MACB_RBQPH 0x04D4 /* GEM register offsets. */ #define GEM_NCFGR 0x0004 /* Network Config */ @@ -139,6 +141,7 @@ #define GEM_ISR(hw_q) (0x0400 + ((hw_q) << 2)) #define GEM_TBQP(hw_q) (0x0440 + ((hw_q) << 2)) +#define GEM_TBQPH(hw_q) (0x04C8) #define GEM_RBQP(hw_q) (0x0480 + ((hw_q) << 2)) #define GEM_IER(hw_q) (0x0600 + ((hw_q) << 2)) #define GEM_IDR(hw_q) (0x0620 + ((hw_q) << 2)) @@ -249,6 +252,8 @@ #define GEM_RXBS_SIZE 8 #define GEM_DDRP_OFFSET 24 /* disc_when_no_ahb */ #define GEM_DDRP_SIZE 1 +#define GEM_ADDR64_OFFSET 30 /* Address bus width - 64b or 32b */ +#define GEM_ADDR64_SIZE 1 /* Bitfields in NSR */ @@ -474,6 +479,10 @@ struct macb_dma_desc { u32 addr; u32 ctrl; +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT + u32 addrh; + u32 resvd; +#endif }; /* DMA descriptor bitfields */ @@ -763,7 +772,8 @@ struct macb_config { u32 caps; unsigned int dma_burst_length; int (*clk_init)(struct platform_device *pdev, struct clk **pclk, - struct clk **hclk, struct clk **tx_clk); + struct clk **hclk, struct clk **tx_clk, + struct clk **rx_clk); int (*init)(struct platform_device *pdev); int jumbo_max_len; }; @@ -777,6 +787,7 @@ struct macb_queue { unsigned int IDR; unsigned int IMR; unsigned int TBQP; + unsigned int TBQPH; unsigned int tx_head, tx_tail; struct macb_dma_desc *tx_ring; @@ -809,6 +820,7 @@ struct macb { struct clk *pclk; struct clk *hclk; struct clk *tx_clk; + struct clk *rx_clk; struct net_device *dev; struct napi_struct napi; struct net_device_stats stats; diff --git a/drivers/net/ethernet/cavium/Kconfig b/drivers/net/ethernet/cavium/Kconfig index 0ef232d3331e..92f411c9f0df 100644 --- a/drivers/net/ethernet/cavium/Kconfig +++ b/drivers/net/ethernet/cavium/Kconfig @@ -36,10 +36,20 @@ config THUNDER_NIC_BGX depends on 64BIT select PHYLIB select MDIO_THUNDER + select THUNDER_NIC_RGX ---help--- This driver supports programming and controlling of MAC interface from NIC physical function driver. +config THUNDER_NIC_RGX + tristate "Thunder MAC interface driver (RGX)" + depends on 64BIT + select PHYLIB + select MDIO_THUNDER + ---help--- + This driver supports configuring XCV block of RGX interface + present on CN81XX chip. + config LIQUIDIO tristate "Cavium LiquidIO support" depends on 64BIT @@ -48,7 +58,7 @@ config LIQUIDIO select LIBCRC32C ---help--- This driver supports Cavium LiquidIO Intelligent Server Adapters - based on CN66XX and CN68XX chips. + based on CN66XX, CN68XX and CN23XX chips. To compile this driver as a module, choose M here: the module will be called liquidio. This is recommended. diff --git a/drivers/net/ethernet/cavium/liquidio/Makefile b/drivers/net/ethernet/cavium/liquidio/Makefile index 2f366806835d..5a27b2a44039 100644 --- a/drivers/net/ethernet/cavium/liquidio/Makefile +++ b/drivers/net/ethernet/cavium/liquidio/Makefile @@ -3,14 +3,16 @@ # obj-$(CONFIG_LIQUIDIO) += liquidio.o -liquidio-objs := lio_main.o \ - lio_ethtool.o \ - request_manager.o \ - response_manager.o \ - octeon_device.o \ - cn66xx_device.o \ - cn68xx_device.o \ - octeon_mem_ops.o \ - octeon_droq.o \ - octeon_console.o \ - octeon_nic.o +liquidio-$(CONFIG_LIQUIDIO) += lio_ethtool.o \ + lio_core.o \ + request_manager.o \ + response_manager.o \ + octeon_device.o \ + cn66xx_device.o \ + cn68xx_device.o \ + cn23xx_pf_device.o \ + octeon_mem_ops.o \ + octeon_droq.o \ + octeon_nic.o + +liquidio-objs := lio_main.o octeon_console.o $(liquidio-y) diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c new file mode 100644 index 000000000000..bddb198c0b74 --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.c @@ -0,0 +1,1237 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ + +#include +#include +#include +#include "liquidio_common.h" +#include "octeon_droq.h" +#include "octeon_iq.h" +#include "response_manager.h" +#include "octeon_device.h" +#include "cn23xx_pf_device.h" +#include "octeon_main.h" + +#define RESET_NOTDONE 0 +#define RESET_DONE 1 + +/* Change the value of SLI Packet Input Jabber Register to allow + * VXLAN TSO packets which can be 64424 bytes, exceeding the + * MAX_GSO_SIZE we supplied to the kernel + */ +#define CN23XX_INPUT_JABBER 64600 + +#define LIOLUT_RING_DISTRIBUTION 9 +const int liolut_num_vfs_to_rings_per_vf[LIOLUT_RING_DISTRIBUTION] = { + 0, 8, 4, 2, 2, 2, 1, 1, 1 +}; + +void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct) +{ + int i = 0; + u32 regval = 0; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + /*In cn23xx_soft_reset*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%llx\n", + "CN23XX_WIN_WR_MASK_REG", CVM_CAST64(CN23XX_WIN_WR_MASK_REG), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_WIN_WR_MASK_REG))); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_SCRATCH1", CVM_CAST64(CN23XX_SLI_SCRATCH1), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1))); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_RST_SOFT_RST", CN23XX_RST_SOFT_RST, + lio_pci_readq(oct, CN23XX_RST_SOFT_RST)); + + /*In cn23xx_set_dpi_regs*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_DPI_DMA_CONTROL", CN23XX_DPI_DMA_CONTROL, + lio_pci_readq(oct, CN23XX_DPI_DMA_CONTROL)); + + for (i = 0; i < 6; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_DPI_DMA_ENG_ENB", i, + CN23XX_DPI_DMA_ENG_ENB(i), + lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_ENB(i))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_DPI_DMA_ENG_BUF", i, + CN23XX_DPI_DMA_ENG_BUF(i), + lio_pci_readq(oct, CN23XX_DPI_DMA_ENG_BUF(i))); + } + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", "CN23XX_DPI_CTL", + CN23XX_DPI_CTL, lio_pci_readq(oct, CN23XX_DPI_CTL)); + + /*In cn23xx_setup_pcie_mps and cn23xx_setup_pcie_mrrs */ + pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, ®val); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_CONFIG_PCIE_DEVCTL", + CVM_CAST64(CN23XX_CONFIG_PCIE_DEVCTL), CVM_CAST64(regval)); + + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_DPI_SLI_PRTX_CFG", oct->pcie_port, + CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port), + lio_pci_readq(oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port))); + + /*In cn23xx_specific_regs_setup */ + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_S2M_PORTX_CTL", oct->pcie_port, + CVM_CAST64(CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)))); + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_RING_RST", CVM_CAST64(CN23XX_SLI_PKT_IOQ_RING_RST), + (u64)octeon_read_csr64(oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + + /*In cn23xx_setup_global_mac_regs*/ + for (i = 0; i < CN23XX_MAX_MACS; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_PKT_MAC_RINFO64", i, + CVM_CAST64(CN23XX_SLI_PKT_MAC_RINFO64(i, oct->pf_num)), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_PKT_MAC_RINFO64 + (i, oct->pf_num)))); + } + + /*In cn23xx_setup_global_input_regs*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_PKT_CONTROL64", i, + CVM_CAST64(CN23XX_SLI_IQ_PKT_CONTROL64(i)), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_IQ_PKT_CONTROL64(i)))); + } + + /*In cn23xx_setup_global_output_regs*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_WMARK", CVM_CAST64(CN23XX_SLI_OQ_WMARK), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_OQ_WMARK))); + + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKT_CONTROL", i, + CVM_CAST64(CN23XX_SLI_OQ_PKT_CONTROL(i)), + CVM_CAST64(octeon_read_csr( + oct, CN23XX_SLI_OQ_PKT_CONTROL(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKT_INT_LEVELS", i, + CVM_CAST64(CN23XX_SLI_OQ_PKT_INT_LEVELS(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(i)))); + } + + /*In cn23xx_enable_interrupt and cn23xx_disable_interrupt*/ + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "cn23xx->intr_enb_reg64", + CVM_CAST64((long)(cn23xx->intr_enb_reg64)), + CVM_CAST64(readq(cn23xx->intr_enb_reg64))); + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "cn23xx->intr_sum_reg64", + CVM_CAST64((long)(cn23xx->intr_sum_reg64)), + CVM_CAST64(readq(cn23xx->intr_sum_reg64))); + + /*In cn23xx_setup_iq_regs*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_BASE_ADDR64", i, + CVM_CAST64(CN23XX_SLI_IQ_BASE_ADDR64(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_IQ_BASE_ADDR64(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_SIZE", i, + CVM_CAST64(CN23XX_SLI_IQ_SIZE(i)), + CVM_CAST64(octeon_read_csr + (oct, CN23XX_SLI_IQ_SIZE(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_DOORBELL", i, + CVM_CAST64(CN23XX_SLI_IQ_DOORBELL(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_IQ_DOORBELL(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_IQ_INSTR_COUNT64", i, + CVM_CAST64(CN23XX_SLI_IQ_INSTR_COUNT64(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_IQ_INSTR_COUNT64(i)))); + } + + /*In cn23xx_setup_oq_regs*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_BASE_ADDR64", i, + CVM_CAST64(CN23XX_SLI_OQ_BASE_ADDR64(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_BASE_ADDR64(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_SIZE", i, + CVM_CAST64(CN23XX_SLI_OQ_SIZE(i)), + CVM_CAST64(octeon_read_csr + (oct, CN23XX_SLI_OQ_SIZE(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_BUFF_INFO_SIZE", i, + CVM_CAST64(CN23XX_SLI_OQ_BUFF_INFO_SIZE(i)), + CVM_CAST64(octeon_read_csr( + oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKTS_SENT", i, + CVM_CAST64(CN23XX_SLI_OQ_PKTS_SENT(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKTS_SENT(i)))); + dev_dbg(&oct->pci_dev->dev, "%s(%d)[%llx] : 0x%016llx\n", + "CN23XX_SLI_OQ_PKTS_CREDIT", i, + CVM_CAST64(CN23XX_SLI_OQ_PKTS_CREDIT(i)), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKTS_CREDIT(i)))); + } + + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_PKT_TIME_INT", + CVM_CAST64(CN23XX_SLI_PKT_TIME_INT), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_TIME_INT))); + dev_dbg(&oct->pci_dev->dev, "%s[%llx] : 0x%016llx\n", + "CN23XX_SLI_PKT_CNT_INT", + CVM_CAST64(CN23XX_SLI_PKT_CNT_INT), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_PKT_CNT_INT))); +} + +static int cn23xx_pf_soft_reset(struct octeon_device *oct) +{ + octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF); + + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: BIST enabled for CN23XX soft reset\n", + oct->octeon_id); + + octeon_write_csr64(oct, CN23XX_SLI_SCRATCH1, 0x1234ULL); + + /* Initiate chip-wide soft reset */ + lio_pci_readq(oct, CN23XX_RST_SOFT_RST); + lio_pci_writeq(oct, 1, CN23XX_RST_SOFT_RST); + + /* Wait for 100ms as Octeon resets. */ + mdelay(100); + + if (octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1) == 0x1234ULL) { + dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Soft reset failed\n", + oct->octeon_id); + return 1; + } + + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Reset completed\n", + oct->octeon_id); + + /* restore the reset value*/ + octeon_write_csr64(oct, CN23XX_WIN_WR_MASK_REG, 0xFF); + + return 0; +} + +static void cn23xx_enable_error_reporting(struct octeon_device *oct) +{ + u32 regval; + u32 uncorrectable_err_mask, corrtable_err_status; + + pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, ®val); + if (regval & CN23XX_CONFIG_PCIE_DEVCTL_MASK) { + uncorrectable_err_mask = 0; + corrtable_err_status = 0; + pci_read_config_dword(oct->pci_dev, + CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK, + &uncorrectable_err_mask); + pci_read_config_dword(oct->pci_dev, + CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS, + &corrtable_err_status); + dev_err(&oct->pci_dev->dev, "PCI-E Fatal error detected;\n" + "\tdev_ctl_status_reg = 0x%08x\n" + "\tuncorrectable_error_mask_reg = 0x%08x\n" + "\tcorrectable_error_status_reg = 0x%08x\n", + regval, uncorrectable_err_mask, + corrtable_err_status); + } + + regval |= 0xf; /* Enable Link error reporting */ + + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: Enabling PCI-E error reporting..\n", + oct->octeon_id); + pci_write_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, regval); +} + +static u32 cn23xx_coprocessor_clock(struct octeon_device *oct) +{ + /* Bits 29:24 of RST_BOOT[PNR_MUL] holds the ref.clock MULTIPLIER + * for SLI. + */ + + /* TBD: get the info in Hand-shake */ + return (((lio_pci_readq(oct, CN23XX_RST_BOOT) >> 24) & 0x3f) * 50); +} + +u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us) +{ + /* This gives the SLI clock per microsec */ + u32 oqticks_per_us = cn23xx_coprocessor_clock(oct); + + oct->pfvf_hsword.coproc_tics_per_us = oqticks_per_us; + + /* This gives the clock cycles per millisecond */ + oqticks_per_us *= 1000; + + /* This gives the oq ticks (1024 core clock cycles) per millisecond */ + oqticks_per_us /= 1024; + + /* time_intr is in microseconds. The next 2 steps gives the oq ticks + * corressponding to time_intr. + */ + oqticks_per_us *= time_intr_in_us; + oqticks_per_us /= 1000; + + return oqticks_per_us; +} + +static void cn23xx_setup_global_mac_regs(struct octeon_device *oct) +{ + u64 reg_val; + u16 mac_no = oct->pcie_port; + u16 pf_num = oct->pf_num; + + /* programming SRN and TRS for each MAC(0..3) */ + + dev_dbg(&oct->pci_dev->dev, "%s:Using pcie port %d\n", + __func__, mac_no); + /* By default, mapping all 64 IOQs to a single MACs */ + + reg_val = + octeon_read_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num)); + + if (oct->rev_id == OCTEON_CN23XX_REV_1_1) { + /* setting SRN <6:0> */ + reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF_PASS_1_1; + } else { + /* setting SRN <6:0> */ + reg_val = pf_num * CN23XX_MAX_RINGS_PER_PF; + } + + /* setting TRS <23:16> */ + reg_val = reg_val | + (oct->sriov_info.trs << CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS); + /* write these settings to MAC register */ + octeon_write_csr64(oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num), + reg_val); + + dev_dbg(&oct->pci_dev->dev, "SLI_PKT_MAC(%d)_PF(%d)_RINFO : 0x%016llx\n", + mac_no, pf_num, (u64)octeon_read_csr64 + (oct, CN23XX_SLI_PKT_MAC_RINFO64(mac_no, pf_num))); +} + +static int cn23xx_reset_io_queues(struct octeon_device *oct) +{ + int ret_val = 0; + u64 d64; + u32 q_no, srn, ern; + u32 loop = 1000; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->sriov_info.num_pf_rings; + + /*As per HRM reg description, s/w cant write 0 to ENB. */ + /*to make the queue off, need to set the RST bit. */ + + /* Reset the Enable bit for all the 64 IQs. */ + for (q_no = srn; q_no < ern; q_no++) { + /* set RST bit to 1. This bit applies to both IQ and OQ */ + d64 = octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + d64 = d64 | CN23XX_PKT_INPUT_CTL_RST; + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), d64); + } + + /*wait until the RST bit is clear or the RST and quite bits are set*/ + for (q_no = srn; q_no < ern; q_no++) { + u64 reg_val = octeon_read_csr64(oct, + CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + while ((READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) && + !(READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_QUIET) && + loop--) { + WRITE_ONCE(reg_val, octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))); + } + if (!loop) { + dev_err(&oct->pci_dev->dev, + "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n", + q_no); + return -1; + } + WRITE_ONCE(reg_val, READ_ONCE(reg_val) & + ~CN23XX_PKT_INPUT_CTL_RST); + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + READ_ONCE(reg_val)); + + WRITE_ONCE(reg_val, octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))); + if (READ_ONCE(reg_val) & CN23XX_PKT_INPUT_CTL_RST) { + dev_err(&oct->pci_dev->dev, + "clearing the reset failed for qno: %u\n", + q_no); + ret_val = -1; + } + } + + return ret_val; +} + +static int cn23xx_pf_setup_global_input_regs(struct octeon_device *oct) +{ + u32 q_no, ern, srn; + u64 pf_num; + u64 intr_threshold, reg_val; + struct octeon_instr_queue *iq; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + pf_num = oct->pf_num; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->sriov_info.num_pf_rings; + + if (cn23xx_reset_io_queues(oct)) + return -1; + + /** Set the MAC_NUM and PVF_NUM in IQ_PKT_CONTROL reg + * for all queues.Only PF can set these bits. + * bits 29:30 indicate the MAC num. + * bits 32:47 indicate the PVF num. + */ + for (q_no = 0; q_no < ern; q_no++) { + reg_val = oct->pcie_port << CN23XX_PKT_INPUT_CTL_MAC_NUM_POS; + reg_val |= pf_num << CN23XX_PKT_INPUT_CTL_PF_NUM_POS; + + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + reg_val); + } + + /* Select ES, RO, NS, RDSIZE,DPTR Fomat#0 for + * pf queues + */ + for (q_no = srn; q_no < ern; q_no++) { + void __iomem *inst_cnt_reg; + + iq = oct->instr_queue[q_no]; + if (iq) + inst_cnt_reg = iq->inst_cnt_reg; + else + inst_cnt_reg = (u8 *)oct->mmio[0].hw_addr + + CN23XX_SLI_IQ_INSTR_COUNT64(q_no); + + reg_val = + octeon_read_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + + reg_val |= CN23XX_PKT_INPUT_CTL_MASK; + + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + reg_val); + + /* Set WMARK level for triggering PI_INT */ + /* intr_threshold = CN23XX_DEF_IQ_INTR_THRESHOLD & */ + intr_threshold = CFG_GET_IQ_INTR_PKT(cn23xx->conf) & + CN23XX_PKT_IN_DONE_WMARK_MASK; + + writeq((readq(inst_cnt_reg) & + ~(CN23XX_PKT_IN_DONE_WMARK_MASK << + CN23XX_PKT_IN_DONE_WMARK_BIT_POS)) | + (intr_threshold << CN23XX_PKT_IN_DONE_WMARK_BIT_POS), + inst_cnt_reg); + } + return 0; +} + +static void cn23xx_pf_setup_global_output_regs(struct octeon_device *oct) +{ + u32 reg_val; + u32 q_no, ern, srn; + u64 time_threshold; + + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->sriov_info.num_pf_rings; + + if (CFG_GET_IS_SLI_BP_ON(cn23xx->conf)) { + octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 32); + } else { + /** Set Output queue watermark to 0 to disable backpressure */ + octeon_write_csr64(oct, CN23XX_SLI_OQ_WMARK, 0); + } + + for (q_no = srn; q_no < ern; q_no++) { + reg_val = octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no)); + + /* set IPTR & DPTR */ + reg_val |= + (CN23XX_PKT_OUTPUT_CTL_IPTR | CN23XX_PKT_OUTPUT_CTL_DPTR); + + /* reset BMODE */ + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_BMODE); + + /* No Relaxed Ordering, No Snoop, 64-bit Byte swap + * for Output Queue ScatterList + * reset ROR_P, NSR_P + */ + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR_P); + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR_P); + +#ifdef __LITTLE_ENDIAN_BITFIELD + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ES_P); +#else + reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES_P); +#endif + /* No Relaxed Ordering, No Snoop, 64-bit Byte swap + * for Output Queue Data + * reset ROR, NSR + */ + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_ROR); + reg_val &= ~(CN23XX_PKT_OUTPUT_CTL_NSR); + /* set the ES bit */ + reg_val |= (CN23XX_PKT_OUTPUT_CTL_ES); + + /* write all the selected settings */ + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no), reg_val); + + /* Enabling these interrupt in oct->fn_list.enable_interrupt() + * routine which called after IOQ init. + * Set up interrupt packet and time thresholds + * for all the OQs + */ + time_threshold = cn23xx_pf_get_oq_ticks( + oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf)); + + octeon_write_csr64(oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (CFG_GET_OQ_INTR_PKT(cn23xx->conf) | + (time_threshold << 32))); + } + + /** Setting the water mark level for pko back pressure **/ + writeq(0x40, (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_WMARK); + + /** Disabling setting OQs in reset when ring has no dorebells + * enabling this will cause of head of line blocking + */ + /* Do it only for pass1.1. and pass1.2 */ + if ((oct->rev_id == OCTEON_CN23XX_REV_1_0) || + (oct->rev_id == OCTEON_CN23XX_REV_1_1)) + writeq(readq((u8 *)oct->mmio[0].hw_addr + + CN23XX_SLI_GBL_CONTROL) | 0x2, + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_GBL_CONTROL); + + /** Enable channel-level backpressure */ + if (oct->pf_num) + writeq(0xffffffffffffffffULL, + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN2_W1S); + else + writeq(0xffffffffffffffffULL, + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OUT_BP_EN_W1S); +} + +static int cn23xx_setup_pf_device_regs(struct octeon_device *oct) +{ + cn23xx_enable_error_reporting(oct); + + /* program the MAC(0..3)_RINFO before setting up input/output regs */ + cn23xx_setup_global_mac_regs(oct); + + if (cn23xx_pf_setup_global_input_regs(oct)) + return -1; + + cn23xx_pf_setup_global_output_regs(oct); + + /* Default error timeout value should be 0x200000 to avoid host hang + * when reads invalid register + */ + octeon_write_csr64(oct, CN23XX_SLI_WINDOW_CTL, + CN23XX_SLI_WINDOW_CTL_DEFAULT); + + /* set SLI_PKT_IN_JABBER to handle large VXLAN packets */ + octeon_write_csr64(oct, CN23XX_SLI_PKT_IN_JABBER, CN23XX_INPUT_JABBER); + return 0; +} + +static void cn23xx_setup_iq_regs(struct octeon_device *oct, u32 iq_no) +{ + struct octeon_instr_queue *iq = oct->instr_queue[iq_no]; + u64 pkt_in_done; + + iq_no += oct->sriov_info.pf_srn; + + /* Write the start of the input queue's ring and its size */ + octeon_write_csr64(oct, CN23XX_SLI_IQ_BASE_ADDR64(iq_no), + iq->base_addr_dma); + octeon_write_csr(oct, CN23XX_SLI_IQ_SIZE(iq_no), iq->max_count); + + /* Remember the doorbell & instruction count register addr + * for this queue + */ + iq->doorbell_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_DOORBELL(iq_no); + iq->inst_cnt_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_IQ_INSTR_COUNT64(iq_no); + dev_dbg(&oct->pci_dev->dev, "InstQ[%d]:dbell reg @ 0x%p instcnt_reg @ 0x%p\n", + iq_no, iq->doorbell_reg, iq->inst_cnt_reg); + + /* Store the current instruction counter (used in flush_iq + * calculation) + */ + pkt_in_done = readq(iq->inst_cnt_reg); + + if (oct->msix_on) { + /* Set CINT_ENB to enable IQ interrupt */ + writeq((pkt_in_done | CN23XX_INTR_CINT_ENB), + iq->inst_cnt_reg); + } else { + /* Clear the count by writing back what we read, but don't + * enable interrupts + */ + writeq(pkt_in_done, iq->inst_cnt_reg); + } + + iq->reset_instr_cnt = 0; +} + +static void cn23xx_setup_oq_regs(struct octeon_device *oct, u32 oq_no) +{ + u32 reg_val; + struct octeon_droq *droq = oct->droq[oq_no]; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 time_threshold; + u64 cnt_threshold; + + oq_no += oct->sriov_info.pf_srn; + + octeon_write_csr64(oct, CN23XX_SLI_OQ_BASE_ADDR64(oq_no), + droq->desc_ring_dma); + octeon_write_csr(oct, CN23XX_SLI_OQ_SIZE(oq_no), droq->max_count); + + octeon_write_csr(oct, CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq_no), + (droq->buffer_size | (OCT_RH_SIZE << 16))); + + /* Get the mapped address of the pkt_sent and pkts_credit regs */ + droq->pkts_sent_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_SENT(oq_no); + droq->pkts_credit_reg = + (u8 *)oct->mmio[0].hw_addr + CN23XX_SLI_OQ_PKTS_CREDIT(oq_no); + + if (!oct->msix_on) { + /* Enable this output queue to generate Packet Timer Interrupt + */ + reg_val = + octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); + reg_val |= CN23XX_PKT_OUTPUT_CTL_TENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), + reg_val); + + /* Enable this output queue to generate Packet Count Interrupt + */ + reg_val = + octeon_read_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no)); + reg_val |= CN23XX_PKT_OUTPUT_CTL_CENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(oq_no), + reg_val); + } else { + time_threshold = cn23xx_pf_get_oq_ticks( + oct, (u32)CFG_GET_OQ_INTR_TIME(cn23xx->conf)); + cnt_threshold = (u32)CFG_GET_OQ_INTR_PKT(cn23xx->conf); + + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(oq_no), + ((time_threshold << 32 | cnt_threshold))); + } +} + +static int cn23xx_enable_io_queues(struct octeon_device *oct) +{ + u64 reg_val; + u32 srn, ern, q_no; + u32 loop = 1000; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->num_iqs; + + for (q_no = srn; q_no < ern; q_no++) { + /* set the corresponding IQ IS_64B bit */ + if (oct->io_qmask.iq64B & BIT_ULL(q_no - srn)) { + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + reg_val = reg_val | CN23XX_PKT_INPUT_CTL_IS_64B; + octeon_write_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val); + } + + /* set the corresponding IQ ENB bit */ + if (oct->io_qmask.iq & BIT_ULL(q_no - srn)) { + /* IOQs are in reset by default in PEM2 mode, + * clearing reset bit + */ + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + + if (reg_val & CN23XX_PKT_INPUT_CTL_RST) { + while ((reg_val & CN23XX_PKT_INPUT_CTL_RST) && + !(reg_val & + CN23XX_PKT_INPUT_CTL_QUIET) && + loop--) { + reg_val = octeon_read_csr64( + oct, + CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + } + if (!loop) { + dev_err(&oct->pci_dev->dev, + "clearing the reset reg failed or setting the quiet reg failed for qno: %u\n", + q_no); + return -1; + } + reg_val = reg_val & ~CN23XX_PKT_INPUT_CTL_RST; + octeon_write_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + reg_val); + + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + if (reg_val & CN23XX_PKT_INPUT_CTL_RST) { + dev_err(&oct->pci_dev->dev, + "clearing the reset failed for qno: %u\n", + q_no); + return -1; + } + } + reg_val = octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no)); + reg_val = reg_val | CN23XX_PKT_INPUT_CTL_RING_ENB; + octeon_write_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), reg_val); + } + } + for (q_no = srn; q_no < ern; q_no++) { + u32 reg_val; + /* set the corresponding OQ ENB bit */ + if (oct->io_qmask.oq & BIT_ULL(q_no - srn)) { + reg_val = octeon_read_csr( + oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no)); + reg_val = reg_val | CN23XX_PKT_OUTPUT_CTL_RING_ENB; + octeon_write_csr(oct, CN23XX_SLI_OQ_PKT_CONTROL(q_no), + reg_val); + } + } + return 0; +} + +static void cn23xx_disable_io_queues(struct octeon_device *oct) +{ + int q_no, loop; + u64 d64; + u32 d32; + u32 srn, ern; + + srn = oct->sriov_info.pf_srn; + ern = srn + oct->num_iqs; + + /*** Disable Input Queues. ***/ + for (q_no = srn; q_no < ern; q_no++) { + loop = HZ; + + /* start the Reset for a particular ring */ + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no))); + WRITE_ONCE(d64, READ_ONCE(d64) & + (~(CN23XX_PKT_INPUT_CTL_RING_ENB))); + WRITE_ONCE(d64, READ_ONCE(d64) | CN23XX_PKT_INPUT_CTL_RST); + octeon_write_csr64(oct, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + READ_ONCE(d64)); + + /* Wait until hardware indicates that the particular IQ + * is out of reset. + */ + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) { + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + schedule_timeout_uninterruptible(1); + } + + /* Reset the doorbell register for this Input Queue. */ + octeon_write_csr(oct, CN23XX_SLI_IQ_DOORBELL(q_no), 0xFFFFFFFF); + while (octeon_read_csr64(oct, CN23XX_SLI_IQ_DOORBELL(q_no)) && + loop--) { + schedule_timeout_uninterruptible(1); + } + } + + /*** Disable Output Queues. ***/ + for (q_no = srn; q_no < ern; q_no++) { + loop = HZ; + + /* Wait until hardware indicates that the particular IQ + * is out of reset.It given that SLI_PKT_RING_RST is + * common for both IQs and OQs + */ + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + while (!(READ_ONCE(d64) & BIT_ULL(q_no)) && loop--) { + WRITE_ONCE(d64, octeon_read_csr64( + oct, CN23XX_SLI_PKT_IOQ_RING_RST)); + schedule_timeout_uninterruptible(1); + } + + /* Reset the doorbell register for this Output Queue. */ + octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_CREDIT(q_no), + 0xFFFFFFFF); + while (octeon_read_csr64(oct, + CN23XX_SLI_OQ_PKTS_CREDIT(q_no)) && + loop--) { + schedule_timeout_uninterruptible(1); + } + + /* clear the SLI_PKT(0..63)_CNTS[CNT] reg value */ + WRITE_ONCE(d32, octeon_read_csr( + oct, CN23XX_SLI_OQ_PKTS_SENT(q_no))); + octeon_write_csr(oct, CN23XX_SLI_OQ_PKTS_SENT(q_no), + READ_ONCE(d32)); + } +} + +static u64 cn23xx_pf_msix_interrupt_handler(void *dev) +{ + struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev; + struct octeon_device *oct = ioq_vector->oct_dev; + u64 pkts_sent; + u64 ret = 0; + struct octeon_droq *droq = oct->droq[ioq_vector->droq_index]; + + dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct); + + if (!droq) { + dev_err(&oct->pci_dev->dev, "23XX bringup FIXME: oct pfnum:%d ioq_vector->ioq_num :%d droq is NULL\n", + oct->pf_num, ioq_vector->ioq_num); + return 0; + } + + pkts_sent = readq(droq->pkts_sent_reg); + + /* If our device has interrupted, then proceed. Also check + * for all f's if interrupt was triggered on an error + * and the PCI read fails. + */ + if (!pkts_sent || (pkts_sent == 0xFFFFFFFFFFFFFFFFULL)) + return ret; + + /* Write count reg in sli_pkt_cnts to clear these int.*/ + if ((pkts_sent & CN23XX_INTR_PO_INT) || + (pkts_sent & CN23XX_INTR_PI_INT)) { + if (pkts_sent & CN23XX_INTR_PO_INT) + ret |= MSIX_PO_INT; + } + + if (pkts_sent & CN23XX_INTR_PI_INT) + /* We will clear the count when we update the read_index. */ + ret |= MSIX_PI_INT; + + /* Never need to handle msix mbox intr for pf. They arrive on the last + * msix + */ + return ret; +} + +static irqreturn_t cn23xx_interrupt_handler(void *dev) +{ + struct octeon_device *oct = (struct octeon_device *)dev; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 intr64; + + dev_dbg(&oct->pci_dev->dev, "In %s octeon_dev @ %p\n", __func__, oct); + intr64 = readq(cn23xx->intr_sum_reg64); + + oct->int_status = 0; + + if (intr64 & CN23XX_INTR_ERR) + dev_err(&oct->pci_dev->dev, "OCTEON[%d]: Error Intr: 0x%016llx\n", + oct->octeon_id, CVM_CAST64(intr64)); + + if (oct->msix_on != LIO_FLAG_MSIX_ENABLED) { + if (intr64 & CN23XX_INTR_PKT_DATA) + oct->int_status |= OCT_DEV_INTR_PKT_DATA; + } + + if (intr64 & (CN23XX_INTR_DMA0_FORCE)) + oct->int_status |= OCT_DEV_INTR_DMA0_FORCE; + if (intr64 & (CN23XX_INTR_DMA1_FORCE)) + oct->int_status |= OCT_DEV_INTR_DMA1_FORCE; + + /* Clear the current interrupts */ + writeq(intr64, cn23xx->intr_sum_reg64); + + return IRQ_HANDLED; +} + +static void cn23xx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, + u32 idx, int valid) +{ + u64 bar1; + u64 reg_adr; + + if (!valid) { + reg_adr = lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + WRITE_ONCE(bar1, reg_adr); + lio_pci_writeq(oct, (READ_ONCE(bar1) & 0xFFFFFFFEULL), + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + reg_adr = lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + WRITE_ONCE(bar1, reg_adr); + return; + } + + /* The PEM(0..3)_BAR1_INDEX(0..15)[ADDR_IDX]<23:4> stores + * bits <41:22> of the Core Addr + */ + lio_pci_writeq(oct, (((core_addr >> 22) << 4) | PCI_BAR1_MASK), + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); + + WRITE_ONCE(bar1, lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx))); +} + +static void cn23xx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask) +{ + lio_pci_writeq(oct, mask, + CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); +} + +static u32 cn23xx_bar1_idx_read(struct octeon_device *oct, u32 idx) +{ + return (u32)lio_pci_readq( + oct, CN23XX_PEM_BAR1_INDEX_REG(oct->pcie_port, idx)); +} + +/* always call with lock held */ +static u32 cn23xx_update_read_index(struct octeon_instr_queue *iq) +{ + u32 new_idx; + u32 last_done; + u32 pkt_in_done = readl(iq->inst_cnt_reg); + + last_done = pkt_in_done - iq->pkt_in_done; + iq->pkt_in_done = pkt_in_done; + + /* Modulo of the new index with the IQ size will give us + * the new index. The iq->reset_instr_cnt is always zero for + * cn23xx, so no extra adjustments are needed. + */ + new_idx = (iq->octeon_read_index + + (u32)(last_done & CN23XX_PKT_IN_DONE_CNT_MASK)) % + iq->max_count; + + return new_idx; +} + +static void cn23xx_enable_pf_interrupt(struct octeon_device *oct, u8 intr_flag) +{ + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 intr_val = 0; + + /* Divide the single write to multiple writes based on the flag. */ + /* Enable Interrupt */ + if (intr_flag == OCTEON_ALL_INTR) { + writeq(cn23xx->intr_mask64, cn23xx->intr_enb_reg64); + } else if (intr_flag & OCTEON_OUTPUT_INTR) { + intr_val = readq(cn23xx->intr_enb_reg64); + intr_val |= CN23XX_INTR_PKT_DATA; + writeq(intr_val, cn23xx->intr_enb_reg64); + } +} + +static void cn23xx_disable_pf_interrupt(struct octeon_device *oct, u8 intr_flag) +{ + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + u64 intr_val = 0; + + /* Disable Interrupts */ + if (intr_flag == OCTEON_ALL_INTR) { + writeq(0, cn23xx->intr_enb_reg64); + } else if (intr_flag & OCTEON_OUTPUT_INTR) { + intr_val = readq(cn23xx->intr_enb_reg64); + intr_val &= ~CN23XX_INTR_PKT_DATA; + writeq(intr_val, cn23xx->intr_enb_reg64); + } +} + +static void cn23xx_get_pcie_qlmport(struct octeon_device *oct) +{ + oct->pcie_port = (octeon_read_csr(oct, CN23XX_SLI_MAC_NUMBER)) & 0xff; + + dev_dbg(&oct->pci_dev->dev, "OCTEON: CN23xx uses PCIE Port %d\n", + oct->pcie_port); +} + +static void cn23xx_get_pf_num(struct octeon_device *oct) +{ + u32 fdl_bit = 0; + + /** Read Function Dependency Link reg to get the function number */ + pci_read_config_dword(oct->pci_dev, CN23XX_PCIE_SRIOV_FDL, &fdl_bit); + oct->pf_num = ((fdl_bit >> CN23XX_PCIE_SRIOV_FDL_BIT_POS) & + CN23XX_PCIE_SRIOV_FDL_MASK); +} + +static void cn23xx_setup_reg_address(struct octeon_device *oct) +{ + u8 __iomem *bar0_pciaddr = oct->mmio[0].hw_addr; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + + oct->reg_list.pci_win_wr_addr_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_HI); + oct->reg_list.pci_win_wr_addr_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR_LO); + oct->reg_list.pci_win_wr_addr = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_ADDR64); + + oct->reg_list.pci_win_rd_addr_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_HI); + oct->reg_list.pci_win_rd_addr_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR_LO); + oct->reg_list.pci_win_rd_addr = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_ADDR64); + + oct->reg_list.pci_win_wr_data_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_HI); + oct->reg_list.pci_win_wr_data_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA_LO); + oct->reg_list.pci_win_wr_data = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_WR_DATA64); + + oct->reg_list.pci_win_rd_data_hi = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_HI); + oct->reg_list.pci_win_rd_data_lo = + (u32 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA_LO); + oct->reg_list.pci_win_rd_data = + (u64 __iomem *)(bar0_pciaddr + CN23XX_WIN_RD_DATA64); + + cn23xx_get_pcie_qlmport(oct); + + cn23xx->intr_mask64 = CN23XX_INTR_MASK; + if (!oct->msix_on) + cn23xx->intr_mask64 |= CN23XX_INTR_PKT_TIME; + if (oct->rev_id >= OCTEON_CN23XX_REV_1_1) + cn23xx->intr_mask64 |= CN23XX_INTR_VF_MBOX; + + cn23xx->intr_sum_reg64 = + bar0_pciaddr + + CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num); + cn23xx->intr_enb_reg64 = + bar0_pciaddr + + CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num); +} + +static int cn23xx_sriov_config(struct octeon_device *oct) +{ + u32 total_rings; + struct octeon_cn23xx_pf *cn23xx = (struct octeon_cn23xx_pf *)oct->chip; + /* num_vfs is already filled for us */ + u32 pf_srn, num_pf_rings; + + cn23xx->conf = + (struct octeon_config *)oct_get_config_info(oct, LIO_23XX); + switch (oct->rev_id) { + case OCTEON_CN23XX_REV_1_0: + total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_0; + break; + case OCTEON_CN23XX_REV_1_1: + total_rings = CN23XX_MAX_RINGS_PER_PF_PASS_1_1; + break; + default: + total_rings = CN23XX_MAX_RINGS_PER_PF; + break; + } + if (!oct->sriov_info.num_pf_rings) { + if (total_rings > num_present_cpus()) + num_pf_rings = num_present_cpus(); + else + num_pf_rings = total_rings; + } else { + num_pf_rings = oct->sriov_info.num_pf_rings; + + if (num_pf_rings > total_rings) { + dev_warn(&oct->pci_dev->dev, + "num_queues_per_pf requested %u is more than available rings. Reducing to %u\n", + num_pf_rings, total_rings); + num_pf_rings = total_rings; + } + } + + total_rings = num_pf_rings; + /* the first ring of the pf */ + pf_srn = total_rings - num_pf_rings; + + oct->sriov_info.trs = total_rings; + oct->sriov_info.pf_srn = pf_srn; + oct->sriov_info.num_pf_rings = num_pf_rings; + dev_dbg(&oct->pci_dev->dev, "trs:%d pf_srn:%d num_pf_rings:%d\n", + oct->sriov_info.trs, oct->sriov_info.pf_srn, + oct->sriov_info.num_pf_rings); + return 0; +} + +int setup_cn23xx_octeon_pf_device(struct octeon_device *oct) +{ + if (octeon_map_pci_barx(oct, 0, 0)) + return 1; + + if (octeon_map_pci_barx(oct, 1, MAX_BAR1_IOREMAP_SIZE)) { + dev_err(&oct->pci_dev->dev, "%s CN23XX BAR1 map failed\n", + __func__); + octeon_unmap_pci_barx(oct, 0); + return 1; + } + + cn23xx_get_pf_num(oct); + + if (cn23xx_sriov_config(oct)) { + octeon_unmap_pci_barx(oct, 0); + octeon_unmap_pci_barx(oct, 1); + return 1; + } + + octeon_write_csr64(oct, CN23XX_SLI_MAC_CREDIT_CNT, 0x3F802080802080ULL); + + oct->fn_list.setup_iq_regs = cn23xx_setup_iq_regs; + oct->fn_list.setup_oq_regs = cn23xx_setup_oq_regs; + oct->fn_list.process_interrupt_regs = cn23xx_interrupt_handler; + oct->fn_list.msix_interrupt_handler = cn23xx_pf_msix_interrupt_handler; + + oct->fn_list.soft_reset = cn23xx_pf_soft_reset; + oct->fn_list.setup_device_regs = cn23xx_setup_pf_device_regs; + oct->fn_list.update_iq_read_idx = cn23xx_update_read_index; + + oct->fn_list.bar1_idx_setup = cn23xx_bar1_idx_setup; + oct->fn_list.bar1_idx_write = cn23xx_bar1_idx_write; + oct->fn_list.bar1_idx_read = cn23xx_bar1_idx_read; + + oct->fn_list.enable_interrupt = cn23xx_enable_pf_interrupt; + oct->fn_list.disable_interrupt = cn23xx_disable_pf_interrupt; + + oct->fn_list.enable_io_queues = cn23xx_enable_io_queues; + oct->fn_list.disable_io_queues = cn23xx_disable_io_queues; + + cn23xx_setup_reg_address(oct); + + oct->coproc_clock_rate = 1000000ULL * cn23xx_coprocessor_clock(oct); + + return 0; +} + +int validate_cn23xx_pf_config_info(struct octeon_device *oct, + struct octeon_config *conf23xx) +{ + if (CFG_GET_IQ_MAX_Q(conf23xx) > CN23XX_MAX_INPUT_QUEUES) { + dev_err(&oct->pci_dev->dev, "%s: Num IQ (%d) exceeds Max (%d)\n", + __func__, CFG_GET_IQ_MAX_Q(conf23xx), + CN23XX_MAX_INPUT_QUEUES); + return 1; + } + + if (CFG_GET_OQ_MAX_Q(conf23xx) > CN23XX_MAX_OUTPUT_QUEUES) { + dev_err(&oct->pci_dev->dev, "%s: Num OQ (%d) exceeds Max (%d)\n", + __func__, CFG_GET_OQ_MAX_Q(conf23xx), + CN23XX_MAX_OUTPUT_QUEUES); + return 1; + } + + if (CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_32BYTE_INSTR && + CFG_GET_IQ_INSTR_TYPE(conf23xx) != OCTEON_64BYTE_INSTR) { + dev_err(&oct->pci_dev->dev, "%s: Invalid instr type for IQ\n", + __func__); + return 1; + } + + if (!(CFG_GET_OQ_INFO_PTR(conf23xx)) || + !(CFG_GET_OQ_REFILL_THRESHOLD(conf23xx))) { + dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n", + __func__); + return 1; + } + + if (!(CFG_GET_OQ_INTR_TIME(conf23xx))) { + dev_err(&oct->pci_dev->dev, "%s: Invalid parameter for OQ\n", + __func__); + return 1; + } + + return 0; +} + +void cn23xx_dump_iq_regs(struct octeon_device *oct) +{ + u32 regval, q_no; + + dev_dbg(&oct->pci_dev->dev, "SLI_IQ_DOORBELL_0 [0x%x]: 0x%016llx\n", + CN23XX_SLI_IQ_DOORBELL(0), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_IQ_DOORBELL(0)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_IQ_BASEADDR_0 [0x%x]: 0x%016llx\n", + CN23XX_SLI_IQ_BASE_ADDR64(0), + CVM_CAST64(octeon_read_csr64 + (oct, CN23XX_SLI_IQ_BASE_ADDR64(0)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_IQ_FIFO_RSIZE_0 [0x%x]: 0x%016llx\n", + CN23XX_SLI_IQ_SIZE(0), + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_IQ_SIZE(0)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_CTL_STATUS [0x%x]: 0x%016llx\n", + CN23XX_SLI_CTL_STATUS, + CVM_CAST64(octeon_read_csr64(oct, CN23XX_SLI_CTL_STATUS))); + + for (q_no = 0; q_no < CN23XX_MAX_INPUT_QUEUES; q_no++) { + dev_dbg(&oct->pci_dev->dev, "SLI_PKT[%d]_INPUT_CTL [0x%x]: 0x%016llx\n", + q_no, CN23XX_SLI_IQ_PKT_CONTROL64(q_no), + CVM_CAST64(octeon_read_csr64 + (oct, + CN23XX_SLI_IQ_PKT_CONTROL64(q_no)))); + } + + pci_read_config_dword(oct->pci_dev, CN23XX_CONFIG_PCIE_DEVCTL, ®val); + dev_dbg(&oct->pci_dev->dev, "Config DevCtl [0x%x]: 0x%08x\n", + CN23XX_CONFIG_PCIE_DEVCTL, regval); + + dev_dbg(&oct->pci_dev->dev, "SLI_PRT[%d]_CFG [0x%llx]: 0x%016llx\n", + oct->pcie_port, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port), + CVM_CAST64(lio_pci_readq( + oct, CN23XX_DPI_SLI_PRTX_CFG(oct->pcie_port)))); + + dev_dbg(&oct->pci_dev->dev, "SLI_S2M_PORT[%d]_CTL [0x%x]: 0x%016llx\n", + oct->pcie_port, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port), + CVM_CAST64(octeon_read_csr64( + oct, CN23XX_SLI_S2M_PORTX_CTL(oct->pcie_port)))); +} + +int cn23xx_fw_loaded(struct octeon_device *oct) +{ + u64 val; + + val = octeon_read_csr64(oct, CN23XX_SLI_SCRATCH1); + return (val >> 1) & 1ULL; +} diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h new file mode 100644 index 000000000000..21b5c9051967 --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_device.h @@ -0,0 +1,59 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ + +/*! \file cn23xx_device.h + * \brief Host Driver: Routines that perform CN23XX specific operations. +*/ + +#ifndef __CN23XX_PF_DEVICE_H__ +#define __CN23XX_PF_DEVICE_H__ + +#include "cn23xx_pf_regs.h" + +/* Register address and configuration for a CN23XX devices. + * If device specific changes need to be made then add a struct to include + * device specific fields as shown in the commented section + */ +struct octeon_cn23xx_pf { + /** PCI interrupt summary register */ + u8 __iomem *intr_sum_reg64; + + /** PCI interrupt enable register */ + u8 __iomem *intr_enb_reg64; + + /** The PCI interrupt mask used by interrupt handler */ + u64 intr_mask64; + + struct octeon_config *conf; +}; + +int setup_cn23xx_octeon_pf_device(struct octeon_device *oct); + +int validate_cn23xx_pf_config_info(struct octeon_device *oct, + struct octeon_config *conf23xx); + +u32 cn23xx_pf_get_oq_ticks(struct octeon_device *oct, u32 time_intr_in_us); + +void cn23xx_dump_pf_initialized_regs(struct octeon_device *oct); + +int cn23xx_fw_loaded(struct octeon_device *oct); +#endif diff --git a/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h new file mode 100644 index 000000000000..03d79d95ab75 --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/cn23xx_pf_regs.h @@ -0,0 +1,604 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ + +/*! \file cn23xx_regs.h + * \brief Host Driver: Register Address and Register Mask values for + * Octeon CN23XX devices. +*/ + +#ifndef __CN23XX_PF_REGS_H__ +#define __CN23XX_PF_REGS_H__ + +#define CN23XX_CONFIG_VENDOR_ID 0x00 +#define CN23XX_CONFIG_DEVICE_ID 0x02 + +#define CN23XX_CONFIG_XPANSION_BAR 0x38 + +#define CN23XX_CONFIG_MSIX_CAP 0x50 +#define CN23XX_CONFIG_MSIX_LMSI 0x54 +#define CN23XX_CONFIG_MSIX_UMSI 0x58 +#define CN23XX_CONFIG_MSIX_MSIMD 0x5C +#define CN23XX_CONFIG_MSIX_MSIMM 0x60 +#define CN23XX_CONFIG_MSIX_MSIMP 0x64 + +#define CN23XX_CONFIG_PCIE_CAP 0x70 +#define CN23XX_CONFIG_PCIE_DEVCAP 0x74 +#define CN23XX_CONFIG_PCIE_DEVCTL 0x78 +#define CN23XX_CONFIG_PCIE_LINKCAP 0x7C +#define CN23XX_CONFIG_PCIE_LINKCTL 0x80 +#define CN23XX_CONFIG_PCIE_SLOTCAP 0x84 +#define CN23XX_CONFIG_PCIE_SLOTCTL 0x88 +#define CN23XX_CONFIG_PCIE_DEVCTL2 0x98 +#define CN23XX_CONFIG_PCIE_LINKCTL2 0xA0 +#define CN23XX_CONFIG_PCIE_UNCORRECT_ERR_MASK 0x108 +#define CN23XX_CONFIG_PCIE_CORRECT_ERR_STATUS 0x110 +#define CN23XX_CONFIG_PCIE_DEVCTL_MASK 0x00040000 + +#define CN23XX_PCIE_SRIOV_FDL 0x188 +#define CN23XX_PCIE_SRIOV_FDL_BIT_POS 0x10 +#define CN23XX_PCIE_SRIOV_FDL_MASK 0xFF + +#define CN23XX_CONFIG_PCIE_FLTMSK 0x720 + +#define CN23XX_CONFIG_SRIOV_VFDEVID 0x190 + +#define CN23XX_CONFIG_SRIOV_BAR_START 0x19C +#define CN23XX_CONFIG_SRIOV_BARX(i) \ + (CN23XX_CONFIG_SRIOV_BAR_START + (i * 4)) +#define CN23XX_CONFIG_SRIOV_BAR_PF 0x08 +#define CN23XX_CONFIG_SRIOV_BAR_64BIT 0x04 +#define CN23XX_CONFIG_SRIOV_BAR_IO 0x01 + +/* ############## BAR0 Registers ################ */ + +#define CN23XX_SLI_CTL_PORT_START 0x286E0 +#define CN23XX_PORT_OFFSET 0x10 + +#define CN23XX_SLI_CTL_PORT(p) \ + (CN23XX_SLI_CTL_PORT_START + ((p) * CN23XX_PORT_OFFSET)) + +/* 2 scatch registers (64-bit) */ +#define CN23XX_SLI_WINDOW_CTL 0x282E0 +#define CN23XX_SLI_SCRATCH1 0x283C0 +#define CN23XX_SLI_SCRATCH2 0x283D0 +#define CN23XX_SLI_WINDOW_CTL_DEFAULT 0x200000ULL + +/* 1 registers (64-bit) - SLI_CTL_STATUS */ +#define CN23XX_SLI_CTL_STATUS 0x28570 + +/* SLI Packet Input Jabber Register (64 bit register) + * <31:0> for Byte count for limiting sizes of packet sizes + * that are allowed for sli packet inbound packets. + * the default value is 0xFA00(=64000). + */ +#define CN23XX_SLI_PKT_IN_JABBER 0x29170 +/* The input jabber is used to determine the TSO max size. + * Due to H/W limitation, this need to be reduced to 60000 + * in order to to H/W TSO and avoid the WQE malfarmation + * PKO_BUG_24989_WQE_LEN + */ +#define CN23XX_DEFAULT_INPUT_JABBER 0xEA60 /*60000*/ + +#define CN23XX_WIN_WR_ADDR_LO 0x20000 +#define CN23XX_WIN_WR_ADDR_HI 0x20004 +#define CN23XX_WIN_WR_ADDR64 CN23XX_WIN_WR_ADDR_LO + +#define CN23XX_WIN_RD_ADDR_LO 0x20010 +#define CN23XX_WIN_RD_ADDR_HI 0x20014 +#define CN23XX_WIN_RD_ADDR64 CN23XX_WIN_RD_ADDR_LO + +#define CN23XX_WIN_WR_DATA_LO 0x20020 +#define CN23XX_WIN_WR_DATA_HI 0x20024 +#define CN23XX_WIN_WR_DATA64 CN23XX_WIN_WR_DATA_LO + +#define CN23XX_WIN_RD_DATA_LO 0x20040 +#define CN23XX_WIN_RD_DATA_HI 0x20044 +#define CN23XX_WIN_RD_DATA64 CN23XX_WIN_RD_DATA_LO + +#define CN23XX_WIN_WR_MASK_LO 0x20030 +#define CN23XX_WIN_WR_MASK_HI 0x20034 +#define CN23XX_WIN_WR_MASK_REG CN23XX_WIN_WR_MASK_LO +#define CN23XX_SLI_MAC_CREDIT_CNT 0x23D70 + +/* 4 registers (64-bit) for mapping IOQs to MACs(PEMs)- + * SLI_PKT_MAC(0..3)_PF(0..1)_RINFO + */ +#define CN23XX_SLI_PKT_MAC_RINFO_START64 0x29030 + +/*1 register (64-bit) to determine whether IOQs are in reset. */ +#define CN23XX_SLI_PKT_IOQ_RING_RST 0x291E0 + +/* Each Input Queue register is at a 16-byte Offset in BAR0 */ +#define CN23XX_IQ_OFFSET 0x20000 + +#define CN23XX_MAC_RINFO_OFFSET 0x20 +#define CN23XX_PF_RINFO_OFFSET 0x10 + +#define CN23XX_SLI_PKT_MAC_RINFO64(mac, pf) \ + (CN23XX_SLI_PKT_MAC_RINFO_START64 + \ + ((mac) * CN23XX_MAC_RINFO_OFFSET) + \ + ((pf) * CN23XX_PF_RINFO_OFFSET)) + +/** mask for total rings, setting TRS to base */ +#define CN23XX_PKT_MAC_CTL_RINFO_TRS BIT_ULL(16) +/** mask for starting ring number: setting SRN <6:0> = 0x7F */ +#define CN23XX_PKT_MAC_CTL_RINFO_SRN (0x7F) + +/* Starting bit of the TRS field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_TRS_BIT_POS 16 +/* Starting bit of SRN field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_SRN_BIT_POS 0 +/* Starting bit of RPVF field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_RPVF_BIT_POS 32 +/* Starting bit of NVFS field in CN23XX_SLI_PKT_MAC_RINFO64 register */ +#define CN23XX_PKT_MAC_CTL_RINFO_NVFS_BIT_POS 48 + +/*###################### REQUEST QUEUE #########################*/ + +/* 64 registers for Input Queue Instr Count - SLI_PKT_IN_DONE0_CNTS */ +#define CN23XX_SLI_IQ_INSTR_COUNT_START64 0x10040 + +/* 64 registers for Input Queues Start Addr - SLI_PKT0_INSTR_BADDR */ +#define CN23XX_SLI_IQ_BASE_ADDR_START64 0x10010 + +/* 64 registers for Input Doorbell - SLI_PKT0_INSTR_BAOFF_DBELL */ +#define CN23XX_SLI_IQ_DOORBELL_START 0x10020 + +/* 64 registers for Input Queue size - SLI_PKT0_INSTR_FIFO_RSIZE */ +#define CN23XX_SLI_IQ_SIZE_START 0x10030 + +/* 64 registers (64-bit) - ES, RO, NS, Arbitration for Input Queue Data & + * gather list fetches. SLI_PKT(0..63)_INPUT_CONTROL. + */ +#define CN23XX_SLI_IQ_PKT_CONTROL_START64 0x10000 + +/*------- Request Queue Macros ---------*/ +#define CN23XX_SLI_IQ_PKT_CONTROL64(iq) \ + (CN23XX_SLI_IQ_PKT_CONTROL_START64 + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_BASE_ADDR64(iq) \ + (CN23XX_SLI_IQ_BASE_ADDR_START64 + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_SIZE(iq) \ + (CN23XX_SLI_IQ_SIZE_START + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_DOORBELL(iq) \ + (CN23XX_SLI_IQ_DOORBELL_START + ((iq) * CN23XX_IQ_OFFSET)) + +#define CN23XX_SLI_IQ_INSTR_COUNT64(iq) \ + (CN23XX_SLI_IQ_INSTR_COUNT_START64 + ((iq) * CN23XX_IQ_OFFSET)) + +/*------------------ Masks ----------------*/ +#define CN23XX_PKT_INPUT_CTL_VF_NUM BIT_ULL(32) +#define CN23XX_PKT_INPUT_CTL_MAC_NUM BIT(29) +/* Number of instructions to be read in one MAC read request. + * setting to Max value(4) + */ +#define CN23XX_PKT_INPUT_CTL_RDSIZE (3 << 25) +#define CN23XX_PKT_INPUT_CTL_IS_64B BIT(24) +#define CN23XX_PKT_INPUT_CTL_RST BIT(23) +#define CN23XX_PKT_INPUT_CTL_QUIET BIT(28) +#define CN23XX_PKT_INPUT_CTL_RING_ENB BIT(22) +#define CN23XX_PKT_INPUT_CTL_DATA_NS BIT(8) +#define CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP BIT(6) +#define CN23XX_PKT_INPUT_CTL_DATA_RO BIT(5) +#define CN23XX_PKT_INPUT_CTL_USE_CSR BIT(4) +#define CN23XX_PKT_INPUT_CTL_GATHER_NS BIT(3) +#define CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP (2) +#define CN23XX_PKT_INPUT_CTL_GATHER_RO (1) + +/** Rings per Virtual Function **/ +#define CN23XX_PKT_INPUT_CTL_RPVF_MASK (0x3F) +#define CN23XX_PKT_INPUT_CTL_RPVF_POS (48) +/** These bits[47:44] select the Physical function number within the MAC */ +#define CN23XX_PKT_INPUT_CTL_PF_NUM_MASK (0x7) +#define CN23XX_PKT_INPUT_CTL_PF_NUM_POS (45) +/** These bits[43:32] select the function number within the PF */ +#define CN23XX_PKT_INPUT_CTL_VF_NUM_MASK (0x1FFF) +#define CN23XX_PKT_INPUT_CTL_VF_NUM_POS (32) +#define CN23XX_PKT_INPUT_CTL_MAC_NUM_MASK (0x3) +#define CN23XX_PKT_INPUT_CTL_MAC_NUM_POS (29) +#define CN23XX_PKT_IN_DONE_WMARK_MASK (0xFFFFULL) +#define CN23XX_PKT_IN_DONE_WMARK_BIT_POS (32) +#define CN23XX_PKT_IN_DONE_CNT_MASK (0x00000000FFFFFFFFULL) + +#ifdef __LITTLE_ENDIAN_BITFIELD +#define CN23XX_PKT_INPUT_CTL_MASK \ + (CN23XX_PKT_INPUT_CTL_RDSIZE | \ + CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP | \ + CN23XX_PKT_INPUT_CTL_USE_CSR) +#else +#define CN23XX_PKT_INPUT_CTL_MASK \ + (CN23XX_PKT_INPUT_CTL_RDSIZE | \ + CN23XX_PKT_INPUT_CTL_DATA_ES_64B_SWAP | \ + CN23XX_PKT_INPUT_CTL_USE_CSR | \ + CN23XX_PKT_INPUT_CTL_GATHER_ES_64B_SWAP) +#endif + +/** Masks for SLI_PKT_IN_DONE(0..63)_CNTS Register */ +#define CN23XX_IN_DONE_CNTS_PI_INT BIT_ULL(62) +#define CN23XX_IN_DONE_CNTS_CINT_ENB BIT_ULL(48) + +/*############################ OUTPUT QUEUE #########################*/ + +/* 64 registers for Output queue control - SLI_PKT(0..63)_OUTPUT_CONTROL */ +#define CN23XX_SLI_OQ_PKT_CONTROL_START 0x10050 + +/* 64 registers for Output queue buffer and info size - SLI_PKT0_OUT_SIZE */ +#define CN23XX_SLI_OQ0_BUFF_INFO_SIZE 0x10060 + +/* 64 registers for Output Queue Start Addr - SLI_PKT0_SLIST_BADDR */ +#define CN23XX_SLI_OQ_BASE_ADDR_START64 0x10070 + +/* 64 registers for Output Queue Packet Credits - SLI_PKT0_SLIST_BAOFF_DBELL */ +#define CN23XX_SLI_OQ_PKT_CREDITS_START 0x10080 + +/* 64 registers for Output Queue size - SLI_PKT0_SLIST_FIFO_RSIZE */ +#define CN23XX_SLI_OQ_SIZE_START 0x10090 + +/* 64 registers for Output Queue Packet Count - SLI_PKT0_CNTS */ +#define CN23XX_SLI_OQ_PKT_SENT_START 0x100B0 + +/* 64 registers for Output Queue INT Levels - SLI_PKT0_INT_LEVELS */ +#define CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 0x100A0 + +/* Each Output Queue register is at a 16-byte Offset in BAR0 */ +#define CN23XX_OQ_OFFSET 0x20000 + +/* 1 (64-bit register) for Output Queue backpressure across all rings. */ +#define CN23XX_SLI_OQ_WMARK 0x29180 + +/* Global pkt control register */ +#define CN23XX_SLI_GBL_CONTROL 0x29210 + +/* Backpressure enable register for PF0 */ +#define CN23XX_SLI_OUT_BP_EN_W1S 0x29260 + +/* Backpressure enable register for PF1 */ +#define CN23XX_SLI_OUT_BP_EN2_W1S 0x29270 + +/* Backpressure disable register for PF0 */ +#define CN23XX_SLI_OUT_BP_EN_W1C 0x29280 + +/* Backpressure disable register for PF1 */ +#define CN23XX_SLI_OUT_BP_EN2_W1C 0x29290 + +/*------- Output Queue Macros ---------*/ + +#define CN23XX_SLI_OQ_PKT_CONTROL(oq) \ + (CN23XX_SLI_OQ_PKT_CONTROL_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_BASE_ADDR64(oq) \ + (CN23XX_SLI_OQ_BASE_ADDR_START64 + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_SIZE(oq) \ + (CN23XX_SLI_OQ_SIZE_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_BUFF_INFO_SIZE(oq) \ + (CN23XX_SLI_OQ0_BUFF_INFO_SIZE + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKTS_SENT(oq) \ + (CN23XX_SLI_OQ_PKT_SENT_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKTS_CREDIT(oq) \ + (CN23XX_SLI_OQ_PKT_CREDITS_START + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKT_INT_LEVELS(oq) \ + (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \ + ((oq) * CN23XX_OQ_OFFSET)) + +/*Macro's for accessing CNT and TIME separately from INT_LEVELS*/ +#define CN23XX_SLI_OQ_PKT_INT_LEVELS_CNT(oq) \ + (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \ + ((oq) * CN23XX_OQ_OFFSET)) + +#define CN23XX_SLI_OQ_PKT_INT_LEVELS_TIME(oq) \ + (CN23XX_SLI_OQ_PKT_INT_LEVELS_START64 + \ + ((oq) * CN23XX_OQ_OFFSET) + 4) + +/*------------------ Masks ----------------*/ +#define CN23XX_PKT_OUTPUT_CTL_TENB BIT(13) +#define CN23XX_PKT_OUTPUT_CTL_CENB BIT(12) +#define CN23XX_PKT_OUTPUT_CTL_IPTR BIT(11) +#define CN23XX_PKT_OUTPUT_CTL_ES BIT(9) +#define CN23XX_PKT_OUTPUT_CTL_NSR BIT(8) +#define CN23XX_PKT_OUTPUT_CTL_ROR BIT(7) +#define CN23XX_PKT_OUTPUT_CTL_DPTR BIT(6) +#define CN23XX_PKT_OUTPUT_CTL_BMODE BIT(5) +#define CN23XX_PKT_OUTPUT_CTL_ES_P BIT(3) +#define CN23XX_PKT_OUTPUT_CTL_NSR_P BIT(2) +#define CN23XX_PKT_OUTPUT_CTL_ROR_P BIT(1) +#define CN23XX_PKT_OUTPUT_CTL_RING_ENB BIT(0) + +/*######################### Mailbox Reg Macros ########################*/ +#define CN23XX_SLI_PKT_MBOX_INT_START 0x10210 +#define CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START 0x10200 +#define CN23XX_SLI_MAC_PF_MBOX_INT_START 0x27380 + +#define CN23XX_SLI_MBOX_OFFSET 0x20000 +#define CN23XX_SLI_MBOX_SIG_IDX_OFFSET 0x8 + +#define CN23XX_SLI_PKT_MBOX_INT(q) \ + (CN23XX_SLI_PKT_MBOX_INT_START + ((q) * CN23XX_SLI_MBOX_OFFSET)) + +#define CN23XX_SLI_PKT_PF_VF_MBOX_SIG(q, idx) \ + (CN23XX_SLI_PKT_PF_VF_MBOX_SIG_START + \ + ((q) * CN23XX_SLI_MBOX_OFFSET + \ + (idx) * CN23XX_SLI_MBOX_SIG_IDX_OFFSET)) + +#define CN23XX_SLI_MAC_PF_MBOX_INT(mac, pf) \ + (CN23XX_SLI_MAC_PF_MBOX_INT_START + \ + ((mac) * CN23XX_MAC_INT_OFFSET + \ + (pf) * CN23XX_PF_INT_OFFSET)) + +/*######################### DMA Counters #########################*/ + +/* 2 registers (64-bit) - DMA Count - 1 for each DMA counter 0/1. */ +#define CN23XX_DMA_CNT_START 0x28400 + +/* 2 registers (64-bit) - DMA Timer 0/1, contains DMA timer values */ +/* SLI_DMA_0_TIM */ +#define CN23XX_DMA_TIM_START 0x28420 + +/* 2 registers (64-bit) - DMA count & Time Interrupt threshold - + * SLI_DMA_0_INT_LEVEL + */ +#define CN23XX_DMA_INT_LEVEL_START 0x283E0 + +/* Each DMA register is at a 16-byte Offset in BAR0 */ +#define CN23XX_DMA_OFFSET 0x10 + +/*---------- DMA Counter Macros ---------*/ +#define CN23XX_DMA_CNT(dq) \ + (CN23XX_DMA_CNT_START + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_INT_LEVEL(dq) \ + (CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_PKT_INT_LEVEL(dq) \ + (CN23XX_DMA_INT_LEVEL_START + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_TIME_INT_LEVEL(dq) \ + (CN23XX_DMA_INT_LEVEL_START + 4 + ((dq) * CN23XX_DMA_OFFSET)) + +#define CN23XX_DMA_TIM(dq) \ + (CN23XX_DMA_TIM_START + ((dq) * CN23XX_DMA_OFFSET)) + +/*######################## MSIX TABLE #########################*/ + +#define CN23XX_MSIX_TABLE_ADDR_START 0x0 +#define CN23XX_MSIX_TABLE_DATA_START 0x8 + +#define CN23XX_MSIX_TABLE_SIZE 0x10 +#define CN23XX_MSIX_TABLE_ENTRIES 0x41 + +#define CN23XX_MSIX_ENTRY_VECTOR_CTL BIT_ULL(32) + +#define CN23XX_MSIX_TABLE_ADDR(idx) \ + (CN23XX_MSIX_TABLE_ADDR_START + ((idx) * CN23XX_MSIX_TABLE_SIZE)) + +#define CN23XX_MSIX_TABLE_DATA(idx) \ + (CN23XX_MSIX_TABLE_DATA_START + ((idx) * CN23XX_MSIX_TABLE_SIZE)) + +/*######################## INTERRUPTS #########################*/ +#define CN23XX_MAC_INT_OFFSET 0x20 +#define CN23XX_PF_INT_OFFSET 0x10 + +/* 1 register (64-bit) for Interrupt Summary */ +#define CN23XX_SLI_INT_SUM64 0x27000 + +/* 4 registers (64-bit) for Interrupt Enable for each Port */ +#define CN23XX_SLI_INT_ENB64 0x27080 + +#define CN23XX_SLI_MAC_PF_INT_SUM64(mac, pf) \ + (CN23XX_SLI_INT_SUM64 + \ + ((mac) * CN23XX_MAC_INT_OFFSET) + \ + ((pf) * CN23XX_PF_INT_OFFSET)) + +#define CN23XX_SLI_MAC_PF_INT_ENB64(mac, pf) \ + (CN23XX_SLI_INT_ENB64 + \ + ((mac) * CN23XX_MAC_INT_OFFSET) + \ + ((pf) * CN23XX_PF_INT_OFFSET)) + +/* 1 register (64-bit) to indicate which Output Queue reached pkt threshold */ +#define CN23XX_SLI_PKT_CNT_INT 0x29130 + +/* 1 register (64-bit) to indicate which Output Queue reached time threshold */ +#define CN23XX_SLI_PKT_TIME_INT 0x29140 + +/*------------------ Interrupt Masks ----------------*/ + +#define CN23XX_INTR_PO_INT BIT_ULL(63) +#define CN23XX_INTR_PI_INT BIT_ULL(62) +#define CN23XX_INTR_MBOX_INT BIT_ULL(61) +#define CN23XX_INTR_RESEND BIT_ULL(60) + +#define CN23XX_INTR_CINT_ENB BIT_ULL(48) +#define CN23XX_INTR_MBOX_ENB BIT(0) + +#define CN23XX_INTR_RML_TIMEOUT_ERR (1) + +#define CN23XX_INTR_MIO_INT BIT(1) + +#define CN23XX_INTR_RESERVED1 (3 << 2) + +#define CN23XX_INTR_PKT_COUNT BIT(4) +#define CN23XX_INTR_PKT_TIME BIT(5) + +#define CN23XX_INTR_RESERVED2 (3 << 6) + +#define CN23XX_INTR_M0UPB0_ERR BIT(8) +#define CN23XX_INTR_M0UPWI_ERR BIT(9) +#define CN23XX_INTR_M0UNB0_ERR BIT(10) +#define CN23XX_INTR_M0UNWI_ERR BIT(11) + +#define CN23XX_INTR_RESERVED3 (0xFFFFFULL << 12) + +#define CN23XX_INTR_DMA0_FORCE BIT_ULL(32) +#define CN23XX_INTR_DMA1_FORCE BIT_ULL(33) + +#define CN23XX_INTR_DMA0_COUNT BIT_ULL(34) +#define CN23XX_INTR_DMA1_COUNT BIT_ULL(35) + +#define CN23XX_INTR_DMA0_TIME BIT_ULL(36) +#define CN23XX_INTR_DMA1_TIME BIT_ULL(37) + +#define CN23XX_INTR_RESERVED4 (0x7FFFFULL << 38) + +#define CN23XX_INTR_VF_MBOX BIT_ULL(57) +#define CN23XX_INTR_DMAVF_ERR BIT_ULL(58) +#define CN23XX_INTR_DMAPF_ERR BIT_ULL(59) + +#define CN23XX_INTR_PKTVF_ERR BIT_ULL(60) +#define CN23XX_INTR_PKTPF_ERR BIT_ULL(61) +#define CN23XX_INTR_PPVF_ERR BIT_ULL(62) +#define CN23XX_INTR_PPPF_ERR BIT_ULL(63) + +#define CN23XX_INTR_DMA0_DATA (CN23XX_INTR_DMA0_TIME) +#define CN23XX_INTR_DMA1_DATA (CN23XX_INTR_DMA1_TIME) + +#define CN23XX_INTR_DMA_DATA \ + (CN23XX_INTR_DMA0_DATA | CN23XX_INTR_DMA1_DATA) + +/* By fault only TIME based */ +#define CN23XX_INTR_PKT_DATA (CN23XX_INTR_PKT_TIME) +/* For both COUNT and TIME based */ +/* #define CN23XX_INTR_PKT_DATA \ + * (CN23XX_INTR_PKT_COUNT | CN23XX_INTR_PKT_TIME) + */ + +/* Sum of interrupts for all PCI-Express Data Interrupts */ +#define CN23XX_INTR_PCIE_DATA \ + (CN23XX_INTR_DMA_DATA | CN23XX_INTR_PKT_DAT) + +/* Sum of interrupts for error events */ +#define CN23XX_INTR_ERR \ + (CN23XX_INTR_M0UPB0_ERR | \ + CN23XX_INTR_M0UPWI_ERR | \ + CN23XX_INTR_M0UNB0_ERR | \ + CN23XX_INTR_M0UNWI_ERR | \ + CN23XX_INTR_DMAVF_ERR | \ + CN23XX_INTR_DMAPF_ERR | \ + CN23XX_INTR_PKTPF_ERR | \ + CN23XX_INTR_PPPF_ERR | \ + CN23XX_INTR_PPVF_ERR) + +/* Programmed Mask for Interrupt Sum */ +#define CN23XX_INTR_MASK \ + (CN23XX_INTR_DMA_DATA | \ + CN23XX_INTR_DMA0_FORCE | \ + CN23XX_INTR_DMA1_FORCE | \ + CN23XX_INTR_MIO_INT | \ + CN23XX_INTR_ERR) + +/* 4 Registers (64 - bit) */ +#define CN23XX_SLI_S2M_PORT_CTL_START 0x23D80 +#define CN23XX_SLI_S2M_PORTX_CTL(port) \ + (CN23XX_SLI_S2M_PORT_CTL_START + (port * 0x10)) + +#define CN23XX_SLI_MAC_NUMBER 0x20050 + +/** PEM(0..3)_BAR1_INDEX(0..15)address is defined as + * addr = (0x00011800C0000100 |port <<24 |idx <<3 ) + * Here, port is PEM(0..3) & idx is INDEX(0..15) + */ +#define CN23XX_PEM_BAR1_INDEX_START 0x00011800C0000100ULL +#define CN23XX_PEM_OFFSET 24 +#define CN23XX_BAR1_INDEX_OFFSET 3 + +#define CN23XX_PEM_BAR1_INDEX_REG(port, idx) \ + (CN23XX_PEM_BAR1_INDEX_START + ((port) << CN23XX_PEM_OFFSET) + \ + ((idx) << CN23XX_BAR1_INDEX_OFFSET)) + +/*############################ DPI #########################*/ + +/* 1 register (64-bit) - provides DMA Enable */ +#define CN23XX_DPI_CTL 0x0001df0000000040ULL + +/* 1 register (64-bit) - Controls the DMA IO Operation */ +#define CN23XX_DPI_DMA_CONTROL 0x0001df0000000048ULL + +/* 1 register (64-bit) - Provides DMA Instr'n Queue Enable */ +#define CN23XX_DPI_REQ_GBL_ENB 0x0001df0000000050ULL + +/* 1 register (64-bit) - DPI_REQ_ERR_RSP + * Indicates which Instr'n Queue received error response from the IO sub-system + */ +#define CN23XX_DPI_REQ_ERR_RSP 0x0001df0000000058ULL + +/* 1 register (64-bit) - DPI_REQ_ERR_RST + * Indicates which Instr'n Queue dropped an Instr'n + */ +#define CN23XX_DPI_REQ_ERR_RST 0x0001df0000000060ULL + +/* 6 register (64-bit) - DPI_DMA_ENG(0..5)_EN + * Provides DMA Engine Queue Enable + */ +#define CN23XX_DPI_DMA_ENG0_ENB 0x0001df0000000080ULL +#define CN23XX_DPI_DMA_ENG_ENB(eng) (CN23XX_DPI_DMA_ENG0_ENB + (eng * 8)) + +/* 8 register (64-bit) - DPI_DMA(0..7)_REQQ_CTL + * Provides control bits for transaction on 8 Queues + */ +#define CN23XX_DPI_DMA_REQQ0_CTL 0x0001df0000000180ULL +#define CN23XX_DPI_DMA_REQQ_CTL(q_no) \ + (CN23XX_DPI_DMA_REQQ0_CTL + (q_no * 8)) + +/* 6 register (64-bit) - DPI_ENG(0..5)_BUF + * Provides DMA Engine FIFO (Queue) Size + */ +#define CN23XX_DPI_DMA_ENG0_BUF 0x0001df0000000880ULL +#define CN23XX_DPI_DMA_ENG_BUF(eng) \ + (CN23XX_DPI_DMA_ENG0_BUF + (eng * 8)) + +/* 4 Registers (64-bit) */ +#define CN23XX_DPI_SLI_PRT_CFG_START 0x0001df0000000900ULL +#define CN23XX_DPI_SLI_PRTX_CFG(port) \ + (CN23XX_DPI_SLI_PRT_CFG_START + (port * 0x8)) + +/* Masks for DPI_DMA_CONTROL Register */ +#define CN23XX_DPI_DMA_COMMIT_MODE BIT_ULL(58) +#define CN23XX_DPI_DMA_PKT_EN BIT_ULL(56) +#define CN23XX_DPI_DMA_ENB (0x0FULL << 48) +/* Set the DMA Control, to update packet count not byte count sent by DMA, + * when we use Interrupt Coalescing (CA mode) + */ +#define CN23XX_DPI_DMA_O_ADD1 BIT(19) +/*selecting 64-bit Byte Swap Mode */ +#define CN23XX_DPI_DMA_O_ES BIT(15) +#define CN23XX_DPI_DMA_O_MODE BIT(14) + +#define CN23XX_DPI_DMA_CTL_MASK \ + (CN23XX_DPI_DMA_COMMIT_MODE | \ + CN23XX_DPI_DMA_PKT_EN | \ + CN23XX_DPI_DMA_O_ES | \ + CN23XX_DPI_DMA_O_MODE) + +/*############################ RST #########################*/ + +#define CN23XX_RST_BOOT 0x0001180006001600ULL +#define CN23XX_RST_SOFT_RST 0x0001180006001680ULL + +#define CN23XX_LMC0_RESET_CTL 0x0001180088000180ULL +#define CN23XX_LMC0_RESET_CTL_DDR3RST_MASK 0x0000000000000001ULL + +#endif diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c index c03d37016a48..e779af88621b 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.c @@ -338,7 +338,7 @@ void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no) octeon_write_csr(oct, CN6XXX_SLI_PKT_CNT_INT_ENB, intr); } -void lio_cn6xxx_enable_io_queues(struct octeon_device *oct) +int lio_cn6xxx_enable_io_queues(struct octeon_device *oct) { u32 mask; @@ -353,6 +353,8 @@ void lio_cn6xxx_enable_io_queues(struct octeon_device *oct) mask = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); mask |= oct->io_qmask.oq; octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, mask); + + return 0; } void lio_cn6xxx_disable_io_queues(struct octeon_device *oct) @@ -418,36 +420,6 @@ void lio_cn6xxx_disable_io_queues(struct octeon_device *oct) octeon_write_csr(oct, CN6XXX_SLI_PKT_TIME_INT, d32); } -void lio_cn6xxx_reinit_regs(struct octeon_device *oct) -{ - int i; - - for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) { - if (!(oct->io_qmask.iq & (1ULL << i))) - continue; - oct->fn_list.setup_iq_regs(oct, i); - } - - for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { - if (!(oct->io_qmask.oq & (1ULL << i))) - continue; - oct->fn_list.setup_oq_regs(oct, i); - } - - oct->fn_list.setup_device_regs(oct); - - oct->fn_list.enable_interrupt(oct->chip); - - oct->fn_list.enable_io_queues(oct); - - /* for (i = 0; i < oct->num_oqs; i++) { */ - for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { - if (!(oct->io_qmask.oq & (1ULL << i))) - continue; - writel(oct->droq[i]->max_count, oct->droq[i]->pkts_credit_reg); - } -} - void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, @@ -507,18 +479,20 @@ lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq) return new_idx; } -void lio_cn6xxx_enable_interrupt(void *chip) +void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, + u8 unused __attribute__((unused))) { - struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip; + struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip; u64 mask = cn6xxx->intr_mask64 | CN6XXX_INTR_DMA0_FORCE; /* Enable Interrupt */ writeq(mask, cn6xxx->intr_enb_reg64); } -void lio_cn6xxx_disable_interrupt(void *chip) +void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, + u8 unused __attribute__((unused))) { - struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)chip; + struct octeon_cn6xxx *cn6xxx = (struct octeon_cn6xxx *)oct->chip; /* Disable Interrupts */ writeq(0, cn6xxx->intr_enb_reg64); @@ -714,7 +688,6 @@ int lio_setup_cn66xx_octeon_device(struct octeon_device *oct) oct->fn_list.soft_reset = lio_cn6xxx_soft_reset; oct->fn_list.setup_device_regs = lio_cn6xxx_setup_device_regs; - oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs; oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index; oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup; diff --git a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h index 28c47224221a..a40a91394079 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h +++ b/drivers/net/ethernet/cavium/liquidio/cn66xx_device.h @@ -80,18 +80,17 @@ void lio_cn6xxx_setup_global_input_regs(struct octeon_device *oct); void lio_cn6xxx_setup_global_output_regs(struct octeon_device *oct); void lio_cn6xxx_setup_iq_regs(struct octeon_device *oct, u32 iq_no); void lio_cn6xxx_setup_oq_regs(struct octeon_device *oct, u32 oq_no); -void lio_cn6xxx_enable_io_queues(struct octeon_device *oct); +int lio_cn6xxx_enable_io_queues(struct octeon_device *oct); void lio_cn6xxx_disable_io_queues(struct octeon_device *oct); irqreturn_t lio_cn6xxx_process_interrupt_regs(void *dev); -void lio_cn6xxx_reinit_regs(struct octeon_device *oct); void lio_cn6xxx_bar1_idx_setup(struct octeon_device *oct, u64 core_addr, u32 idx, int valid); void lio_cn6xxx_bar1_idx_write(struct octeon_device *oct, u32 idx, u32 mask); u32 lio_cn6xxx_bar1_idx_read(struct octeon_device *oct, u32 idx); u32 lio_cn6xxx_update_read_index(struct octeon_instr_queue *iq); -void lio_cn6xxx_enable_interrupt(void *chip); -void lio_cn6xxx_disable_interrupt(void *chip); +void lio_cn6xxx_enable_interrupt(struct octeon_device *oct, u8 unused); +void lio_cn6xxx_disable_interrupt(struct octeon_device *oct, u8 unused); void cn6xxx_get_pcie_qlmport(struct octeon_device *oct); void lio_cn6xxx_setup_reg_address(struct octeon_device *oct, void *chip, struct octeon_reg_list *reg_list); diff --git a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c index 29755bc68f12..dbf3566ead53 100644 --- a/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c +++ b/drivers/net/ethernet/cavium/liquidio/cn68xx_device.c @@ -148,7 +148,6 @@ int lio_setup_cn68xx_octeon_device(struct octeon_device *oct) oct->fn_list.process_interrupt_regs = lio_cn6xxx_process_interrupt_regs; oct->fn_list.soft_reset = lio_cn68xx_soft_reset; oct->fn_list.setup_device_regs = lio_cn68xx_setup_device_regs; - oct->fn_list.reinit_regs = lio_cn6xxx_reinit_regs; oct->fn_list.update_iq_read_idx = lio_cn6xxx_update_read_index; oct->fn_list.bar1_idx_setup = lio_cn6xxx_bar1_idx_setup; diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c new file mode 100644 index 000000000000..201eddb3013a --- /dev/null +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -0,0 +1,266 @@ +/********************************************************************** +* Author: Cavium, Inc. +* +* Contact: support@cavium.com +* Please include "LiquidIO" in the subject. +* +* Copyright (c) 2003-2015 Cavium, Inc. +* +* This file is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License, Version 2, as +* published by the Free Software Foundation. +* +* This file is distributed in the hope that it will be useful, but +* AS-IS and WITHOUT ANY WARRANTY; without even the implied warranty +* of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, TITLE, or +* NONINFRINGEMENT. See the GNU General Public License for more +* details. +* +* This file may also be available under a different license from Cavium. +* Contact Cavium, Inc. for more information +**********************************************************************/ +#include +#include +#include "liquidio_common.h" +#include "octeon_droq.h" +#include "octeon_iq.h" +#include "response_manager.h" +#include "octeon_device.h" +#include "octeon_nic.h" +#include "octeon_main.h" +#include "octeon_network.h" + +int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + int ret = 0; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = cmd; + nctrl.ncmd.s.param1 = param1; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n", + ret); + } + return ret; +} + +void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl, + unsigned int bytes_compl) +{ + struct netdev_queue *netdev_queue = txq; + + netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl); +} + +void octeon_update_tx_completion_counters(void *buf, int reqtype, + unsigned int *pkts_compl, + unsigned int *bytes_compl) +{ + struct octnet_buf_free_info *finfo; + struct sk_buff *skb = NULL; + struct octeon_soft_command *sc; + + switch (reqtype) { + case REQTYPE_NORESP_NET: + case REQTYPE_NORESP_NET_SG: + finfo = buf; + skb = finfo->skb; + break; + + case REQTYPE_RESP_NET_SG: + case REQTYPE_RESP_NET: + sc = buf; + skb = sc->callback_arg; + break; + + default: + return; + } + + (*pkts_compl)++; +/*TODO, Use some other pound define to suggest + * the fact that iqs are not tied to netdevs + * and can take traffic from different netdevs + * hence bql reporting is done per packet + * than in bulk. Usage of NO_NAPI in txq completion is + * a little confusing + */ + *bytes_compl += skb->len; +} + +void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) +{ + struct octnet_buf_free_info *finfo; + struct sk_buff *skb; + struct octeon_soft_command *sc; + struct netdev_queue *txq; + + switch (reqtype) { + case REQTYPE_NORESP_NET: + case REQTYPE_NORESP_NET_SG: + finfo = buf; + skb = finfo->skb; + break; + + case REQTYPE_RESP_NET_SG: + case REQTYPE_RESP_NET: + sc = buf; + skb = sc->callback_arg; + break; + + default: + return; + } + + txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb)); + netdev_tx_sent_queue(txq, skb->len); +} + +void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) +{ + struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr; + struct net_device *netdev = (struct net_device *)nctrl->netpndev; + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + u8 *mac; + + switch (nctrl->ncmd.s.cmd) { + case OCTNET_CMD_CHANGE_DEVFLAGS: + case OCTNET_CMD_SET_MULTI_LIST: + break; + + case OCTNET_CMD_CHANGE_MACADDR: + mac = ((u8 *)&nctrl->udd[0]) + 2; + netif_info(lio, probe, lio->netdev, + "MACAddr changed to %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", + mac[0], mac[1], + mac[2], mac[3], + mac[4], mac[5]); + break; + + case OCTNET_CMD_CHANGE_MTU: + /* If command is successful, change the MTU. */ + netif_info(lio, probe, lio->netdev, "MTU Changed from %d to %d\n", + netdev->mtu, nctrl->ncmd.s.param1); + dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n", + netdev->name, netdev->mtu, + nctrl->ncmd.s.param1); + netdev->mtu = nctrl->ncmd.s.param1; + queue_delayed_work(lio->link_status_wq.wq, + &lio->link_status_wq.wk.work, 0); + break; + + case OCTNET_CMD_GPIO_ACCESS: + netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); + + break; + + case OCTNET_CMD_ID_ACTIVE: + netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); + + break; + + case OCTNET_CMD_LRO_ENABLE: + dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name); + break; + + case OCTNET_CMD_LRO_DISABLE: + dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n", + netdev->name); + break; + + case OCTNET_CMD_VERBOSE_ENABLE: + dev_info(&oct->pci_dev->dev, "%s Firmware debug enabled\n", + netdev->name); + break; + + case OCTNET_CMD_VERBOSE_DISABLE: + dev_info(&oct->pci_dev->dev, "%s Firmware debug disabled\n", + netdev->name); + break; + + case OCTNET_CMD_ENABLE_VLAN_FILTER: + dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n", + netdev->name); + break; + + case OCTNET_CMD_ADD_VLAN_FILTER: + dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n", + netdev->name, nctrl->ncmd.s.param1); + break; + + case OCTNET_CMD_DEL_VLAN_FILTER: + dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n", + netdev->name, nctrl->ncmd.s.param1); + break; + + case OCTNET_CMD_SET_SETTINGS: + dev_info(&oct->pci_dev->dev, "%s settings changed\n", + netdev->name); + + break; + + /* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL" + * Command passed by NIC driver + */ + case OCTNET_CMD_TNL_RX_CSUM_CTL: + if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) { + netif_info(lio, probe, lio->netdev, + "RX Checksum Offload Enabled\n"); + } else if (nctrl->ncmd.s.param1 == + OCTNET_CMD_RXCSUM_DISABLE) { + netif_info(lio, probe, lio->netdev, + "RX Checksum Offload Disabled\n"); + } + break; + + /* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL" + * Command passed by NIC driver + */ + case OCTNET_CMD_TNL_TX_CSUM_CTL: + if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) { + netif_info(lio, probe, lio->netdev, + "TX Checksum Offload Enabled\n"); + } else if (nctrl->ncmd.s.param1 == + OCTNET_CMD_TXCSUM_DISABLE) { + netif_info(lio, probe, lio->netdev, + "TX Checksum Offload Disabled\n"); + } + break; + + /* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG" + * Command passed by NIC driver + */ + case OCTNET_CMD_VXLAN_PORT_CONFIG: + if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) { + netif_info(lio, probe, lio->netdev, + "VxLAN Destination UDP PORT:%d ADDED\n", + nctrl->ncmd.s.param1); + } else if (nctrl->ncmd.s.more == + OCTNET_CMD_VXLAN_PORT_DEL) { + netif_info(lio, probe, lio->netdev, + "VxLAN Destination UDP PORT:%d DELETED\n", + nctrl->ncmd.s.param1); + } + break; + + case OCTNET_CMD_SET_FLOW_CTL: + netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n"); + break; + + default: + dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__, + nctrl->ncmd.s.cmd); + } +} diff --git a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c index 289eb8907922..f163e0abbeb2 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_ethtool.c @@ -32,6 +32,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" static int octnet_get_link_stats(struct net_device *netdev); @@ -75,6 +76,7 @@ enum { #define ARRAY_LENGTH(a) (sizeof(a) / sizeof((a)[0])) #define OCT_ETHTOOL_REGDUMP_LEN 4096 +#define OCT_ETHTOOL_REGDUMP_LEN_23XX (4096 * 11) #define OCT_ETHTOOL_REGSVER 1 /* statistics of PF */ @@ -188,6 +190,10 @@ static const char oct_droq_stats_strings[][ETH_GSTRING_LEN] = { "buffer_alloc_failure", }; +/* LiquidIO driver private flags */ +static const char oct_priv_flags_strings[][ETH_GSTRING_LEN] = { +}; + #define OCTNIC_NCMD_AUTONEG_ON 0x1 #define OCTNIC_NCMD_PHY_ON 0x2 @@ -259,6 +265,13 @@ lio_ethtool_get_channels(struct net_device *dev, max_tx = CFG_GET_IQ_MAX_Q(conf6x); rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf6x, lio->ifidx); tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf6x, lio->ifidx); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + max_rx = CFG_GET_OQ_MAX_Q(conf23); + max_tx = CFG_GET_IQ_MAX_Q(conf23); + rx_count = CFG_GET_NUM_RXQS_NIC_IF(conf23, lio->ifidx); + tx_count = CFG_GET_NUM_TXQS_NIC_IF(conf23, lio->ifidx); } channel->max_rx = max_rx; @@ -290,18 +303,16 @@ lio_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom, struct lio *lio = GET_LIO(netdev); struct octeon_device *oct_dev = lio->oct_dev; struct octeon_board_info *board_info; - int len; - if (eeprom->offset != 0) + if (eeprom->offset) return -EINVAL; eeprom->magic = oct_dev->pci_dev->vendor; board_info = (struct octeon_board_info *)(&oct_dev->boardinfo); - len = - sprintf((char *)bytes, - "boardname:%s serialnum:%s maj:%lld min:%lld\n", - board_info->name, board_info->serial_number, - board_info->major, board_info->minor); + sprintf((char *)bytes, + "boardname:%s serialnum:%s maj:%lld min:%lld\n", + board_info->name, board_info->serial_number, + board_info->major, board_info->minor); return 0; } @@ -333,6 +344,32 @@ static int octnet_gpio_access(struct net_device *netdev, int addr, int val) return 0; } +static int octnet_id_active(struct net_device *netdev, int val) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + int ret = 0; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = OCTNET_CMD_ID_ACTIVE; + nctrl.ncmd.s.param1 = val; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Failed to configure gpio value\n"); + return -EINVAL; + } + + return 0; +} + /* Callback for when mdio command response arrives */ static void octnet_mdio_resp_callback(struct octeon_device *oct, @@ -406,7 +443,7 @@ octnet_mdio45_access(struct lio *lio, int op, int loc, int *value) dev_err(&oct_dev->pci_dev->dev, "octnet_mdio45_access instruction failed status: %x\n", retval); - retval = -EBUSY; + retval = -EBUSY; } else { /* Sleep on a wait queue till the cond flag indicates that the * response arrived @@ -476,6 +513,11 @@ static int lio_set_phys_id(struct net_device *netdev, &value); if (ret) return ret; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + octnet_id_active(netdev, LED_IDENTIFICATION_ON); + + /* returns 0 since updates are asynchronous */ + return 0; } else { return -EINVAL; } @@ -521,7 +563,10 @@ static int lio_set_phys_id(struct net_device *netdev, &lio->phy_beacon_val); if (ret) return ret; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + octnet_id_active(netdev, LED_IDENTIFICATION_OFF); + return 0; } else { return -EINVAL; } @@ -550,6 +595,13 @@ lio_ethtool_get_ringparam(struct net_device *netdev, rx_max_pending = CN6XXX_MAX_OQ_DESCRIPTORS; rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf6x, lio->ifidx); tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf6x, lio->ifidx); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + tx_max_pending = CN23XX_MAX_IQ_DESCRIPTORS; + rx_max_pending = CN23XX_MAX_OQ_DESCRIPTORS; + rx_pending = CFG_GET_NUM_RX_DESCS_NIC_IF(conf23, lio->ifidx); + tx_pending = CFG_GET_NUM_TX_DESCS_NIC_IF(conf23, lio->ifidx); } if (lio->mtu > OCTNET_DEFAULT_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) { @@ -610,6 +662,69 @@ lio_get_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) pause->rx_pause = oct->rx_pause; } +static int +lio_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause) +{ + /* Notes: Not supporting any auto negotiation in these + * drivers. + */ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + struct octnic_ctrl_pkt nctrl; + struct oct_link_info *linfo = &lio->linfo; + + int ret = 0; + + if (oct->chip_id != OCTEON_CN23XX_PF_VID) + return -EINVAL; + + if (linfo->link.s.duplex == 0) { + /*no flow control for half duplex*/ + if (pause->rx_pause || pause->tx_pause) + return -EINVAL; + } + + /*do not support autoneg of link flow control*/ + if (pause->autoneg == AUTONEG_ENABLE) + return -EINVAL; + + memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + + nctrl.ncmd.u64 = 0; + nctrl.ncmd.s.cmd = OCTNET_CMD_SET_FLOW_CTL; + nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; + nctrl.wait_time = 100; + nctrl.netpndev = (u64)netdev; + nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; + + if (pause->rx_pause) { + /*enable rx pause*/ + nctrl.ncmd.s.param1 = 1; + } else { + /*disable rx pause*/ + nctrl.ncmd.s.param1 = 0; + } + + if (pause->tx_pause) { + /*enable tx pause*/ + nctrl.ncmd.s.param2 = 1; + } else { + /*disable tx pause*/ + nctrl.ncmd.s.param2 = 0; + } + + ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); + if (ret < 0) { + dev_err(&oct->pci_dev->dev, "Failed to set pause parameter\n"); + return -EINVAL; + } + + oct->rx_pause = pause->rx_pause; + oct->tx_pause = pause->tx_pause; + + return 0; +} + static void lio_get_ethtool_stats(struct net_device *netdev, struct ethtool_stats *stats __attribute__((unused)), @@ -877,6 +992,27 @@ lio_get_ethtool_stats(struct net_device *netdev, } } +static void lio_get_priv_flags_strings(struct lio *lio, u8 *data) +{ + struct octeon_device *oct_dev = lio->oct_dev; + int i; + + switch (oct_dev->chip_id) { + case OCTEON_CN23XX_PF_VID: + for (i = 0; i < ARRAY_SIZE(oct_priv_flags_strings); i++) { + sprintf(data, "%s", oct_priv_flags_strings[i]); + data += ETH_GSTRING_LEN; + } + break; + case OCTEON_CN68XX: + case OCTEON_CN66XX: + break; + default: + netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + break; + } +} + static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data) { struct lio *lio = GET_LIO(netdev); @@ -916,12 +1052,31 @@ static void lio_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } break; + case ETH_SS_PRIV_FLAGS: + lio_get_priv_flags_strings(lio, data); + break; default: netif_info(lio, drv, lio->netdev, "Unknown Stringset !!\n"); break; } } +static int lio_get_priv_flags_ss_count(struct lio *lio) +{ + struct octeon_device *oct_dev = lio->oct_dev; + + switch (oct_dev->chip_id) { + case OCTEON_CN23XX_PF_VID: + return ARRAY_SIZE(oct_priv_flags_strings); + case OCTEON_CN68XX: + case OCTEON_CN66XX: + return -EOPNOTSUPP; + default: + netif_info(lio, drv, lio->netdev, "Unknown Chip !!\n"); + return -EOPNOTSUPP; + } +} + static int lio_get_sset_count(struct net_device *netdev, int sset) { struct lio *lio = GET_LIO(netdev); @@ -932,6 +1087,8 @@ static int lio_get_sset_count(struct net_device *netdev, int sset) return (ARRAY_SIZE(oct_stats_strings) + ARRAY_SIZE(oct_iq_stats_strings) * oct_dev->num_iqs + ARRAY_SIZE(oct_droq_stats_strings) * oct_dev->num_oqs); + case ETH_SS_PRIV_FLAGS: + return lio_get_priv_flags_ss_count(lio); default: return -EOPNOTSUPP; } @@ -948,6 +1105,16 @@ static int lio_get_intr_coalesce(struct net_device *netdev, intrmod_cfg = &oct->intrmod; switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + if (!intrmod_cfg->rx_enable) { + intr_coal->rx_coalesce_usecs = intrmod_cfg->rx_usecs; + intr_coal->rx_max_coalesced_frames = + intrmod_cfg->rx_frames; + } + if (!intrmod_cfg->tx_enable) + intr_coal->tx_max_coalesced_frames = + intrmod_cfg->tx_frames; + break; case OCTEON_CN68XX: case OCTEON_CN66XX: { struct octeon_cn6xxx *cn6xxx = @@ -983,7 +1150,15 @@ static int lio_get_intr_coalesce(struct net_device *netdev, intr_coal->rx_coalesce_usecs_low = intrmod_cfg->rx_mintmr_trigger; intr_coal->rx_max_coalesced_frames_low = - intrmod_cfg->rx_mincnt_trigger; + intrmod_cfg->rx_mincnt_trigger; + } + if (OCTEON_CN23XX_PF(oct) && + (intrmod_cfg->tx_enable)) { + intr_coal->use_adaptive_tx_coalesce = intrmod_cfg->tx_enable; + intr_coal->tx_max_coalesced_frames_high = + intrmod_cfg->tx_maxcnt_trigger; + intr_coal->tx_max_coalesced_frames_low = + intrmod_cfg->tx_mincnt_trigger; } return 0; } @@ -1060,11 +1235,11 @@ static void octnet_nic_stats_callback(struct octeon_device *oct_dev, u32 status, void *ptr) { - struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; - struct oct_nic_stats_resp *resp = (struct oct_nic_stats_resp *) - sc->virtrptr; - struct oct_nic_stats_ctrl *ctrl = (struct oct_nic_stats_ctrl *) - sc->ctxptr; + struct octeon_soft_command *sc = (struct octeon_soft_command *)ptr; + struct oct_nic_stats_resp *resp = + (struct oct_nic_stats_resp *)sc->virtrptr; + struct oct_nic_stats_ctrl *ctrl = + (struct oct_nic_stats_ctrl *)sc->ctxptr; struct nic_rx_stats *rsp_rstats = &resp->stats.fromwire; struct nic_tx_stats *rsp_tstats = &resp->stats.fromhost; @@ -1314,14 +1489,35 @@ oct_cfg_rx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal) CFG_SET_OQ_INTR_PKT(cn6xxx->conf, rx_max_coalesced_frames); break; } + case OCTEON_CN23XX_PF_VID: { + int q_no; + + if (!intr_coal->rx_max_coalesced_frames) + rx_max_coalesced_frames = oct->intrmod.rx_frames; + else + rx_max_coalesced_frames = + intr_coal->rx_max_coalesced_frames; + for (q_no = 0; q_no < oct->num_oqs; q_no++) { + q_no += oct->sriov_info.pf_srn; + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (octeon_read_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no)) & + (0x3fffff00000000UL)) | + rx_max_coalesced_frames); + /*consider setting resend bit*/ + } + oct->intrmod.rx_frames = rx_max_coalesced_frames; + break; + } default: return -EINVAL; } return 0; } -static int oct_cfg_rx_intrtime(struct lio *lio, struct ethtool_coalesce - *intr_coal) +static int oct_cfg_rx_intrtime(struct lio *lio, + struct ethtool_coalesce *intr_coal) { struct octeon_device *oct = lio->oct_dev; u32 time_threshold, rx_coalesce_usecs; @@ -1346,6 +1542,27 @@ static int oct_cfg_rx_intrtime(struct lio *lio, struct ethtool_coalesce CFG_SET_OQ_INTR_TIME(cn6xxx->conf, rx_coalesce_usecs); break; } + case OCTEON_CN23XX_PF_VID: { + u64 time_threshold; + int q_no; + + if (!intr_coal->rx_coalesce_usecs) + rx_coalesce_usecs = oct->intrmod.rx_usecs; + else + rx_coalesce_usecs = intr_coal->rx_coalesce_usecs; + time_threshold = + cn23xx_pf_get_oq_ticks(oct, (u32)rx_coalesce_usecs); + for (q_no = 0; q_no < oct->num_oqs; q_no++) { + q_no += oct->sriov_info.pf_srn; + octeon_write_csr64(oct, + CN23XX_SLI_OQ_PKT_INT_LEVELS(q_no), + (oct->intrmod.rx_frames | + (time_threshold << 32))); + /*consider writing to resend bit here*/ + } + oct->intrmod.rx_usecs = rx_coalesce_usecs; + break; + } default: return -EINVAL; } @@ -1358,12 +1575,37 @@ oct_cfg_tx_intrcnt(struct lio *lio, struct ethtool_coalesce *intr_coal __attribute__((unused))) { struct octeon_device *oct = lio->oct_dev; + u32 iq_intr_pkt; + void __iomem *inst_cnt_reg; + u64 val; /* Config Cnt based interrupt values */ switch (oct->chip_id) { case OCTEON_CN68XX: case OCTEON_CN66XX: break; + case OCTEON_CN23XX_PF_VID: { + int q_no; + + if (!intr_coal->tx_max_coalesced_frames) + iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD & + CN23XX_PKT_IN_DONE_WMARK_MASK; + else + iq_intr_pkt = intr_coal->tx_max_coalesced_frames & + CN23XX_PKT_IN_DONE_WMARK_MASK; + for (q_no = 0; q_no < oct->num_iqs; q_no++) { + inst_cnt_reg = (oct->instr_queue[q_no])->inst_cnt_reg; + val = readq(inst_cnt_reg); + /*clear wmark and count.dont want to write count back*/ + val = (val & 0xFFFF000000000000ULL) | + ((u64)iq_intr_pkt + << CN23XX_PKT_IN_DONE_WMARK_BIT_POS); + writeq(val, inst_cnt_reg); + /*consider setting resend bit*/ + } + oct->intrmod.tx_frames = iq_intr_pkt; + break; + } default: return -EINVAL; } @@ -1399,6 +1641,8 @@ static int lio_set_intr_coalesce(struct net_device *netdev, return -EINVAL; } break; + case OCTEON_CN23XX_PF_VID: + break; default: return -EINVAL; } @@ -1541,9 +1785,237 @@ static int lio_nway_reset(struct net_device *netdev) } /* Return register dump len. */ -static int lio_get_regs_len(struct net_device *dev __attribute__((unused))) +static int lio_get_regs_len(struct net_device *dev) { - return OCT_ETHTOOL_REGDUMP_LEN; + struct lio *lio = GET_LIO(dev); + struct octeon_device *oct = lio->oct_dev; + + switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + return OCT_ETHTOOL_REGDUMP_LEN_23XX; + default: + return OCT_ETHTOOL_REGDUMP_LEN; + } +} + +static int cn23xx_read_csr_reg(char *s, struct octeon_device *oct) +{ + u32 reg; + u8 pf_num = oct->pf_num; + int len = 0; + int i; + + /* PCI Window Registers */ + + len += sprintf(s + len, "\n\t Octeon CSR Registers\n\n"); + + /*0x29030 or 0x29040*/ + reg = CN23XX_SLI_PKT_MAC_RINFO64(oct->pcie_port, oct->pf_num); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_MAC%d_PF%d_RINFO): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27080 or 0x27090*/ + reg = CN23XX_SLI_MAC_PF_INT_ENB64(oct->pcie_port, oct->pf_num); + len += + sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_ENB): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27000 or 0x27010*/ + reg = CN23XX_SLI_MAC_PF_INT_SUM64(oct->pcie_port, oct->pf_num); + len += + sprintf(s + len, "\n[%08x] (SLI_MAC%d_PF%d_INT_SUM): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29120*/ + reg = 0x29120; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_MEM_CTL): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x27300*/ + reg = 0x27300 + oct->pcie_port * CN23XX_MAC_INT_OFFSET + + (oct->pf_num) * CN23XX_PF_INT_OFFSET; + len += sprintf( + s + len, "\n[%08x] (SLI_MAC%d_PF%d_PKT_VF_INT): %016llx\n", reg, + oct->pcie_port, oct->pf_num, (u64)octeon_read_csr64(oct, reg)); + + /*0x27200*/ + reg = 0x27200 + oct->pcie_port * CN23XX_MAC_INT_OFFSET + + (oct->pf_num) * CN23XX_PF_INT_OFFSET; + len += sprintf(s + len, + "\n[%08x] (SLI_MAC%d_PF%d_PP_VF_INT): %016llx\n", + reg, oct->pcie_port, oct->pf_num, + (u64)octeon_read_csr64(oct, reg)); + + /*29130*/ + reg = CN23XX_SLI_PKT_CNT_INT; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_CNT_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29140*/ + reg = CN23XX_SLI_PKT_TIME_INT; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_TIME_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29160*/ + reg = 0x29160; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_INT): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29180*/ + reg = CN23XX_SLI_OQ_WMARK; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_OUTPUT_WMARK): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + + /*0x291E0*/ + reg = CN23XX_SLI_PKT_IOQ_RING_RST; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_RING_RST): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29210*/ + reg = CN23XX_SLI_GBL_CONTROL; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_GBL_CONTROL): %016llx\n", reg, + (u64)octeon_read_csr64(oct, reg)); + + /*0x29220*/ + reg = 0x29220; + len += sprintf(s + len, "\n[%08x] (SLI_PKT_BIST_STATUS): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + + /*PF only*/ + if (pf_num == 0) { + /*0x29260*/ + reg = CN23XX_SLI_OUT_BP_EN_W1S; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_OUT_BP_EN_W1S): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + } else if (pf_num == 1) { + /*0x29270*/ + reg = CN23XX_SLI_OUT_BP_EN2_W1S; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_OUT_BP_EN2_W1S): %016llx\n", + reg, (u64)octeon_read_csr64(oct, reg)); + } + + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_BUFF_INFO_SIZE(i); + len += + sprintf(s + len, "\n[%08x] (SLI_PKT%d_OUT_SIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10040*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_INSTR_COUNT64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10080*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKTS_CREDIT(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_SLIST_BAOFF_DBELL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10090*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_SIZE(i); + len += sprintf( + s + len, "\n[%08x] (SLI_PKT%d_SLIST_FIFO_RSIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10050*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKT_CONTROL(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d__OUTPUT_CONTROL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10070*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_BASE_ADDR64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_SLIST_BADDR): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100a0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKT_INT_LEVELS(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_INT_LEVELS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100b0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = CN23XX_SLI_OQ_PKTS_SENT(i); + len += sprintf(s + len, "\n[%08x] (SLI_PKT%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x100c0*/ + for (i = 0; i < CN23XX_MAX_OUTPUT_QUEUES; i++) { + reg = 0x100c0 + i * CN23XX_OQ_OFFSET; + len += sprintf(s + len, + "\n[%08x] (SLI_PKT%d_ERROR_INFO): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + + /*0x10000*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_PKT_CONTROL64(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INPUT_CONTROL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10010*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_BASE_ADDR64(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_BADDR): %016llx\n", reg, + i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10020*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_DOORBELL(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_BAOFF_DBELL): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10030*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) { + reg = CN23XX_SLI_IQ_SIZE(i); + len += sprintf( + s + len, + "\n[%08x] (SLI_PKT%d_INSTR_FIFO_RSIZE): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + /*0x10040*/ + for (i = 0; i < CN23XX_MAX_INPUT_QUEUES; i++) + reg = CN23XX_SLI_IQ_INSTR_COUNT64(i); + len += sprintf(s + len, + "\n[%08x] (SLI_PKT_IN_DONE%d_CNTS): %016llx\n", + reg, i, (u64)octeon_read_csr64(oct, reg)); + } + + return len; } static int cn6xxx_read_csr_reg(char *s, struct octeon_device *oct) @@ -1688,6 +2160,10 @@ static void lio_get_regs(struct net_device *dev, regs->version = OCT_ETHTOOL_REGSVER; switch (oct->chip_id) { + case OCTEON_CN23XX_PF_VID: + memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN_23XX); + len += cn23xx_read_csr_reg(regbuf + len, oct); + break; case OCTEON_CN68XX: case OCTEON_CN66XX: memset(regbuf, 0, OCT_ETHTOOL_REGDUMP_LEN); @@ -1729,6 +2205,7 @@ static const struct ethtool_ops lio_ethtool_ops = { .get_strings = lio_get_strings, .get_ethtool_stats = lio_get_ethtool_stats, .get_pauseparam = lio_get_pauseparam, + .set_pauseparam = lio_set_pauseparam, .get_regs_len = lio_get_regs_len, .get_regs = lio_get_regs, .get_msglevel = lio_get_msglevel, diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 20d6942edf40..afc6f9dc8119 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -21,11 +21,10 @@ **********************************************************************/ #include #include -#include -#include #include #include #include +#include #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" @@ -37,6 +36,7 @@ #include "cn66xx_regs.h" #include "cn66xx_device.h" #include "cn68xx_device.h" +#include "cn23xx_pf_device.h" #include "liquidio_image.h" MODULE_AUTHOR("Cavium Networks, "); @@ -52,11 +52,6 @@ module_param(ddr_timeout, int, 0644); MODULE_PARM_DESC(ddr_timeout, "Number of milliseconds to wait for DDR initialization. 0 waits for ddr_timeout to be set to non-zero value before starting to check"); -static u32 console_bitmask; -module_param(console_bitmask, int, 0644); -MODULE_PARM_DESC(console_bitmask, - "Bitmask indicating which consoles have debug output redirected to syslog."); - #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK) #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count) \ @@ -102,6 +97,14 @@ struct liquidio_if_cfg_resp { u64 status; }; +struct liquidio_rx_ctl_context { + int octeon_id; + + wait_queue_head_t wc; + + int cond; +}; + struct oct_link_status_resp { u64 rh; struct oct_link_info link_info; @@ -139,7 +142,8 @@ union tx_info { #define OCTNIC_MAX_SG (MAX_SKB_FRAGS) #define OCTNIC_GSO_MAX_HEADER_SIZE 128 -#define OCTNIC_GSO_MAX_SIZE (GSO_MAX_SIZE - OCTNIC_GSO_MAX_HEADER_SIZE) +#define OCTNIC_GSO_MAX_SIZE \ + (CN23XX_DEFAULT_INPUT_JABBER - OCTNIC_GSO_MAX_HEADER_SIZE) /** Structure of a node in list of gather components maintained by * NIC driver for each network device. @@ -162,27 +166,6 @@ struct octnic_gather { u64 sg_dma_ptr; }; -/** This structure is used by NIC driver to store information required - * to free the sk_buff when the packet has been fetched by Octeon. - * Bytes offset below assume worst-case of a 64-bit system. - */ -struct octnet_buf_free_info { - /** Bytes 1-8. Pointer to network device private structure. */ - struct lio *lio; - - /** Bytes 9-16. Pointer to sk_buff. */ - struct sk_buff *skb; - - /** Bytes 17-24. Pointer to gather list. */ - struct octnic_gather *g; - - /** Bytes 25-32. Physical address of skb->data or gather list. */ - u64 dptr; - - /** Bytes 33-47. Piggybacked soft command, if any */ - struct octeon_soft_command *sc; -}; - struct handshake { struct completion init; struct completion started; @@ -198,6 +181,7 @@ struct octeon_device_priv { }; static int octeon_device_init(struct octeon_device *); +static int liquidio_stop(struct net_device *netdev); static void liquidio_remove(struct pci_dev *pdev); static int liquidio_probe(struct pci_dev *pdev, const struct pci_device_id *ent); @@ -219,6 +203,20 @@ static void octeon_droq_bh(unsigned long pdev) continue; reschedule |= octeon_droq_process_packets(oct, oct->droq[q_no], MAX_PACKET_BUDGET); + lio_enable_irq(oct->droq[q_no], NULL); + + if (OCTEON_CN23XX_PF(oct) && oct->msix_on) { + /* set time and cnt interrupt thresholds for this DROQ + * for NAPI + */ + int adjusted_q_no = q_no + oct->sriov_info.pf_srn; + + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKT_INT_LEVELS(adjusted_q_no), + 0x5700000040ULL); + octeon_write_csr64( + oct, CN23XX_SLI_OQ_PKTS_SENT(adjusted_q_no), 0); + } } if (reschedule) @@ -252,76 +250,6 @@ static int lio_wait_for_oq_pkts(struct octeon_device *oct) return pkt_cnt; } -void octeon_report_tx_completion_to_bql(void *txq, unsigned int pkts_compl, - unsigned int bytes_compl) -{ - struct netdev_queue *netdev_queue = txq; - - netdev_tx_completed_queue(netdev_queue, pkts_compl, bytes_compl); -} - -void octeon_update_tx_completion_counters(void *buf, int reqtype, - unsigned int *pkts_compl, - unsigned int *bytes_compl) -{ - struct octnet_buf_free_info *finfo; - struct sk_buff *skb = NULL; - struct octeon_soft_command *sc; - - switch (reqtype) { - case REQTYPE_NORESP_NET: - case REQTYPE_NORESP_NET_SG: - finfo = buf; - skb = finfo->skb; - break; - - case REQTYPE_RESP_NET_SG: - case REQTYPE_RESP_NET: - sc = buf; - skb = sc->callback_arg; - break; - - default: - return; - } - - (*pkts_compl)++; - *bytes_compl += skb->len; -} - -void octeon_report_sent_bytes_to_bql(void *buf, int reqtype) -{ - struct octnet_buf_free_info *finfo; - struct sk_buff *skb; - struct octeon_soft_command *sc; - struct netdev_queue *txq; - - switch (reqtype) { - case REQTYPE_NORESP_NET: - case REQTYPE_NORESP_NET_SG: - finfo = buf; - skb = finfo->skb; - break; - - case REQTYPE_RESP_NET_SG: - case REQTYPE_RESP_NET: - sc = buf; - skb = sc->callback_arg; - break; - - default: - return; - } - - txq = netdev_get_tx_queue(skb->dev, skb_get_queue_mapping(skb)); - netdev_tx_sent_queue(txq, skb->len); -} - -int octeon_console_debug_enabled(u32 console) -{ - return (console_bitmask >> (console)) & 0x1; -} - /** * \brief Forces all IO queues off on a given device * @param oct Pointer to Octeon device @@ -441,7 +369,7 @@ static void stop_pci_io(struct octeon_device *oct) pci_disable_device(oct->pci_dev); /* Disable interrupts */ - oct->fn_list.disable_interrupt(oct->chip); + oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); pcierror_quiesce_device(oct); @@ -570,6 +498,9 @@ static const struct pci_device_id liquidio_pci_tbl[] = { { /* 66xx */ PCI_VENDOR_ID_CAVIUM, 0x92, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, + { /* 23xx pf */ + PCI_VENDOR_ID_CAVIUM, 0x9702, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 + }, { 0, 0, 0, 0, 0, 0, 0 } @@ -587,7 +518,6 @@ static struct pci_driver liquidio_pci_driver = { .suspend = liquidio_suspend, .resume = liquidio_resume, #endif - }; /** @@ -935,6 +865,52 @@ static void print_link_info(struct net_device *netdev) } } +/** + * \brief Routine to notify MTU change + * @param work work_struct data structure + */ +static void octnet_link_status_change(struct work_struct *work) +{ + struct cavium_wk *wk = (struct cavium_wk *)work; + struct lio *lio = (struct lio *)wk->ctxptr; + + rtnl_lock(); + call_netdevice_notifiers(NETDEV_CHANGEMTU, lio->netdev); + rtnl_unlock(); +} + +/** + * \brief Sets up the mtu status change work + * @param netdev network device + */ +static inline int setup_link_status_change_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + struct octeon_device *oct = lio->oct_dev; + + lio->link_status_wq.wq = alloc_workqueue("link-status", + WQ_MEM_RECLAIM, 0); + if (!lio->link_status_wq.wq) { + dev_err(&oct->pci_dev->dev, "unable to create cavium link status wq\n"); + return -1; + } + INIT_DELAYED_WORK(&lio->link_status_wq.wk.work, + octnet_link_status_change); + lio->link_status_wq.wk.ctxptr = lio; + + return 0; +} + +static inline void cleanup_link_status_change_wq(struct net_device *netdev) +{ + struct lio *lio = GET_LIO(netdev); + + if (lio->link_status_wq.wq) { + cancel_delayed_work_sync(&lio->link_status_wq.wk.work); + destroy_workqueue(lio->link_status_wq.wq); + } +} + /** * \brief Update link status * @param netdev network device @@ -973,8 +949,6 @@ static void update_txq_status(struct octeon_device *oct, int iq_num) struct lio *lio; struct octeon_instr_queue *iq = oct->instr_queue[iq_num]; - /*octeon_update_iq_read_idx(oct, iq);*/ - netdev = oct->props[iq->ifidx].netdev; /* This is needed because the first IQ does not have @@ -1002,12 +976,32 @@ static void update_txq_status(struct octeon_device *oct, int iq_num) } } +static +int liquidio_schedule_msix_droq_pkt_handler(struct octeon_droq *droq, u64 ret) +{ + struct octeon_device *oct = droq->oct_dev; + struct octeon_device_priv *oct_priv = + (struct octeon_device_priv *)oct->priv; + + if (droq->ops.poll_mode) { + droq->ops.napi_fn(droq); + } else { + if (ret & MSIX_PO_INT) { + tasklet_schedule(&oct_priv->droq_tasklet); + return 1; + } + /* this will be flushed periodically by check iq db */ + if (ret & MSIX_PI_INT) + return 0; + } + return 0; +} + /** * \brief Droq packet processor sceduler * @param oct octeon device */ -static -void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) +static void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) { struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; @@ -1032,19 +1026,36 @@ void liquidio_schedule_droq_pkt_handlers(struct octeon_device *oct) } } +static irqreturn_t +liquidio_msix_intr_handler(int irq __attribute__((unused)), void *dev) +{ + u64 ret; + struct octeon_ioq_vector *ioq_vector = (struct octeon_ioq_vector *)dev; + struct octeon_device *oct = ioq_vector->oct_dev; + struct octeon_droq *droq = oct->droq[ioq_vector->droq_index]; + + ret = oct->fn_list.msix_interrupt_handler(ioq_vector); + + if ((ret & MSIX_PO_INT) || (ret & MSIX_PI_INT)) + liquidio_schedule_msix_droq_pkt_handler(droq, ret); + + return IRQ_HANDLED; +} + /** * \brief Interrupt handler for octeon * @param irq unused * @param dev octeon device */ static -irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev) +irqreturn_t liquidio_legacy_intr_handler(int irq __attribute__((unused)), + void *dev) { struct octeon_device *oct = (struct octeon_device *)dev; irqreturn_t ret; /* Disable our interrupts for the duration of ISR */ - oct->fn_list.disable_interrupt(oct->chip); + oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); ret = oct->fn_list.process_interrupt_regs(oct); @@ -1053,7 +1064,7 @@ irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev) /* Re-enable our interrupts */ if (!(atomic_read(&oct->status) == OCT_DEV_IN_RESET)) - oct->fn_list.enable_interrupt(oct->chip); + oct->fn_list.enable_interrupt(oct, OCTEON_ALL_INTR); return ret; } @@ -1067,22 +1078,204 @@ irqreturn_t liquidio_intr_handler(int irq __attribute__((unused)), void *dev) static int octeon_setup_interrupt(struct octeon_device *oct) { int irqret, err; + struct msix_entry *msix_entries; + int i; + int num_ioq_vectors; + int num_alloc_ioq_vectors; - err = pci_enable_msi(oct->pci_dev); - if (err) - dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n", - err); - else - oct->flags |= LIO_FLAG_MSI_ENABLED; + if (OCTEON_CN23XX_PF(oct) && oct->msix_on) { + oct->num_msix_irqs = oct->sriov_info.num_pf_rings; + /* one non ioq interrupt for handling sli_mac_pf_int_sum */ + oct->num_msix_irqs += 1; - irqret = request_irq(oct->pci_dev->irq, liquidio_intr_handler, - IRQF_SHARED, "octeon", oct); - if (irqret) { - if (oct->flags & LIO_FLAG_MSI_ENABLED) - pci_disable_msi(oct->pci_dev); - dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n", - irqret); - return 1; + oct->msix_entries = kcalloc( + oct->num_msix_irqs, sizeof(struct msix_entry), GFP_KERNEL); + if (!oct->msix_entries) + return 1; + + msix_entries = (struct msix_entry *)oct->msix_entries; + /*Assumption is that pf msix vectors start from pf srn to pf to + * trs and not from 0. if not change this code + */ + for (i = 0; i < oct->num_msix_irqs - 1; i++) + msix_entries[i].entry = oct->sriov_info.pf_srn + i; + msix_entries[oct->num_msix_irqs - 1].entry = + oct->sriov_info.trs; + num_alloc_ioq_vectors = pci_enable_msix_range( + oct->pci_dev, msix_entries, + oct->num_msix_irqs, + oct->num_msix_irqs); + if (num_alloc_ioq_vectors < 0) { + dev_err(&oct->pci_dev->dev, "unable to Allocate MSI-X interrupts\n"); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + return 1; + } + dev_dbg(&oct->pci_dev->dev, "OCTEON: Enough MSI-X interrupts are allocated...\n"); + + num_ioq_vectors = oct->num_msix_irqs; + + /** For PF, there is one non-ioq interrupt handler */ + num_ioq_vectors -= 1; + irqret = request_irq(msix_entries[num_ioq_vectors].vector, + liquidio_legacy_intr_handler, 0, "octeon", + oct); + if (irqret) { + dev_err(&oct->pci_dev->dev, + "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n", + irqret); + pci_disable_msix(oct->pci_dev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + return 1; + } + + for (i = 0; i < num_ioq_vectors; i++) { + irqret = request_irq(msix_entries[i].vector, + liquidio_msix_intr_handler, 0, + "octeon", &oct->ioq_vector[i]); + if (irqret) { + dev_err(&oct->pci_dev->dev, + "OCTEON: Request_irq failed for MSIX interrupt Error: %d\n", + irqret); + /** Freeing the non-ioq irq vector here . */ + free_irq(msix_entries[num_ioq_vectors].vector, + oct); + + while (i) { + i--; + /** clearing affinity mask. */ + irq_set_affinity_hint( + msix_entries[i].vector, NULL); + free_irq(msix_entries[i].vector, + &oct->ioq_vector[i]); + } + pci_disable_msix(oct->pci_dev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + return 1; + } + oct->ioq_vector[i].vector = msix_entries[i].vector; + /* assign the cpu mask for this msix interrupt vector */ + irq_set_affinity_hint( + msix_entries[i].vector, + (&oct->ioq_vector[i].affinity_mask)); + } + dev_dbg(&oct->pci_dev->dev, "OCTEON[%d]: MSI-X enabled\n", + oct->octeon_id); + } else { + err = pci_enable_msi(oct->pci_dev); + if (err) + dev_warn(&oct->pci_dev->dev, "Reverting to legacy interrupts. Error: %d\n", + err); + else + oct->flags |= LIO_FLAG_MSI_ENABLED; + + irqret = request_irq(oct->pci_dev->irq, + liquidio_legacy_intr_handler, IRQF_SHARED, + "octeon", oct); + if (irqret) { + if (oct->flags & LIO_FLAG_MSI_ENABLED) + pci_disable_msi(oct->pci_dev); + dev_err(&oct->pci_dev->dev, "Request IRQ failed with code: %d\n", + irqret); + return 1; + } + } + return 0; +} + +static int liquidio_watchdog(void *param) +{ + u64 wdog; + u16 mask_of_stuck_cores = 0; + u16 mask_of_crashed_cores = 0; + int core_num; + u8 core_is_stuck[LIO_MAX_CORES]; + u8 core_crashed[LIO_MAX_CORES]; + struct octeon_device *oct = param; + + memset(core_is_stuck, 0, sizeof(core_is_stuck)); + memset(core_crashed, 0, sizeof(core_crashed)); + + while (!kthread_should_stop()) { + mask_of_crashed_cores = + (u16)octeon_read_csr64(oct, CN23XX_SLI_SCRATCH2); + + for (core_num = 0; core_num < LIO_MAX_CORES; core_num++) { + if (!core_is_stuck[core_num]) { + wdog = lio_pci_readq(oct, CIU3_WDOG(core_num)); + + /* look at watchdog state field */ + wdog &= CIU3_WDOG_MASK; + if (wdog) { + /* this watchdog timer has expired */ + core_is_stuck[core_num] = + LIO_MONITOR_WDOG_EXPIRE; + mask_of_stuck_cores |= (1 << core_num); + } + } + + if (!core_crashed[core_num]) + core_crashed[core_num] = + (mask_of_crashed_cores >> core_num) & 1; + } + + if (mask_of_stuck_cores) { + for (core_num = 0; core_num < LIO_MAX_CORES; + core_num++) { + if (core_is_stuck[core_num] == 1) { + dev_err(&oct->pci_dev->dev, + "ERROR: Octeon core %d is stuck!\n", + core_num); + /* 2 means we have printk'd an error + * so no need to repeat the same printk + */ + core_is_stuck[core_num] = + LIO_MONITOR_CORE_STUCK_MSGD; + } + } + } + + if (mask_of_crashed_cores) { + for (core_num = 0; core_num < LIO_MAX_CORES; + core_num++) { + if (core_crashed[core_num] == 1) { + dev_err(&oct->pci_dev->dev, + "ERROR: Octeon core %d crashed! See oct-fwdump for details.\n", + core_num); + /* 2 means we have printk'd an error + * so no need to repeat the same printk + */ + core_crashed[core_num] = + LIO_MONITOR_CORE_STUCK_MSGD; + } + } + } +#ifdef CONFIG_MODULE_UNLOAD + if (mask_of_stuck_cores || mask_of_crashed_cores) { + /* make module refcount=0 so that rmmod will work */ + long refcount; + + refcount = module_refcount(THIS_MODULE); + + while (refcount > 0) { + module_put(THIS_MODULE); + refcount = module_refcount(THIS_MODULE); + } + + /* compensate for and withstand an unlikely (but still + * possible) race condition + */ + while (refcount < 0) { + try_module_get(THIS_MODULE); + refcount = module_refcount(THIS_MODULE); + } + } +#endif + /* sleep for two seconds */ + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(2 * HZ); } return 0; @@ -1107,6 +1300,9 @@ liquidio_probe(struct pci_dev *pdev, return -ENOMEM; } + if (pdev->device == OCTEON_CN23XX_PF_VID) + oct_dev->msix_on = LIO_FLAG_MSIX_ENABLED; + dev_info(&pdev->dev, "Initializing device %x:%x.\n", (u32)pdev->vendor, (u32)pdev->device); @@ -1130,6 +1326,30 @@ liquidio_probe(struct pci_dev *pdev, return -ENOMEM; } + if (OCTEON_CN23XX_PF(oct_dev)) { + u64 scratch1; + u8 bus, device, function; + + scratch1 = octeon_read_csr64(oct_dev, CN23XX_SLI_SCRATCH1); + if (!(scratch1 & 4ULL)) { + /* Bit 2 of SLI_SCRATCH_1 is a flag that indicates that + * the lio watchdog kernel thread is running for this + * NIC. Each NIC gets one watchdog kernel thread. + */ + scratch1 |= 4ULL; + octeon_write_csr64(oct_dev, CN23XX_SLI_SCRATCH1, + scratch1); + + bus = pdev->bus->number; + device = PCI_SLOT(pdev->devfn); + function = PCI_FUNC(pdev->devfn); + oct_dev->watchdog_task = kthread_create( + liquidio_watchdog, oct_dev, + "liowd/%02hhx:%02hhx.%hhx", bus, device, function); + wake_up_process(oct_dev->watchdog_task); + } + } + oct_dev->rx_pause = 1; oct_dev->tx_pause = 1; @@ -1146,6 +1366,7 @@ liquidio_probe(struct pci_dev *pdev, static void octeon_destroy_resources(struct octeon_device *oct) { int i; + struct msix_entry *msix_entries; struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)oct->priv; @@ -1190,21 +1411,40 @@ static void octeon_destroy_resources(struct octeon_device *oct) dev_err(&oct->pci_dev->dev, "OQ had pending packets\n"); /* Disable interrupts */ - oct->fn_list.disable_interrupt(oct->chip); + oct->fn_list.disable_interrupt(oct, OCTEON_ALL_INTR); - /* Release the interrupt line */ - free_irq(oct->pci_dev->irq, oct); + if (oct->msix_on) { + msix_entries = (struct msix_entry *)oct->msix_entries; + for (i = 0; i < oct->num_msix_irqs - 1; i++) { + /* clear the affinity_cpumask */ + irq_set_affinity_hint(msix_entries[i].vector, + NULL); + free_irq(msix_entries[i].vector, + &oct->ioq_vector[i]); + } + /* non-iov vector's argument is oct struct */ + free_irq(msix_entries[i].vector, oct); - if (oct->flags & LIO_FLAG_MSI_ENABLED) - pci_disable_msi(oct->pci_dev); + pci_disable_msix(oct->pci_dev); + kfree(oct->msix_entries); + oct->msix_entries = NULL; + } else { + /* Release the interrupt line */ + free_irq(oct->pci_dev->irq, oct); - /* fallthrough */ + if (oct->flags & LIO_FLAG_MSI_ENABLED) + pci_disable_msi(oct->pci_dev); + } + + if (OCTEON_CN23XX_PF(oct)) + octeon_free_ioq_vector(oct); + /* fallthrough */ case OCT_DEV_IN_RESET: case OCT_DEV_DROQ_INIT_DONE: /*atomic_set(&oct->status, OCT_DEV_DROQ_INIT_DONE);*/ mdelay(100); for (i = 0; i < MAX_OCTEON_OUTPUT_QUEUES(oct); i++) { - if (!(oct->io_qmask.oq & (1ULL << i))) + if (!(oct->io_qmask.oq & BIT_ULL(i))) continue; octeon_delete_droq(oct, i); } @@ -1225,17 +1465,16 @@ static void octeon_destroy_resources(struct octeon_device *oct) case OCT_DEV_RESP_LIST_INIT_DONE: octeon_delete_response_list(oct); - /* fallthrough */ - case OCT_DEV_SC_BUFF_POOL_INIT_DONE: - octeon_free_sc_buffer_pool(oct); - /* fallthrough */ case OCT_DEV_INSTR_QUEUE_INIT_DONE: for (i = 0; i < MAX_OCTEON_INSTR_QUEUES(oct); i++) { - if (!(oct->io_qmask.iq & (1ULL << i))) + if (!(oct->io_qmask.iq & BIT_ULL(i))) continue; octeon_delete_instr_queue(oct, i); } + /* fallthrough */ + case OCT_DEV_SC_BUFF_POOL_INIT_DONE: + octeon_free_sc_buffer_pool(oct); /* fallthrough */ case OCT_DEV_DISPATCH_INIT_DONE: @@ -1244,9 +1483,9 @@ static void octeon_destroy_resources(struct octeon_device *oct) /* fallthrough */ case OCT_DEV_PCI_MAP_DONE: - /* Soft reset the octeon device before exiting */ - oct->fn_list.soft_reset(oct); + if ((!OCTEON_CN23XX_PF(oct)) || !oct->octeon_id) + oct->fn_list.soft_reset(oct); octeon_unmap_pci_barx(oct, 0); octeon_unmap_pci_barx(oct, 1); @@ -1263,6 +1502,34 @@ static void octeon_destroy_resources(struct octeon_device *oct) tasklet_kill(&oct_priv->droq_tasklet); } +/** + * \brief Callback for rx ctrl + * @param status status of request + * @param buf pointer to resp structure + */ +static void rx_ctl_callback(struct octeon_device *oct, + u32 status, + void *buf) +{ + struct octeon_soft_command *sc = (struct octeon_soft_command *)buf; + struct liquidio_rx_ctl_context *ctx; + + ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr; + + oct = lio_get_device(ctx->octeon_id); + if (status) + dev_err(&oct->pci_dev->dev, "rx ctl instruction failed. Status: %llx\n", + CVM_CAST64(status)); + WRITE_ONCE(ctx->cond, 1); + + /* This barrier is required to be sure that the response has been + * written fully before waking up the handler + */ + wmb(); + + wake_up_interruptible(&ctx->wc); +} + /** * \brief Send Rx control command * @param lio per-network private data @@ -1270,17 +1537,55 @@ static void octeon_destroy_resources(struct octeon_device *oct) */ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) { - struct octnic_ctrl_pkt nctrl; + struct octeon_soft_command *sc; + struct liquidio_rx_ctl_context *ctx; + union octnet_cmd *ncmd; + int ctx_size = sizeof(struct liquidio_rx_ctl_context); + struct octeon_device *oct = (struct octeon_device *)lio->oct_dev; + int retval; - memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); + if (oct->props[lio->ifidx].rx_on == start_stop) + return; - nctrl.ncmd.s.cmd = OCTNET_CMD_RX_CTL; - nctrl.ncmd.s.param1 = start_stop; - nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; - nctrl.netpndev = (u64)lio->netdev; + sc = (struct octeon_soft_command *) + octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, + 16, ctx_size); - if (octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl) < 0) + ncmd = (union octnet_cmd *)sc->virtdptr; + ctx = (struct liquidio_rx_ctl_context *)sc->ctxptr; + + WRITE_ONCE(ctx->cond, 0); + ctx->octeon_id = lio_get_device_id(oct); + init_waitqueue_head(&ctx->wc); + + ncmd->u64 = 0; + ncmd->s.cmd = OCTNET_CMD_RX_CTL; + ncmd->s.param1 = start_stop; + + octeon_swap_8B_data((u64 *)ncmd, (OCTNET_CMD_SIZE >> 3)); + + sc->iq_no = lio->linfo.txpciq[0].s.q_no; + + octeon_prepare_soft_command(oct, sc, OPCODE_NIC, + OPCODE_NIC_CMD, 0, 0, 0); + + sc->callback = rx_ctl_callback; + sc->callback_arg = sc; + sc->wait_time = 5000; + + retval = octeon_send_soft_command(oct, sc); + if (retval == IQ_SEND_FAILED) { netif_info(lio, rx_err, lio->netdev, "Failed to send RX Control message\n"); + } else { + /* Sleep on a wait queue till the cond flag indicates that the + * response arrived or timed-out. + */ + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) + return; + oct->props[lio->ifidx].rx_on = start_stop; + } + + octeon_free_soft_command(oct, sc); } /** @@ -1307,21 +1612,24 @@ static void liquidio_destroy_nic_device(struct octeon_device *oct, int ifidx) dev_dbg(&oct->pci_dev->dev, "NIC device cleanup\n"); - send_rx_ctrl_cmd(lio, 0); - if (atomic_read(&lio->ifstate) & LIO_IFSTATE_RUNNING) - txqs_stop(netdev); + liquidio_stop(netdev); if (oct->props[lio->ifidx].napi_enabled == 1) { list_for_each_entry_safe(napi, n, &netdev->napi_list, dev_list) napi_disable(napi); oct->props[lio->ifidx].napi_enabled = 0; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 0; } if (atomic_read(&lio->ifstate) & LIO_IFSTATE_REGISTERED) unregister_netdev(netdev); + cleanup_link_status_change_wq(netdev); + delete_glists(lio); free_netdev(netdev); @@ -1374,6 +1682,9 @@ static void liquidio_remove(struct pci_dev *pdev) dev_dbg(&oct_dev->pci_dev->dev, "Stopping device\n"); + if (oct_dev->watchdog_task) + kthread_stop(oct_dev->watchdog_task); + if (oct_dev->app_mode && (oct_dev->app_mode == CVM_DRV_NIC_APP)) liquidio_stop_nic_module(oct_dev); @@ -1417,6 +1728,12 @@ static int octeon_chip_specific_setup(struct octeon_device *oct) s = "CN66XX"; break; + case OCTEON_CN23XX_PCIID_PF: + oct->chip_id = OCTEON_CN23XX_PF_VID; + ret = setup_cn23xx_octeon_pf_device(oct); + s = "CN23XX"; + break; + default: s = "?"; dev_err(&oct->pci_dev->dev, "Unknown device found (dev_id: %x)\n", @@ -1867,7 +2184,7 @@ static void if_cfg_callback(struct octeon_device *oct, struct liquidio_if_cfg_context *ctx; resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; - ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; + ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; oct = lio_get_device(ctx->octeon_id); if (resp->status) @@ -2060,11 +2377,14 @@ static void napi_schedule_wrapper(void *param) */ static void liquidio_napi_drv_callback(void *arg) { + struct octeon_device *oct; struct octeon_droq *droq = arg; int this_cpu = smp_processor_id(); - if (droq->cpu_id == this_cpu) { - napi_schedule(&droq->napi); + oct = droq->oct_dev; + + if (OCTEON_CN23XX_PF(oct) || droq->cpu_id == this_cpu) { + napi_schedule_irqoff(&droq->napi); } else { struct call_single_data *csd = &droq->csd; @@ -2173,17 +2493,15 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, lio->ifidx), NULL); if (retval) { dev_err(&octeon_dev->pci_dev->dev, - " %s : Runtime DROQ(RxQ) creation failed.\n", + "%s : Runtime DROQ(RxQ) creation failed.\n", __func__); return 1; } droq = octeon_dev->droq[q_no]; napi = &droq->napi; - dev_dbg(&octeon_dev->pci_dev->dev, - "netif_napi_add netdev:%llx oct:%llx\n", - (u64)netdev, - (u64)octeon_dev); + dev_dbg(&octeon_dev->pci_dev->dev, "netif_napi_add netdev:%llx oct:%llx pf_num:%d\n", + (u64)netdev, (u64)octeon_dev, octeon_dev->pf_num); netif_napi_add(netdev, napi, liquidio_napi_poll, 64); /* designate a CPU for this droq */ @@ -2195,6 +2513,14 @@ static inline int setup_io_queues(struct octeon_device *octeon_dev, octeon_register_droq_ops(octeon_dev, q_no, &droq_ops); } + if (OCTEON_CN23XX_PF(octeon_dev)) { + /* 23XX PF can receive control messages (via the first PF-owned + * droq) from the firmware even if the ethX interface is down, + * so that's why poll_mode must be off for the first droq. + */ + octeon_dev->droq[0]->ops.poll_mode = 0; + } + /* set up IQs. */ for (q = 0; q < lio->linfo.num_txpciq; q++) { num_tx_descs = CFG_GET_NUM_TX_DESCS_NIC_IF(octeon_get_conf @@ -2235,7 +2561,7 @@ static void octnet_poll_check_txq_status(struct work_struct *work) * \brief Sets up the txq poll check * @param netdev network device */ -static inline void setup_tx_poll_fn(struct net_device *netdev) +static inline int setup_tx_poll_fn(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; @@ -2244,21 +2570,24 @@ static inline void setup_tx_poll_fn(struct net_device *netdev) WQ_MEM_RECLAIM, 0); if (!lio->txq_status_wq.wq) { dev_err(&oct->pci_dev->dev, "unable to create cavium txq status wq\n"); - return; + return -1; } INIT_DELAYED_WORK(&lio->txq_status_wq.wk.work, octnet_poll_check_txq_status); lio->txq_status_wq.wk.ctxptr = lio; queue_delayed_work(lio->txq_status_wq.wq, &lio->txq_status_wq.wk.work, msecs_to_jiffies(1)); + return 0; } static inline void cleanup_tx_poll_fn(struct net_device *netdev) { struct lio *lio = GET_LIO(netdev); - cancel_delayed_work_sync(&lio->txq_status_wq.wk.work); - destroy_workqueue(lio->txq_status_wq.wq); + if (lio->txq_status_wq.wq) { + cancel_delayed_work_sync(&lio->txq_status_wq.wk.work); + destroy_workqueue(lio->txq_status_wq.wq); + } } /** @@ -2276,24 +2605,34 @@ static int liquidio_open(struct net_device *netdev) napi_enable(napi); oct->props[lio->ifidx].napi_enabled = 1; + + if (OCTEON_CN23XX_PF(oct)) + oct->droq[0]->ops.poll_mode = 1; } oct_ptp_open(netdev); ifstate_set(lio, LIO_IFSTATE_RUNNING); - setup_tx_poll_fn(netdev); - - start_txq(netdev); + /* Ready for link status updates */ + lio->intf_open = 1; netif_info(lio, ifup, lio->netdev, "Interface Open, ready for traffic\n"); + if (OCTEON_CN23XX_PF(oct)) { + if (!oct->msix_on) + if (setup_tx_poll_fn(netdev)) + return -1; + } else { + if (setup_tx_poll_fn(netdev)) + return -1; + } + + start_txq(netdev); + /* tell Octeon to start forwarding packets to host */ send_rx_ctrl_cmd(lio, 1); - /* Ready for link status updates */ - lio->intf_open = 1; - dev_info(&oct->pci_dev->dev, "%s interface is opened\n", netdev->name); @@ -2328,7 +2667,12 @@ static int liquidio_stop(struct net_device *netdev) /* Now it should be safe to tell Octeon that nic interface is down. */ send_rx_ctrl_cmd(lio, 0); - cleanup_tx_poll_fn(netdev); + if (OCTEON_CN23XX_PF(oct)) { + if (!oct->msix_on) + cleanup_tx_poll_fn(netdev); + } else { + cleanup_tx_poll_fn(netdev); + } if (lio->ptp_clock) { ptp_clock_unregister(lio->ptp_clock); @@ -2340,143 +2684,6 @@ static int liquidio_stop(struct net_device *netdev) return 0; } -void liquidio_link_ctrl_cmd_completion(void *nctrl_ptr) -{ - struct octnic_ctrl_pkt *nctrl = (struct octnic_ctrl_pkt *)nctrl_ptr; - struct net_device *netdev = (struct net_device *)nctrl->netpndev; - struct lio *lio = GET_LIO(netdev); - struct octeon_device *oct = lio->oct_dev; - u8 *mac; - - switch (nctrl->ncmd.s.cmd) { - case OCTNET_CMD_CHANGE_DEVFLAGS: - case OCTNET_CMD_SET_MULTI_LIST: - break; - - case OCTNET_CMD_CHANGE_MACADDR: - mac = ((u8 *)&nctrl->udd[0]) + 2; - netif_info(lio, probe, lio->netdev, - "%s %2.2x:%2.2x:%2.2x:%2.2x:%2.2x:%2.2x\n", - "MACAddr changed to", mac[0], mac[1], - mac[2], mac[3], mac[4], mac[5]); - break; - - case OCTNET_CMD_CHANGE_MTU: - /* If command is successful, change the MTU. */ - netif_info(lio, probe, lio->netdev, " MTU Changed from %d to %d\n", - netdev->mtu, nctrl->ncmd.s.param1); - dev_info(&oct->pci_dev->dev, "%s MTU Changed from %d to %d\n", - netdev->name, netdev->mtu, - nctrl->ncmd.s.param1); - rtnl_lock(); - netdev->mtu = nctrl->ncmd.s.param1; - call_netdevice_notifiers(NETDEV_CHANGEMTU, netdev); - rtnl_unlock(); - break; - - case OCTNET_CMD_GPIO_ACCESS: - netif_info(lio, probe, lio->netdev, "LED Flashing visual identification\n"); - - break; - - case OCTNET_CMD_LRO_ENABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name); - break; - - case OCTNET_CMD_LRO_DISABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n", - netdev->name); - break; - - case OCTNET_CMD_VERBOSE_ENABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Enabled\n", netdev->name); - break; - - case OCTNET_CMD_VERBOSE_DISABLE: - dev_info(&oct->pci_dev->dev, "%s LRO Disabled\n", - netdev->name); - break; - - case OCTNET_CMD_ENABLE_VLAN_FILTER: - dev_info(&oct->pci_dev->dev, "%s VLAN filter enabled\n", - netdev->name); - break; - - case OCTNET_CMD_ADD_VLAN_FILTER: - dev_info(&oct->pci_dev->dev, "%s VLAN filter %d added\n", - netdev->name, nctrl->ncmd.s.param1); - break; - - case OCTNET_CMD_DEL_VLAN_FILTER: - dev_info(&oct->pci_dev->dev, "%s VLAN filter %d removed\n", - netdev->name, nctrl->ncmd.s.param1); - break; - - case OCTNET_CMD_SET_SETTINGS: - dev_info(&oct->pci_dev->dev, "%s settings changed\n", - netdev->name); - - break; - /* Case to handle "OCTNET_CMD_TNL_RX_CSUM_CTL" - * Command passed by NIC driver - */ - case OCTNET_CMD_TNL_RX_CSUM_CTL: - if (nctrl->ncmd.s.param1 == OCTNET_CMD_RXCSUM_ENABLE) { - netif_info(lio, probe, lio->netdev, - "%s RX Checksum Offload Enabled\n", - netdev->name); - } else if (nctrl->ncmd.s.param1 == - OCTNET_CMD_RXCSUM_DISABLE) { - netif_info(lio, probe, lio->netdev, - "%s RX Checksum Offload Disabled\n", - netdev->name); - } - break; - - /* Case to handle "OCTNET_CMD_TNL_TX_CSUM_CTL" - * Command passed by NIC driver - */ - case OCTNET_CMD_TNL_TX_CSUM_CTL: - if (nctrl->ncmd.s.param1 == OCTNET_CMD_TXCSUM_ENABLE) { - netif_info(lio, probe, lio->netdev, - "%s TX Checksum Offload Enabled\n", - netdev->name); - } else if (nctrl->ncmd.s.param1 == - OCTNET_CMD_TXCSUM_DISABLE) { - netif_info(lio, probe, lio->netdev, - "%s TX Checksum Offload Disabled\n", - netdev->name); - } - break; - - /* Case to handle "OCTNET_CMD_VXLAN_PORT_CONFIG" - * Command passed by NIC driver - */ - case OCTNET_CMD_VXLAN_PORT_CONFIG: - if (nctrl->ncmd.s.more == OCTNET_CMD_VXLAN_PORT_ADD) { - netif_info(lio, probe, lio->netdev, - "%s VxLAN Destination UDP PORT:%d ADDED\n", - netdev->name, - nctrl->ncmd.s.param1); - } else if (nctrl->ncmd.s.more == - OCTNET_CMD_VXLAN_PORT_DEL) { - netif_info(lio, probe, lio->netdev, - "%s VxLAN Destination UDP PORT:%d DELETED\n", - netdev->name, - nctrl->ncmd.s.param1); - } - break; - - case OCTNET_CMD_SET_FLOW_CTL: - netif_info(lio, probe, lio->netdev, "Set RX/TX flow control parameters\n"); - break; - - default: - dev_err(&oct->pci_dev->dev, "%s Unknown cmd %d\n", __func__, - nctrl->ncmd.s.cmd); - } -} - /** * \brief Converts a mask based on net device flags * @param netdev network device @@ -2817,8 +3024,7 @@ static void handle_timestamp(struct octeon_device *oct, */ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, struct octnic_data_pkt *ndata, - struct octnet_buf_free_info *finfo, - int xmit_more) + struct octnet_buf_free_info *finfo) { int retval; struct octeon_soft_command *sc; @@ -2846,9 +3052,15 @@ static inline int send_nic_timestamp_pkt(struct octeon_device *oct, sc->callback_arg = finfo->skb; sc->iq_no = ndata->q_no; - len = (u32)((struct octeon_instr_ih2 *)(&sc->cmd.cmd2.ih2))->dlengsz; + if (OCTEON_CN23XX_PF(oct)) + len = (u32)((struct octeon_instr_ih3 *) + (&sc->cmd.cmd3.ih3))->dlengsz; + else + len = (u32)((struct octeon_instr_ih2 *) + (&sc->cmd.cmd2.ih2))->dlengsz; + + ring_doorbell = 1; - ring_doorbell = !xmit_more; retval = octeon_send_command(oct, sc->iq_no, ring_doorbell, &sc->cmd, sc, len, ndata->reqtype); @@ -2881,7 +3093,7 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) union tx_info *tx_info; int status = 0; int q_idx = 0, iq_no = 0; - int xmit_more, j; + int j; u64 dptr = 0; u32 tag = 0; @@ -2980,7 +3192,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) return NETDEV_TX_BUSY; } - ndata.cmd.cmd2.dptr = dptr; + if (OCTEON_CN23XX_PF(oct)) + ndata.cmd.cmd3.dptr = dptr; + else + ndata.cmd.cmd2.dptr = dptr; finfo->dptr = dptr; ndata.reqtype = REQTYPE_NORESP_NET; @@ -3055,15 +3270,23 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) g->sg_size, DMA_TO_DEVICE); dptr = g->sg_dma_ptr; - ndata.cmd.cmd2.dptr = dptr; + if (OCTEON_CN23XX_PF(oct)) + ndata.cmd.cmd3.dptr = dptr; + else + ndata.cmd.cmd2.dptr = dptr; finfo->dptr = dptr; finfo->g = g; ndata.reqtype = REQTYPE_NORESP_NET_SG; } - irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh; - tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0]; + if (OCTEON_CN23XX_PF(oct)) { + irh = (struct octeon_instr_irh *)&ndata.cmd.cmd3.irh; + tx_info = (union tx_info *)&ndata.cmd.cmd3.ossp[0]; + } else { + irh = (struct octeon_instr_irh *)&ndata.cmd.cmd2.irh; + tx_info = (union tx_info *)&ndata.cmd.cmd2.ossp[0]; + } if (skb_shinfo(skb)->gso_size) { tx_info->s.gso_size = skb_shinfo(skb)->gso_size; @@ -3077,12 +3300,10 @@ static int liquidio_xmit(struct sk_buff *skb, struct net_device *netdev) irh->vlan = skb_vlan_tag_get(skb) & 0xfff; } - xmit_more = skb->xmit_more; - if (unlikely(cmdsetup.s.timestamp)) - status = send_nic_timestamp_pkt(oct, &ndata, finfo, xmit_more); + status = send_nic_timestamp_pkt(oct, &ndata, finfo); else - status = octnet_send_nic_data_pkt(oct, &ndata, xmit_more); + status = octnet_send_nic_data_pkt(oct, &ndata); if (status == IQ_SEND_FAILED) goto lio_xmit_failed; @@ -3190,8 +3411,8 @@ static int liquidio_vlan_rx_kill_vid(struct net_device *netdev, * OCTNET_CMD_RXCSUM_DISABLE * @returns SUCCESS or FAILURE */ -int liquidio_set_rxcsum_command(struct net_device *netdev, int command, - u8 rx_cmd) +static int liquidio_set_rxcsum_command(struct net_device *netdev, int command, + u8 rx_cmd) { struct lio *lio = GET_LIO(netdev); struct octeon_device *oct = lio->oct_dev; @@ -3249,31 +3470,6 @@ static int liquidio_vxlan_port_command(struct net_device *netdev, int command, return ret; } -int liquidio_set_feature(struct net_device *netdev, int cmd, u16 param1) -{ - struct lio *lio = GET_LIO(netdev); - struct octeon_device *oct = lio->oct_dev; - struct octnic_ctrl_pkt nctrl; - int ret = 0; - - memset(&nctrl, 0, sizeof(struct octnic_ctrl_pkt)); - - nctrl.ncmd.u64 = 0; - nctrl.ncmd.s.cmd = cmd; - nctrl.ncmd.s.param1 = param1; - nctrl.iq_no = lio->linfo.txpciq[0].s.q_no; - nctrl.wait_time = 100; - nctrl.netpndev = (u64)netdev; - nctrl.cb_fn = liquidio_link_ctrl_cmd_completion; - - ret = octnet_send_nic_ctrl_pkt(lio->oct_dev, &nctrl); - if (ret < 0) { - dev_err(&oct->pci_dev->dev, "Feature change failed in core (ret: 0x%x)\n", - ret); - } - return ret; -} - /** \brief Net device fix features * @param netdev pointer to network device * @param request features requested @@ -3492,8 +3688,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) union oct_nic_if_cfg if_cfg; unsigned int base_queue; unsigned int gmx_port_id; - u32 resp_size, ctx_size; + u32 resp_size, ctx_size, data_size; u32 ifidx_or_pfnum; + struct lio_version *vdata; /* This is to handle link status changes */ octeon_register_dispatch_fn(octeon_dev, OPCODE_NIC, @@ -3515,21 +3712,37 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) for (i = 0; i < octeon_dev->ifcount; i++) { resp_size = sizeof(struct liquidio_if_cfg_resp); ctx_size = sizeof(struct liquidio_if_cfg_context); + data_size = sizeof(struct lio_version); sc = (struct octeon_soft_command *) - octeon_alloc_soft_command(octeon_dev, 0, + octeon_alloc_soft_command(octeon_dev, data_size, resp_size, ctx_size); resp = (struct liquidio_if_cfg_resp *)sc->virtrptr; ctx = (struct liquidio_if_cfg_context *)sc->ctxptr; + vdata = (struct lio_version *)sc->virtdptr; - num_iqueues = - CFG_GET_NUM_TXQS_NIC_IF(octeon_get_conf(octeon_dev), i); - num_oqueues = - CFG_GET_NUM_RXQS_NIC_IF(octeon_get_conf(octeon_dev), i); - base_queue = - CFG_GET_BASE_QUE_NIC_IF(octeon_get_conf(octeon_dev), i); - gmx_port_id = - CFG_GET_GMXID_NIC_IF(octeon_get_conf(octeon_dev), i); - ifidx_or_pfnum = i; + *((u64 *)vdata) = 0; + vdata->major = cpu_to_be16(LIQUIDIO_BASE_MAJOR_VERSION); + vdata->minor = cpu_to_be16(LIQUIDIO_BASE_MINOR_VERSION); + vdata->micro = cpu_to_be16(LIQUIDIO_BASE_MICRO_VERSION); + + if (OCTEON_CN23XX_PF(octeon_dev)) { + num_iqueues = octeon_dev->sriov_info.num_pf_rings; + num_oqueues = octeon_dev->sriov_info.num_pf_rings; + base_queue = octeon_dev->sriov_info.pf_srn; + + gmx_port_id = octeon_dev->pf_num; + ifidx_or_pfnum = octeon_dev->pf_num; + } else { + num_iqueues = CFG_GET_NUM_TXQS_NIC_IF( + octeon_get_conf(octeon_dev), i); + num_oqueues = CFG_GET_NUM_RXQS_NIC_IF( + octeon_get_conf(octeon_dev), i); + base_queue = CFG_GET_BASE_QUE_NIC_IF( + octeon_get_conf(octeon_dev), i); + gmx_port_id = CFG_GET_GMXID_NIC_IF( + octeon_get_conf(octeon_dev), i); + ifidx_or_pfnum = i; + } dev_dbg(&octeon_dev->pci_dev->dev, "requesting config for interface %d, iqs %d, oqs %d\n", @@ -3566,7 +3779,11 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) /* Sleep on a wait queue till the cond flag indicates that the * response arrived or timed-out. */ - sleep_cond(&ctx->wc, &ctx->cond); + if (sleep_cond(&ctx->wc, &ctx->cond) == -EINTR) { + dev_err(&octeon_dev->pci_dev->dev, "Wait interrupted\n"); + goto setup_nic_wait_intr; + } + retval = resp->status; if (retval) { dev_err(&octeon_dev->pci_dev->dev, "iq/oq config failed\n"); @@ -3633,12 +3850,16 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) lio->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); - lio->dev_capability = NETIF_F_HIGHDMA - | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM - | NETIF_F_SG | NETIF_F_RXCSUM - | NETIF_F_GRO - | NETIF_F_TSO | NETIF_F_TSO6 - | NETIF_F_LRO; + if (OCTEON_CN23XX_PF(octeon_dev) || + OCTEON_CN6XXX(octeon_dev)) { + lio->dev_capability = NETIF_F_HIGHDMA + | NETIF_F_IP_CSUM + | NETIF_F_IPV6_CSUM + | NETIF_F_SG | NETIF_F_RXCSUM + | NETIF_F_GRO + | NETIF_F_TSO | NETIF_F_TSO6 + | NETIF_F_LRO; + } netif_set_gso_max_size(netdev, OCTNIC_GSO_MAX_SIZE); /* Copy of transmit encapsulation capabilities: @@ -3713,7 +3934,10 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) /* Register ethtool support */ liquidio_set_ethtool_ops(netdev); - octeon_dev->priv_flags = 0x0; + if (lio->oct_dev->chip_id == OCTEON_CN23XX_PF_VID) + octeon_dev->priv_flags = OCT_PRIV_FLAG_DEFAULT; + else + octeon_dev->priv_flags = 0x0; if (netdev->features & NETIF_F_LRO) liquidio_set_feature(netdev, OCTNET_CMD_LRO_ENABLE, @@ -3725,6 +3949,9 @@ static int setup_nic_devices(struct octeon_device *octeon_dev) liquidio_set_feature(netdev, OCTNET_CMD_VERBOSE_ENABLE, 0); + if (setup_link_status_change_wq(netdev)) + goto setup_nic_dev_fail; + /* Register the network device with the OS */ if (register_netdev(netdev)) { dev_err(&octeon_dev->pci_dev->dev, "Device registration failed\n"); @@ -3760,6 +3987,8 @@ setup_nic_dev_fail: octeon_free_soft_command(octeon_dev, sc); +setup_nic_wait_intr: + while (i--) { dev_err(&octeon_dev->pci_dev->dev, "NIC ifidx:%d Setup failed\n", i); @@ -3789,8 +4018,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) /* run port_config command for each port */ oct->ifcount = num_nic_ports; - memset(oct->props, 0, - sizeof(struct octdev_props) * num_nic_ports); + memset(oct->props, 0, sizeof(struct octdev_props) * num_nic_ports); for (i = 0; i < MAX_OCTEON_LINKS; i++) oct->props[i].gmxport = -1; @@ -3806,7 +4034,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) /* Initialize interrupt moderation params */ intrmod_cfg = &((struct octeon_device *)oct)->intrmod; intrmod_cfg->rx_enable = 1; - intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; + intrmod_cfg->check_intrvl = LIO_INTRMOD_CHECK_INTERVAL; intrmod_cfg->maxpkt_ratethr = LIO_INTRMOD_MAXPKT_RATETHR; intrmod_cfg->minpkt_ratethr = LIO_INTRMOD_MINPKT_RATETHR; intrmod_cfg->rx_maxcnt_trigger = LIO_INTRMOD_RXMAXCNT_TRIGGER; @@ -3818,6 +4046,7 @@ static int liquidio_init_nic_module(struct octeon_device *oct) intrmod_cfg->tx_mincnt_trigger = LIO_INTRMOD_TXMINCNT_TRIGGER; intrmod_cfg->rx_frames = CFG_GET_OQ_INTR_PKT(octeon_get_conf(oct)); intrmod_cfg->rx_usecs = CFG_GET_OQ_INTR_TIME(octeon_get_conf(oct)); + intrmod_cfg->tx_frames = CFG_GET_IQ_INTR_PKT(octeon_get_conf(oct)); dev_dbg(&oct->pci_dev->dev, "Network interfaces ready\n"); return retval; @@ -3880,6 +4109,7 @@ static void nic_starter(struct work_struct *work) static int octeon_device_init(struct octeon_device *octeon_dev) { int j, ret; + int fw_loaded = 0; char bootcmd[] = "\n"; struct octeon_device_priv *oct_priv = (struct octeon_device_priv *)octeon_dev->priv; @@ -3901,9 +4131,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->app_mode = CVM_DRV_INVALID_APP; - /* Do a soft reset of the Octeon device. */ - if (octeon_dev->fn_list.soft_reset(octeon_dev)) + if (OCTEON_CN23XX_PF(octeon_dev)) { + if (!cn23xx_fw_loaded(octeon_dev)) { + fw_loaded = 0; + /* Do a soft reset of the Octeon device. */ + if (octeon_dev->fn_list.soft_reset(octeon_dev)) + return 1; + /* things might have changed */ + if (!cn23xx_fw_loaded(octeon_dev)) + fw_loaded = 0; + else + fw_loaded = 1; + } else { + fw_loaded = 1; + } + } else if (octeon_dev->fn_list.soft_reset(octeon_dev)) { return 1; + } /* Initialize the dispatch mechanism used to push packets arriving on * Octeon Output queues. @@ -3925,6 +4169,22 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_set_io_queues_off(octeon_dev); + if (OCTEON_CN23XX_PF(octeon_dev)) { + ret = octeon_dev->fn_list.setup_device_regs(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "OCTEON: Failed to configure device registers\n"); + return ret; + } + } + + /* Initialize soft command buffer pool + */ + if (octeon_setup_sc_buffer_pool(octeon_dev)) { + dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n"); + return 1; + } + atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE); + /* Setup the data structures that manage this Octeon's Input queues. */ if (octeon_setup_instr_queues(octeon_dev)) { dev_err(&octeon_dev->pci_dev->dev, @@ -3936,14 +4196,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev) } atomic_set(&octeon_dev->status, OCT_DEV_INSTR_QUEUE_INIT_DONE); - /* Initialize soft command buffer pool - */ - if (octeon_setup_sc_buffer_pool(octeon_dev)) { - dev_err(&octeon_dev->pci_dev->dev, "sc buffer pool allocation failed\n"); - return 1; - } - atomic_set(&octeon_dev->status, OCT_DEV_SC_BUFF_POOL_INIT_DONE); - /* Initialize lists to manage the requests of different types that * arrive from user & kernel applications for this octeon device. */ @@ -3963,15 +4215,23 @@ static int octeon_device_init(struct octeon_device *octeon_dev) atomic_set(&octeon_dev->status, OCT_DEV_DROQ_INIT_DONE); - /* The input and output queue registers were setup earlier (the queues - * were not enabled). Any additional registers that need to be - * programmed should be done now. - */ - ret = octeon_dev->fn_list.setup_device_regs(octeon_dev); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, - "Failed to configure device registers\n"); - return ret; + if (OCTEON_CN23XX_PF(octeon_dev)) { + if (octeon_allocate_ioq_vector(octeon_dev)) { + dev_err(&octeon_dev->pci_dev->dev, "OCTEON: ioq vector allocation failed\n"); + return 1; + } + + } else { + /* The input and output queue registers were setup earlier (the + * queues were not enabled). Any additional registers + * that need to be programmed should be done now. + */ + ret = octeon_dev->fn_list.setup_device_regs(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, + "Failed to configure device registers\n"); + return ret; + } } /* Initialize the tasklet that handles output queue packet processing.*/ @@ -3985,63 +4245,76 @@ static int octeon_device_init(struct octeon_device *octeon_dev) return 1; /* Enable Octeon device interrupts */ - octeon_dev->fn_list.enable_interrupt(octeon_dev->chip); + octeon_dev->fn_list.enable_interrupt(octeon_dev, OCTEON_ALL_INTR); /* Enable the input and output queues for this Octeon device */ - octeon_dev->fn_list.enable_io_queues(octeon_dev); + ret = octeon_dev->fn_list.enable_io_queues(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Failed to enable input/output queues"); + return ret; + } atomic_set(&octeon_dev->status, OCT_DEV_IO_QUEUES_DONE); - dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n"); + if ((!OCTEON_CN23XX_PF(octeon_dev)) || !fw_loaded) { + dev_dbg(&octeon_dev->pci_dev->dev, "Waiting for DDR initialization...\n"); + if (!ddr_timeout) { + dev_info(&octeon_dev->pci_dev->dev, + "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n"); + } - if (ddr_timeout == 0) - dev_info(&octeon_dev->pci_dev->dev, "WAITING. Set ddr_timeout to non-zero value to proceed with initialization.\n"); + schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS); - schedule_timeout_uninterruptible(HZ * LIO_RESET_SECS); - - /* Wait for the octeon to initialize DDR after the soft-reset. */ - while (ddr_timeout == 0) { - set_current_state(TASK_INTERRUPTIBLE); - if (schedule_timeout(HZ / 10)) { - /* user probably pressed Control-C */ + /* Wait for the octeon to initialize DDR after the soft-reset.*/ + while (!ddr_timeout) { + set_current_state(TASK_INTERRUPTIBLE); + if (schedule_timeout(HZ / 10)) { + /* user probably pressed Control-C */ + return 1; + } + } + ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, + "DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n", + ret); return 1; } - } - ret = octeon_wait_for_ddr_init(octeon_dev, &ddr_timeout); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, - "DDR not initialized. Please confirm that board is configured to boot from Flash, ret: %d\n", - ret); - return 1; - } - if (octeon_wait_for_bootloader(octeon_dev, 1000) != 0) { - dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n"); - return 1; - } + if (octeon_wait_for_bootloader(octeon_dev, 1000)) { + dev_err(&octeon_dev->pci_dev->dev, "Board not responding\n"); + return 1; + } - /* Divert uboot to take commands from host instead. */ - ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50); + /* Divert uboot to take commands from host instead. */ + ret = octeon_console_send_cmd(octeon_dev, bootcmd, 50); - dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n"); - ret = octeon_init_consoles(octeon_dev); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n"); - return 1; - } - ret = octeon_add_console(octeon_dev, 0); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n"); - return 1; - } + dev_dbg(&octeon_dev->pci_dev->dev, "Initializing consoles\n"); + ret = octeon_init_consoles(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Could not access board consoles\n"); + return 1; + } + ret = octeon_add_console(octeon_dev, 0); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Could not access board console\n"); + return 1; + } - atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE); + atomic_set(&octeon_dev->status, OCT_DEV_CONSOLE_INIT_DONE); - dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n"); - ret = load_firmware(octeon_dev); - if (ret) { - dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n"); - return 1; + dev_dbg(&octeon_dev->pci_dev->dev, "Loading firmware\n"); + ret = load_firmware(octeon_dev); + if (ret) { + dev_err(&octeon_dev->pci_dev->dev, "Could not load firmware to board\n"); + return 1; + } + /* set bit 1 of SLI_SCRATCH_1 to indicate that firmware is + * loaded + */ + if (OCTEON_CN23XX_PF(octeon_dev)) + octeon_write_csr64(octeon_dev, CN23XX_SLI_SCRATCH1, + 2ULL); } handshake[octeon_dev->octeon_id].init_ok = 1; @@ -4057,7 +4330,6 @@ static int octeon_device_init(struct octeon_device *octeon_dev) octeon_dev->droq[j]->pkts_credit_reg); /* Packets can start arriving on the output queues from this point. */ - return 0; } diff --git a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h index 199a8b9c7dc5..0d990accb65e 100644 --- a/drivers/net/ethernet/cavium/liquidio/liquidio_common.h +++ b/drivers/net/ethernet/cavium/liquidio/liquidio_common.h @@ -30,10 +30,24 @@ #include "octeon_config.h" -#define LIQUIDIO_BASE_VERSION "1.4" -#define LIQUIDIO_MICRO_VERSION ".1" #define LIQUIDIO_PACKAGE "" -#define LIQUIDIO_VERSION "1.4.1" +#define LIQUIDIO_BASE_MAJOR_VERSION 1 +#define LIQUIDIO_BASE_MINOR_VERSION 4 +#define LIQUIDIO_BASE_MICRO_VERSION 1 +#define LIQUIDIO_BASE_VERSION __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ + __stringify(LIQUIDIO_BASE_MINOR_VERSION) +#define LIQUIDIO_MICRO_VERSION "." __stringify(LIQUIDIO_BASE_MICRO_VERSION) +#define LIQUIDIO_VERSION LIQUIDIO_PACKAGE \ + __stringify(LIQUIDIO_BASE_MAJOR_VERSION) "." \ + __stringify(LIQUIDIO_BASE_MINOR_VERSION) \ + "." __stringify(LIQUIDIO_BASE_MICRO_VERSION) + +struct lio_version { + u16 major; + u16 minor; + u16 micro; + u16 reserved; +}; #define CONTROL_IQ 0 /** Tag types used by Octeon cores in its work. */ @@ -218,6 +232,9 @@ static inline void add_sg_size(struct octeon_sg_entry *sg_entry, #define OCTNET_CMD_ADD_VLAN_FILTER 0x17 #define OCTNET_CMD_DEL_VLAN_FILTER 0x18 #define OCTNET_CMD_VXLAN_PORT_CONFIG 0x19 + +#define OCTNET_CMD_ID_ACTIVE 0x1a + #define OCTNET_CMD_VXLAN_PORT_ADD 0x0 #define OCTNET_CMD_VXLAN_PORT_DEL 0x1 #define OCTNET_CMD_RXCSUM_ENABLE 0x0 @@ -296,6 +313,13 @@ union octnet_cmd { #define OCTNET_CMD_SIZE (sizeof(union octnet_cmd)) +/*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ +#define LIO_SOFTCMDRESP_IH2 40 +#define LIO_SOFTCMDRESP_IH3 (40 + 8) + +#define LIO_PCICMD_O2 24 +#define LIO_PCICMD_O3 (24 + 8) + /* Instruction Header(DPI) - for OCTEON-III models */ struct octeon_instr_ih3 { #ifdef __BIG_ENDIAN_BITFIELD @@ -814,6 +838,8 @@ struct oct_link_stats { #define VITESSE_PHY_GPIO_DRIVEOFF 0x4 #define VITESSE_PHY_GPIO_HIGH 0x2 #define VITESSE_PHY_GPIO_LOW 0x3 +#define LED_IDENTIFICATION_ON 0x1 +#define LED_IDENTIFICATION_OFF 0x0 struct oct_mdio_cmd { u64 op; @@ -832,7 +858,7 @@ struct oct_mdio_cmd { /* intrmod: max. packets to trigger interrupt */ #define LIO_INTRMOD_RXMAXCNT_TRIGGER 384 /* intrmod: min. packets to trigger interrupt */ -#define LIO_INTRMOD_RXMINCNT_TRIGGER 1 +#define LIO_INTRMOD_RXMINCNT_TRIGGER 0 /* intrmod: max. time to trigger interrupt */ #define LIO_INTRMOD_RXMAXTMR_TRIGGER 128 /* 66xx:intrmod: min. time to trigger interrupt diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_config.h b/drivers/net/ethernet/cavium/liquidio/octeon_config.h index b3396e3a8bab..c76556809ed1 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_config.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_config.h @@ -64,6 +64,34 @@ #define DEFAULT_NUM_NIC_PORTS_68XX 4 #define DEFAULT_NUM_NIC_PORTS_68XX_210NV 2 +/* CN23xx IQ configuration macros */ +#define CN23XX_MAX_RINGS_PER_PF_PASS_1_0 12 +#define CN23XX_MAX_RINGS_PER_PF_PASS_1_1 32 +#define CN23XX_MAX_RINGS_PER_PF 64 + +#define CN23XX_MAX_INPUT_QUEUES CN23XX_MAX_RINGS_PER_PF +#define CN23XX_MAX_IQ_DESCRIPTORS 2048 +#define CN23XX_DB_MIN 1 +#define CN23XX_DB_MAX 8 +#define CN23XX_DB_TIMEOUT 1 + +#define CN23XX_MAX_OUTPUT_QUEUES CN23XX_MAX_RINGS_PER_PF +#define CN23XX_MAX_OQ_DESCRIPTORS 2048 +#define CN23XX_OQ_BUF_SIZE 1536 +#define CN23XX_OQ_PKTSPER_INTR 128 +/*#define CAVIUM_ONLY_CN23XX_RX_PERF*/ +#define CN23XX_OQ_REFIL_THRESHOLD 128 + +#define CN23XX_OQ_INTR_PKT 64 +#define CN23XX_OQ_INTR_TIME 100 +#define DEFAULT_NUM_NIC_PORTS_23XX 1 + +#define CN23XX_CFG_IO_QUEUES CN23XX_MAX_RINGS_PER_PF +/* PEMs count */ +#define CN23XX_MAX_MACS 4 + +#define CN23XX_DEF_IQ_INTR_THRESHOLD 32 +#define CN23XX_DEF_IQ_INTR_BYTE_THRESHOLD (64 * 1024) /* common OCTEON configuration macros */ #define CN6XXX_CFG_IO_QUEUES 32 #define OCTEON_32BYTE_INSTR 32 @@ -92,6 +120,9 @@ #define CFG_GET_IQ_DB_MIN(cfg) ((cfg)->iq.db_min) #define CFG_GET_IQ_DB_TIMEOUT(cfg) ((cfg)->iq.db_timeout) +#define CFG_GET_IQ_INTR_PKT(cfg) ((cfg)->iq.iq_intr_pkt) +#define CFG_SET_IQ_INTR_PKT(cfg, val) (cfg)->iq.iq_intr_pkt = val + #define CFG_GET_OQ_MAX_Q(cfg) ((cfg)->oq.max_oqs) #define CFG_GET_OQ_INFO_PTR(cfg) ((cfg)->oq.info_ptr) #define CFG_GET_OQ_PKTS_PER_INTR(cfg) ((cfg)->oq.pkts_per_intr) @@ -140,19 +171,24 @@ enum lio_card_type { LIO_210SV = 0, /* Two port, 66xx */ LIO_210NV, /* Two port, 68xx */ - LIO_410NV /* Four port, 68xx */ + LIO_410NV, /* Four port, 68xx */ + LIO_23XX /* 23xx */ }; #define LIO_210SV_NAME "210sv" #define LIO_210NV_NAME "210nv" #define LIO_410NV_NAME "410nv" +#define LIO_23XX_NAME "23xx" /** Structure to define the configuration attributes for each Input queue. * Applicable to all Octeon processors **/ struct octeon_iq_config { #ifdef __BIG_ENDIAN_BITFIELD - u64 reserved:32; + u64 reserved:16; + + /** Tx interrupt packets. Applicable to 23xx only */ + u64 iq_intr_pkt:16; /** Minimum ticks to wait before checking for pending instructions. */ u64 db_timeout:16; @@ -192,7 +228,10 @@ struct octeon_iq_config { /** Minimum ticks to wait before checking for pending instructions. */ u64 db_timeout:16; - u64 reserved:32; + /** Tx interrupt packets. Applicable to 23xx only */ + u64 iq_intr_pkt:16; + + u64 reserved:16; #endif }; @@ -416,11 +455,15 @@ struct octeon_config { #define DISPATCH_LIST_SIZE BIT(OPCODE_MASK_BITS) /* Maximum number of Octeon Instruction (command) queues */ -#define MAX_OCTEON_INSTR_QUEUES(oct) CN6XXX_MAX_INPUT_QUEUES -/* Maximum number of Octeon Output queues */ -#define MAX_OCTEON_OUTPUT_QUEUES(oct) CN6XXX_MAX_OUTPUT_QUEUES +#define MAX_OCTEON_INSTR_QUEUES(oct) \ + (OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_INPUT_QUEUES : \ + CN6XXX_MAX_INPUT_QUEUES) -#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES CN6XXX_MAX_INPUT_QUEUES -#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES CN6XXX_MAX_OUTPUT_QUEUES +/* Maximum number of Octeon Instruction (command) queues */ +#define MAX_OCTEON_OUTPUT_QUEUES(oct) \ + (OCTEON_CN23XX_PF(oct) ? CN23XX_MAX_OUTPUT_QUEUES : \ + CN6XXX_MAX_OUTPUT_QUEUES) +#define MAX_POSSIBLE_OCTEON_INSTR_QUEUES CN23XX_MAX_INPUT_QUEUES +#define MAX_POSSIBLE_OCTEON_OUTPUT_QUEUES CN23XX_MAX_OUTPUT_QUEUES #endif /* __OCTEON_CONFIG_H__ */ diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_console.c b/drivers/net/ethernet/cavium/liquidio/octeon_console.c index bbb50ea66f16..01a50f3b0c8e 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_console.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_console.c @@ -25,12 +25,13 @@ */ #include #include +#include #include "liquidio_common.h" #include "octeon_droq.h" #include "octeon_iq.h" #include "response_manager.h" #include "octeon_device.h" -#include "octeon_main.h" +#include "liquidio_image.h" #include "octeon_mem_ops.h" static void octeon_remote_lock(void); @@ -40,6 +41,10 @@ static u64 cvmx_bootmem_phy_named_block_find(struct octeon_device *oct, u32 flags); static int octeon_console_read(struct octeon_device *oct, u32 console_num, char *buffer, u32 buf_size); +static u32 console_bitmask; +module_param(console_bitmask, int, 0644); +MODULE_PARM_DESC(console_bitmask, + "Bitmask indicating which consoles have debug output redirected to syslog."); #define MIN(a, b) min((a), (b)) #define CAST_ULL(v) ((u64)(v)) @@ -177,6 +182,15 @@ struct octeon_pci_console_desc { __cvmx_bootmem_desc_get(oct, addr, \ offsetof(struct cvmx_bootmem_named_block_desc, field), \ SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field)) +/** + * \brief determines if a given console has debug enabled. + * @param console console to check + * @returns 1 = enabled. 0 otherwise + */ +static int octeon_console_debug_enabled(u32 console) +{ + return (console_bitmask >> (console)) & 0x1; +} /** * This function is the implementation of the get macros defined @@ -709,3 +723,104 @@ static int octeon_console_read(struct octeon_device *oct, u32 console_num, return bytes_to_read; } + +#define FBUF_SIZE (4 * 1024 * 1024) +u8 fbuf[FBUF_SIZE]; + +int octeon_download_firmware(struct octeon_device *oct, const u8 *data, + size_t size) +{ + int ret = 0; + u8 *p = fbuf; + u32 crc32_result; + u64 load_addr; + u32 image_len; + struct octeon_firmware_file_header *h; + u32 i, rem; + + if (size < sizeof(struct octeon_firmware_file_header)) { + dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n", + (u32)size, + (u32)sizeof(struct octeon_firmware_file_header)); + return -EINVAL; + } + + h = (struct octeon_firmware_file_header *)data; + + if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) { + dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n"); + return -EINVAL; + } + + crc32_result = crc32((unsigned int)~0, data, + sizeof(struct octeon_firmware_file_header) - + sizeof(u32)) ^ ~0U; + if (crc32_result != be32_to_cpu(h->crc32)) { + dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n", + crc32_result, be32_to_cpu(h->crc32)); + return -EINVAL; + } + + if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) { + dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n", + LIQUIDIO_PACKAGE, h->version); + return -EINVAL; + } + + if (memcmp(LIQUIDIO_BASE_VERSION, h->version + strlen(LIQUIDIO_PACKAGE), + strlen(LIQUIDIO_BASE_VERSION))) { + dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n", + LIQUIDIO_BASE_VERSION, + h->version + strlen(LIQUIDIO_PACKAGE)); + return -EINVAL; + } + + if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) { + dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n", + be32_to_cpu(h->num_images)); + return -EINVAL; + } + + dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version); + snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s", + h->version); + + data += sizeof(struct octeon_firmware_file_header); + + dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__, + be32_to_cpu(h->num_images)); + /* load all images */ + for (i = 0; i < be32_to_cpu(h->num_images); i++) { + load_addr = be64_to_cpu(h->desc[i].addr); + image_len = be32_to_cpu(h->desc[i].len); + + dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n", + image_len, load_addr); + + /* Write in 4MB chunks*/ + rem = image_len; + + while (rem) { + if (rem < FBUF_SIZE) + size = rem; + else + size = FBUF_SIZE; + + memcpy(p, data, size); + + /* download the image */ + octeon_pci_write_core_mem(oct, load_addr, p, (u32)size); + + data += size; + rem -= (u32)size; + load_addr += size; + } + } + dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n", + h->bootcmd); + + /* Invoke the bootcmd */ + ret = octeon_console_send_cmd(oct, h->bootcmd, 50); + + return 0; +} diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.c b/drivers/net/ethernet/cavium/liquidio/octeon_device.c index 0eb504a4379a..586b68899b06 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.c @@ -20,7 +20,6 @@ * Contact Cavium, Inc. for more information **********************************************************************/ #include -#include #include #include #include "liquidio_common.h" @@ -32,8 +31,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" -#include "liquidio_image.h" -#include "octeon_mem_ops.h" +#include "cn23xx_pf_device.h" /** Default configuration * for CN66XX OCTEON Models. @@ -420,6 +418,108 @@ static struct octeon_config default_cn68xx_210nv_conf = { , }; +static struct octeon_config default_cn23xx_conf = { + .card_type = LIO_23XX, + .card_name = LIO_23XX_NAME, + /** IQ attributes */ + .iq = { + .max_iqs = CN23XX_CFG_IO_QUEUES, + .pending_list_size = (CN23XX_MAX_IQ_DESCRIPTORS * + CN23XX_CFG_IO_QUEUES), + .instr_type = OCTEON_64BYTE_INSTR, + .db_min = CN23XX_DB_MIN, + .db_timeout = CN23XX_DB_TIMEOUT, + .iq_intr_pkt = CN23XX_DEF_IQ_INTR_THRESHOLD, + }, + + /** OQ attributes */ + .oq = { + .max_oqs = CN23XX_CFG_IO_QUEUES, + .info_ptr = OCTEON_OQ_INFOPTR_MODE, + .pkts_per_intr = CN23XX_OQ_PKTSPER_INTR, + .refill_threshold = CN23XX_OQ_REFIL_THRESHOLD, + .oq_intr_pkt = CN23XX_OQ_INTR_PKT, + .oq_intr_time = CN23XX_OQ_INTR_TIME, + }, + + .num_nic_ports = DEFAULT_NUM_NIC_PORTS_23XX, + .num_def_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS, + .num_def_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS, + .def_rx_buf_size = CN23XX_OQ_BUF_SIZE, + + /* For ethernet interface 0: Port cfg Attributes */ + .nic_if_cfg[0] = { + /* Max Txqs: Half for each of the two ports :max_iq/2 */ + .max_txqs = MAX_TXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_txqs */ + .num_txqs = DEF_TXQS_PER_INTF, + + /* Max Rxqs: Half for each of the two ports :max_oq/2 */ + .max_rxqs = MAX_RXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_rxqs */ + .num_rxqs = DEF_RXQS_PER_INTF, + + /* Num of desc for rx rings */ + .num_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS, + + /* Num of desc for tx rings */ + .num_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS, + + /* SKB size, We need not change buf size even for Jumbo frames. + * Octeon can send jumbo frames in 4 consecutive descriptors, + */ + .rx_buf_size = CN23XX_OQ_BUF_SIZE, + + .base_queue = BASE_QUEUE_NOT_REQUESTED, + + .gmx_port_id = 0, + }, + + .nic_if_cfg[1] = { + /* Max Txqs: Half for each of the two ports :max_iq/2 */ + .max_txqs = MAX_TXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_txqs */ + .num_txqs = DEF_TXQS_PER_INTF, + + /* Max Rxqs: Half for each of the two ports :max_oq/2 */ + .max_rxqs = MAX_RXQS_PER_INTF, + + /* Actual configured value. Range could be: 1...max_rxqs */ + .num_rxqs = DEF_RXQS_PER_INTF, + + /* Num of desc for rx rings */ + .num_rx_descs = CN23XX_MAX_OQ_DESCRIPTORS, + + /* Num of desc for tx rings */ + .num_tx_descs = CN23XX_MAX_IQ_DESCRIPTORS, + + /* SKB size, We need not change buf size even for Jumbo frames. + * Octeon can send jumbo frames in 4 consecutive descriptors, + */ + .rx_buf_size = CN23XX_OQ_BUF_SIZE, + + .base_queue = BASE_QUEUE_NOT_REQUESTED, + + .gmx_port_id = 1, + }, + + .misc = { + /* Host driver link query interval */ + .oct_link_query_interval = 100, + + /* Octeon link query interval */ + .host_link_query_interval = 500, + + .enable_sli_oq_bp = 0, + + /* Control queue group */ + .ctrlq_grp = 1, + } +}; + enum { OCTEON_CONFIG_TYPE_DEFAULT = 0, NUM_OCTEON_CONFS, @@ -487,6 +587,8 @@ static void *__retrieve_octeon_config_info(struct octeon_device *oct, } else if ((oct->chip_id == OCTEON_CN68XX) && (card_type == LIO_410NV)) { ret = (void *)&default_cn68xx_conf; + } else if (oct->chip_id == OCTEON_CN23XX_PF_VID) { + ret = (void *)&default_cn23xx_conf; } break; default: @@ -501,7 +603,8 @@ static int __verify_octeon_config_info(struct octeon_device *oct, void *conf) case OCTEON_CN66XX: case OCTEON_CN68XX: return lio_validate_cn6xxx_config_info(oct, conf); - + case OCTEON_CN23XX_PF_VID: + return 0; default: break; } @@ -541,107 +644,6 @@ static char *get_oct_app_string(u32 app_mode) return oct_dev_app_str[CVM_DRV_INVALID_APP - CVM_DRV_APP_START]; } -u8 fbuf[4 * 1024 * 1024]; - -int octeon_download_firmware(struct octeon_device *oct, const u8 *data, - size_t size) -{ - int ret = 0; - u8 *p = fbuf; - u32 crc32_result; - u64 load_addr; - u32 image_len; - struct octeon_firmware_file_header *h; - u32 i, rem, base_len = strlen(LIQUIDIO_BASE_VERSION); - char *base; - - if (size < sizeof(struct octeon_firmware_file_header)) { - dev_err(&oct->pci_dev->dev, "Firmware file too small (%d < %d).\n", - (u32)size, - (u32)sizeof(struct octeon_firmware_file_header)); - return -EINVAL; - } - - h = (struct octeon_firmware_file_header *)data; - - if (be32_to_cpu(h->magic) != LIO_NIC_MAGIC) { - dev_err(&oct->pci_dev->dev, "Unrecognized firmware file.\n"); - return -EINVAL; - } - - crc32_result = crc32((unsigned int)~0, data, - sizeof(struct octeon_firmware_file_header) - - sizeof(u32)) ^ ~0U; - if (crc32_result != be32_to_cpu(h->crc32)) { - dev_err(&oct->pci_dev->dev, "Firmware CRC mismatch (0x%08x != 0x%08x).\n", - crc32_result, be32_to_cpu(h->crc32)); - return -EINVAL; - } - - if (strncmp(LIQUIDIO_PACKAGE, h->version, strlen(LIQUIDIO_PACKAGE))) { - dev_err(&oct->pci_dev->dev, "Unmatched firmware package type. Expected %s, got %s.\n", - LIQUIDIO_PACKAGE, h->version); - return -EINVAL; - } - - base = h->version + strlen(LIQUIDIO_PACKAGE); - ret = memcmp(LIQUIDIO_BASE_VERSION, base, base_len); - if (ret) { - dev_err(&oct->pci_dev->dev, "Unmatched firmware version. Expected %s.x, got %s.\n", - LIQUIDIO_BASE_VERSION, base); - return -EINVAL; - } - - if (be32_to_cpu(h->num_images) > LIO_MAX_IMAGES) { - dev_err(&oct->pci_dev->dev, "Too many images in firmware file (%d).\n", - be32_to_cpu(h->num_images)); - return -EINVAL; - } - - dev_info(&oct->pci_dev->dev, "Firmware version: %s\n", h->version); - snprintf(oct->fw_info.liquidio_firmware_version, 32, "LIQUIDIO: %s", - h->version); - - data += sizeof(struct octeon_firmware_file_header); - - dev_info(&oct->pci_dev->dev, "%s: Loading %d images\n", __func__, - be32_to_cpu(h->num_images)); - /* load all images */ - for (i = 0; i < be32_to_cpu(h->num_images); i++) { - load_addr = be64_to_cpu(h->desc[i].addr); - image_len = be32_to_cpu(h->desc[i].len); - - dev_info(&oct->pci_dev->dev, "Loading firmware %d at %llx\n", - image_len, load_addr); - - /* Write in 4MB chunks*/ - rem = image_len; - - while (rem) { - if (rem < (4 * 1024 * 1024)) - size = rem; - else - size = 4 * 1024 * 1024; - - memcpy(p, data, size); - - /* download the image */ - octeon_pci_write_core_mem(oct, load_addr, p, (u32)size); - - data += size; - rem -= (u32)size; - load_addr += size; - } - } - dev_info(&oct->pci_dev->dev, "Writing boot command: %s\n", - h->bootcmd); - - /* Invoke the bootcmd */ - ret = octeon_console_send_cmd(oct, h->bootcmd, 50); - - return 0; -} - void octeon_free_device_mem(struct octeon_device *oct) { int i; @@ -676,6 +678,9 @@ static struct octeon_device *octeon_allocate_device_mem(u32 pci_id, configsize = sizeof(struct octeon_cn6xxx); break; + case OCTEON_CN23XX_PF_VID: + configsize = sizeof(struct octeon_cn23xx_pf); + break; default: pr_err("%s: Unknown PCI Device: 0x%x\n", __func__, @@ -741,6 +746,45 @@ struct octeon_device *octeon_allocate_device(u32 pci_id, return oct; } +int +octeon_allocate_ioq_vector(struct octeon_device *oct) +{ + int i, num_ioqs = 0; + struct octeon_ioq_vector *ioq_vector; + int cpu_num; + int size; + + if (OCTEON_CN23XX_PF(oct)) + num_ioqs = oct->sriov_info.num_pf_rings; + size = sizeof(struct octeon_ioq_vector) * num_ioqs; + + oct->ioq_vector = vmalloc(size); + if (!oct->ioq_vector) + return 1; + memset(oct->ioq_vector, 0, size); + for (i = 0; i < num_ioqs; i++) { + ioq_vector = &oct->ioq_vector[i]; + ioq_vector->oct_dev = oct; + ioq_vector->iq_index = i; + ioq_vector->droq_index = i; + + cpu_num = i % num_online_cpus(); + cpumask_set_cpu(cpu_num, &ioq_vector->affinity_mask); + + if (oct->chip_id == OCTEON_CN23XX_PF_VID) + ioq_vector->ioq_num = i + oct->sriov_info.pf_srn; + else + ioq_vector->ioq_num = i; + } + return 0; +} + +void +octeon_free_ioq_vector(struct octeon_device *oct) +{ + vfree(oct->ioq_vector); +} + /* this function is only for setting up the first queue */ int octeon_setup_instr_queues(struct octeon_device *oct) { @@ -749,10 +793,12 @@ int octeon_setup_instr_queues(struct octeon_device *oct) union oct_txpciq txpciq; int numa_node = cpu_to_node(iq_no % num_online_cpus()); - /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); + else if (OCTEON_CN23XX_PF(oct)) + num_descs = CFG_GET_NUM_DEF_TX_DESCS(CHIP_FIELD(oct, cn23xx_pf, + conf)); oct->num_iqs = 0; @@ -769,6 +815,7 @@ int octeon_setup_instr_queues(struct octeon_device *oct) oct->instr_queue[0]->ifidx = 0; txpciq.u64 = 0; txpciq.s.q_no = iq_no; + txpciq.s.pkind = oct->pfvf_hsword.pkind; txpciq.s.use_qpg = 0; txpciq.s.qpg = 0; if (octeon_init_instr_queue(oct, txpciq, num_descs)) { @@ -788,14 +835,17 @@ int octeon_setup_output_queues(struct octeon_device *oct) u32 oq_no = 0; int numa_node = cpu_to_node(oq_no % num_online_cpus()); - /* this causes queue 0 to be default queue */ if (OCTEON_CN6XXX(oct)) { num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn6xxx, conf)); desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn6xxx, conf)); + } else if (OCTEON_CN23XX_PF(oct)) { + num_descs = CFG_GET_NUM_DEF_RX_DESCS(CHIP_FIELD(oct, cn23xx_pf, + conf)); + desc_size = CFG_GET_DEF_RX_BUF_SIZE(CHIP_FIELD(oct, cn23xx_pf, + conf)); } - oct->num_oqs = 0; oct->droq[0] = vmalloc_node(sizeof(*oct->droq[0]), numa_node); if (!oct->droq[0]) @@ -812,10 +862,10 @@ int octeon_setup_output_queues(struct octeon_device *oct) void octeon_set_io_queues_off(struct octeon_device *oct) { - /* Disable the i/p and o/p queues for this Octeon. */ - - octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0); - octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0); + if (OCTEON_CN6XXX(oct)) { + octeon_write_csr(oct, CN6XXX_SLI_PKT_INSTR_ENB, 0); + octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, 0); + } } void octeon_set_droq_pkt_op(struct octeon_device *oct, @@ -825,14 +875,16 @@ void octeon_set_droq_pkt_op(struct octeon_device *oct, u32 reg_val = 0; /* Disable the i/p and o/p queues for this Octeon. */ - reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); + if (OCTEON_CN6XXX(oct)) { + reg_val = octeon_read_csr(oct, CN6XXX_SLI_PKT_OUT_ENB); - if (enable) - reg_val = reg_val | (1 << q_no); - else - reg_val = reg_val & (~(1 << q_no)); + if (enable) + reg_val = reg_val | (1 << q_no); + else + reg_val = reg_val & (~(1 << q_no)); - octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val); + octeon_write_csr(oct, CN6XXX_SLI_PKT_OUT_ENB, reg_val); + } } int octeon_init_dispatch_list(struct octeon_device *oct) @@ -1019,6 +1071,9 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) if (OCTEON_CN6XXX(oct)) num_nic_ports = CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn6xxx, conf)); + else if (OCTEON_CN23XX_PF(oct)) + num_nic_ports = + CFG_GET_NUM_NIC_PORTS(CHIP_FIELD(oct, cn23xx_pf, conf)); if (atomic_read(&oct->status) >= OCT_DEV_RUNNING) { dev_err(&oct->pci_dev->dev, "Received CORE OK when device state is 0x%x\n", @@ -1046,6 +1101,12 @@ int octeon_core_drv_init(struct octeon_recv_info *recv_info, void *buf) } oct->fw_info.app_cap_flags = recv_pkt->rh.r_core_drv_init.app_cap_flags; oct->fw_info.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode; + oct->pfvf_hsword.app_mode = (u32)recv_pkt->rh.r_core_drv_init.app_mode; + + oct->pfvf_hsword.pkind = recv_pkt->rh.r_core_drv_init.pkind; + + for (i = 0; i < oct->num_iqs; i++) + oct->instr_queue[i]->txpciq.s.pkind = oct->pfvf_hsword.pkind; atomic_set(&oct->status, OCT_DEV_CORE_OK); @@ -1108,8 +1169,10 @@ struct octeon_config *octeon_get_conf(struct octeon_device *oct) if (OCTEON_CN6XXX(oct)) { default_oct_conf = (struct octeon_config *)(CHIP_FIELD(oct, cn6xxx, conf)); + } else if (OCTEON_CN23XX_PF(oct)) { + default_oct_conf = (struct octeon_config *) + (CHIP_FIELD(oct, cn23xx_pf, conf)); } - return default_oct_conf; } @@ -1141,7 +1204,9 @@ u64 lio_pci_readq(struct octeon_device *oct, u64 addr) * So write MSB first */ addrhi = (addr >> 32); - if ((oct->chip_id == OCTEON_CN66XX) || (oct->chip_id == OCTEON_CN68XX)) + if ((oct->chip_id == OCTEON_CN66XX) || + (oct->chip_id == OCTEON_CN68XX) || + (oct->chip_id == OCTEON_CN23XX_PF_VID)) addrhi |= 0x00060000; writel(addrhi, oct->reg_list.pci_win_rd_addr_hi); @@ -1185,8 +1250,15 @@ int octeon_mem_access_ok(struct octeon_device *oct) u64 lmc0_reset_ctl; /* Check to make sure a DDR interface is enabled */ - lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL); - access_okay = (lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK); + if (OCTEON_CN23XX_PF(oct)) { + lmc0_reset_ctl = lio_pci_readq(oct, CN23XX_LMC0_RESET_CTL); + access_okay = + (lmc0_reset_ctl & CN23XX_LMC0_RESET_CTL_DDR3RST_MASK); + } else { + lmc0_reset_ctl = lio_pci_readq(oct, CN6XXX_LMC0_RESET_CTL); + access_okay = + (lmc0_reset_ctl & CN6XXX_LMC0_RESET_CTL_DDR3RST_MASK); + } return access_okay ? 0 : 1; } @@ -1226,3 +1298,39 @@ int lio_get_device_id(void *dev) return octeon_dev->octeon_id; return -1; } + +void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq) +{ + u64 instr_cnt; + struct octeon_device *oct = NULL; + + /* the whole thing needs to be atomic, ideally */ + if (droq) { + spin_lock_bh(&droq->lock); + writel(droq->pkt_count, droq->pkts_sent_reg); + droq->pkt_count = 0; + spin_unlock_bh(&droq->lock); + oct = droq->oct_dev; + } + if (iq) { + spin_lock_bh(&iq->lock); + writel(iq->pkt_in_done, iq->inst_cnt_reg); + iq->pkt_in_done = 0; + spin_unlock_bh(&iq->lock); + oct = iq->oct_dev; + } + /*write resend. Writing RESEND in SLI_PKTX_CNTS should be enough + *to trigger tx interrupts as well, if they are pending. + */ + if (oct && OCTEON_CN23XX_PF(oct)) { + if (droq) + writeq(CN23XX_INTR_RESEND, droq->pkts_sent_reg); + /*we race with firmrware here. read and write the IN_DONE_CNTS*/ + else if (iq) { + instr_cnt = readq(iq->inst_cnt_reg); + writeq(((instr_cnt & 0xFFFFFFFF00000000ULL) | + CN23XX_INTR_RESEND), + iq->inst_cnt_reg); + } + } +} diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h index 01edfb404346..da15c2ae9330 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h @@ -30,13 +30,19 @@ /** PCI VendorId Device Id */ #define OCTEON_CN68XX_PCIID 0x91177d #define OCTEON_CN66XX_PCIID 0x92177d - +#define OCTEON_CN23XX_PCIID_PF 0x9702177d /** Driver identifies chips by these Ids, created by clubbing together * DeviceId+RevisionId; Where Revision Id is not used to distinguish * between chips, a value of 0 is used for revision id. */ #define OCTEON_CN68XX 0x0091 #define OCTEON_CN66XX 0x0092 +#define OCTEON_CN23XX_PF_VID 0x9702 + +/**RevisionId for the chips */ +#define OCTEON_CN23XX_REV_1_0 0x00 +#define OCTEON_CN23XX_REV_1_1 0x01 +#define OCTEON_CN23XX_REV_2_0 0x80 /** Endian-swap modes supported by Octeon. */ enum octeon_pci_swap_mode { @@ -46,6 +52,9 @@ enum octeon_pci_swap_mode { OCTEON_PCI_32BIT_LW_SWAP = 3 }; +#define OCTEON_OUTPUT_INTR (2) +#define OCTEON_ALL_INTR 0xff + /*--------------- PCI BAR1 index registers -------------*/ /* BAR1 Mask */ @@ -198,9 +207,9 @@ struct octeon_fn_list { void (*setup_oq_regs)(struct octeon_device *, u32); irqreturn_t (*process_interrupt_regs)(void *); + u64 (*msix_interrupt_handler)(void *); int (*soft_reset)(struct octeon_device *); int (*setup_device_regs)(struct octeon_device *); - void (*reinit_regs)(struct octeon_device *); void (*bar1_idx_setup)(struct octeon_device *, u64, u32, int); void (*bar1_idx_write)(struct octeon_device *, u32, u32); u32 (*bar1_idx_read)(struct octeon_device *, u32); @@ -209,10 +218,10 @@ struct octeon_fn_list { void (*enable_oq_pkt_time_intr)(struct octeon_device *, u32); void (*disable_oq_pkt_time_intr)(struct octeon_device *, u32); - void (*enable_interrupt)(void *); - void (*disable_interrupt)(void *); + void (*enable_interrupt)(struct octeon_device *, u8); + void (*disable_interrupt)(struct octeon_device *, u8); - void (*enable_io_queues)(struct octeon_device *); + int (*enable_io_queues)(struct octeon_device *); void (*disable_io_queues)(struct octeon_device *); }; @@ -266,11 +275,72 @@ struct octdev_props { /* Each interface in the Octeon device has a network * device pointer (used for OS specific calls). */ + int rx_on; int napi_enabled; int gmxport; struct net_device *netdev; }; +#define LIO_FLAG_MSIX_ENABLED 0x1 +#define MSIX_PO_INT 0x1 +#define MSIX_PI_INT 0x2 + +struct octeon_pf_vf_hs_word { +#ifdef __LITTLE_ENDIAN_BITFIELD + /** PKIND value assigned for the DPI interface */ + u64 pkind : 8; + + /** OCTEON core clock multiplier */ + u64 core_tics_per_us : 16; + + /** OCTEON coprocessor clock multiplier */ + u64 coproc_tics_per_us : 16; + + /** app that currently running on OCTEON */ + u64 app_mode : 8; + + /** RESERVED */ + u64 reserved : 16; + +#else + + /** RESERVED */ + u64 reserved : 16; + + /** app that currently running on OCTEON */ + u64 app_mode : 8; + + /** OCTEON coprocessor clock multiplier */ + u64 coproc_tics_per_us : 16; + + /** OCTEON core clock multiplier */ + u64 core_tics_per_us : 16; + + /** PKIND value assigned for the DPI interface */ + u64 pkind : 8; +#endif +}; + +struct octeon_sriov_info { + /* Actual rings left for PF device */ + u32 num_pf_rings; + + /* SRN of PF usable IO queues */ + u32 pf_srn; + /* total pf rings */ + u32 trs; + +}; + +struct octeon_ioq_vector { + struct octeon_device *oct_dev; + int iq_index; + int droq_index; + int vector; + struct cpumask affinity_mask; + u32 ioq_num; +}; + /** The Octeon device. * Each Octeon device has this structure to represent all its * components. @@ -296,7 +366,7 @@ struct octeon_device { /** Octeon Chip type. */ u16 chip_id; u16 rev_id; - + u16 pf_num; /** This device's id - set by the driver. */ u32 octeon_id; @@ -305,7 +375,6 @@ struct octeon_device { u16 flags; #define LIO_FLAG_MSI_ENABLED (u32)(1 << 1) -#define LIO_FLAG_MSIX_ENABLED (u32)(1 << 2) /** The state of this device */ atomic_t status; @@ -395,6 +464,19 @@ struct octeon_device { void *priv; + int num_msix_irqs; + + void *msix_entries; + + struct octeon_sriov_info sriov_info; + + struct octeon_pf_vf_hs_word pfvf_hsword; + + int msix_on; + + /** IOq information of it's corresponding MSI-X interrupt. */ + struct octeon_ioq_vector *ioq_vector; + int rx_pause; int tx_pause; @@ -402,12 +484,15 @@ struct octeon_device { /* private flags to control driver-specific features through ethtool */ u32 priv_flags; + + void *watchdog_task; }; #define OCT_DRV_ONLINE 1 #define OCT_DRV_OFFLINE 2 #define OCTEON_CN6XXX(oct) ((oct->chip_id == OCTEON_CN66XX) || \ (oct->chip_id == OCTEON_CN68XX)) +#define OCTEON_CN23XX_PF(oct) (oct->chip_id == OCTEON_CN23XX_PF_VID) #define CHIP_FIELD(oct, TYPE, field) \ (((struct octeon_ ## TYPE *)(oct->chip))->field) @@ -661,13 +746,24 @@ void *oct_get_config_info(struct octeon_device *oct, u16 card_type); */ struct octeon_config *octeon_get_conf(struct octeon_device *oct); +void octeon_free_ioq_vector(struct octeon_device *oct); +int octeon_allocate_ioq_vector(struct octeon_device *oct); +void lio_enable_irq(struct octeon_droq *droq, struct octeon_instr_queue *iq); + /* LiquidIO driver pivate flags */ enum { OCT_PRIV_FLAG_TX_BYTES = 0, /* Tx interrupts by pending byte count */ }; -static inline void lio_set_priv_flag(struct octeon_device *octdev, u32 flag, - u32 val) +#define OCT_PRIV_FLAG_DEFAULT 0x0 + +static inline u32 lio_get_priv_flag(struct octeon_device *octdev, u32 flag) +{ + return !!(octdev->priv_flags & (0x1 << flag)); +} + +static inline void lio_set_priv_flag(struct octeon_device *octdev, + u32 flag, u32 val) { if (val) octdev->priv_flags |= (0x1 << flag); diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c index e0afe4c1fd01..f60e5320daf4 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.c @@ -31,6 +31,7 @@ #include "octeon_network.h" #include "cn66xx_regs.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" #define CVM_MIN(d1, d2) (((d1) < (d2)) ? (d1) : (d2)) #define CVM_MAX(d1, d2) (((d1) > (d2)) ? (d1) : (d2)) @@ -92,22 +93,25 @@ static inline void *octeon_get_dispatch_arg(struct octeon_device *octeon_dev, return fn_arg; } -/** Check for packets on Droq. This function should be called with - * lock held. +/** Check for packets on Droq. This function should be called with lock held. * @param droq - Droq on which count is checked. * @return Returns packet count. */ u32 octeon_droq_check_hw_for_pkts(struct octeon_droq *droq) { u32 pkt_count = 0; + u32 last_count; pkt_count = readl(droq->pkts_sent_reg); - if (pkt_count) { - atomic_add(pkt_count, &droq->pkts_pending); - writel(pkt_count, droq->pkts_sent_reg); - } - return pkt_count; + last_count = pkt_count - droq->pkt_count; + droq->pkt_count = pkt_count; + + /* we shall write to cnts at napi irq enable or end of droq tasklet */ + if (last_count) + atomic_add(last_count, &droq->pkts_pending); + + return last_count; } static void octeon_droq_compute_max_packet_bufs(struct octeon_droq *droq) @@ -259,6 +263,11 @@ int octeon_init_droq(struct octeon_device *oct, c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf6x); c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf6x); + } else if (OCTEON_CN23XX_PF(oct)) { + struct octeon_config *conf23 = CHIP_FIELD(oct, cn23xx_pf, conf); + + c_pkts_per_intr = (u32)CFG_GET_OQ_PKTS_PER_INTR(conf23); + c_refill_threshold = (u32)CFG_GET_OQ_REFILL_THRESHOLD(conf23); } else { return 1; } @@ -564,7 +573,7 @@ octeon_droq_dispatch_pkt(struct octeon_device *oct, (unsigned int)rh->r.opcode, (unsigned int)rh->r.subcode); droq->stats.dropped_nodispatch++; - } /* else (dispatch_fn ... */ + } return cnt; } @@ -735,16 +744,20 @@ octeon_droq_process_packets(struct octeon_device *oct, u32 pkt_count = 0, pkts_processed = 0; struct list_head *tmp, *tmp2; + /* Grab the droq lock */ + spin_lock(&droq->lock); + + octeon_droq_check_hw_for_pkts(droq); pkt_count = atomic_read(&droq->pkts_pending); - if (!pkt_count) + + if (!pkt_count) { + spin_unlock(&droq->lock); return 0; + } if (pkt_count > budget) pkt_count = budget; - /* Grab the droq lock */ - spin_lock(&droq->lock); - pkts_processed = octeon_droq_fast_process_packets(oct, droq, pkt_count); atomic_sub(pkts_processed, &droq->pkts_pending); @@ -789,6 +802,8 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct, spin_lock(&droq->lock); while (total_pkts_processed < budget) { + octeon_droq_check_hw_for_pkts(droq); + pkts_available = CVM_MIN((budget - total_pkts_processed), (u32)(atomic_read(&droq->pkts_pending))); @@ -803,8 +818,6 @@ octeon_droq_process_poll_pkts(struct octeon_device *oct, atomic_sub(pkts_processed, &droq->pkts_pending); total_pkts_processed += pkts_processed; - - octeon_droq_check_hw_for_pkts(droq); } spin_unlock(&droq->lock); @@ -874,8 +887,11 @@ octeon_process_droq_poll_cmd(struct octeon_device *oct, u32 q_no, int cmd, return 0; } break; + case OCTEON_CN23XX_PF_VID: { + lio_enable_irq(oct->droq[q_no], oct->instr_queue[q_no]); + } + break; } - return 0; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h index 5a6fb9113bbd..5be002d5dba4 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_droq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_droq.h @@ -261,6 +261,8 @@ struct octeon_droq { u32 q_no; + u32 pkt_count; + struct octeon_droq_ops ops; struct octeon_device *oct_dev; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h index ff4b1d6f007b..e4d426ba18dc 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_iq.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_iq.h @@ -88,6 +88,8 @@ struct octeon_instr_queue { /** A spinlock to protect while posting on the ring. */ spinlock_t post_lock; + u32 pkt_in_done; + /** A spinlock to protect access to the input ring.*/ spinlock_t iq_flush_running_lock; diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index bc14e4c27332..366298f7bcb2 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -38,12 +38,26 @@ #define DRV_NAME "LiquidIO" -/** - * \brief determines if a given console has debug enabled. - * @param console console to check - * @returns 1 = enabled. 0 otherwise +/** This structure is used by NIC driver to store information required + * to free the sk_buff when the packet has been fetched by Octeon. + * Bytes offset below assume worst-case of a 64-bit system. */ -int octeon_console_debug_enabled(u32 console); +struct octnet_buf_free_info { + /** Bytes 1-8. Pointer to network device private structure. */ + struct lio *lio; + + /** Bytes 9-16. Pointer to sk_buff. */ + struct sk_buff *skb; + + /** Bytes 17-24. Pointer to gather list. */ + struct octnic_gather *g; + + /** Bytes 25-32. Physical address of skb->data or gather list. */ + u64 dptr; + + /** Bytes 33-47. Piggybacked soft command, if any */ + struct octeon_soft_command *sc; +}; /* BQL-related functions */ void octeon_report_sent_bytes_to_bql(void *buf, int reqtype); @@ -167,22 +181,26 @@ cnnic_numa_alloc_aligned_dma(u32 size, #define cnnic_free_aligned_dma(pci_dev, ptr, size, orig_ptr, dma_addr) \ free_pages(orig_ptr, get_order(size)) -static inline void +static inline int sleep_cond(wait_queue_head_t *wait_queue, int *condition) { + int errno = 0; wait_queue_t we; init_waitqueue_entry(&we, current); add_wait_queue(wait_queue, &we); while (!(READ_ONCE(*condition))) { set_current_state(TASK_INTERRUPTIBLE); - if (signal_pending(current)) + if (signal_pending(current)) { + errno = -EINTR; goto out; + } schedule(); } out: set_current_state(TASK_RUNNING); remove_wait_queue(wait_queue, &we); + return errno; } static inline void diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c index 95a4bbedf557..0dc081a99b30 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_mem_ops.c @@ -19,7 +19,6 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include #include #include "liquidio_common.h" #include "octeon_droq.h" diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_network.h b/drivers/net/ethernet/cavium/liquidio/octeon_network.h index fb820dc7fcb7..e5d1debd05ad 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_network.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_network.h @@ -26,8 +26,6 @@ #ifndef __OCTEON_NETWORK_H__ #define __OCTEON_NETWORK_H__ -#include -#include #include #define LIO_MAX_MTU_SIZE (OCTNET_MAX_FRM_SIZE - OCTNET_FRM_HEADER_SIZE) @@ -124,11 +122,21 @@ struct lio { /* work queue for txq status */ struct cavium_wq txq_status_wq; + + /* work queue for link status */ + struct cavium_wq link_status_wq; + }; #define LIO_SIZE (sizeof(struct lio)) #define GET_LIO(netdev) ((struct lio *)netdev_priv(netdev)) +#define CIU3_WDOG(c) (0x1010000020000ULL + (c << 3)) +#define CIU3_WDOG_MASK 12ULL +#define LIO_MONITOR_WDOG_EXPIRE 1 +#define LIO_MONITOR_CORE_STUCK_MSGD 2 +#define LIO_MAX_CORES 12 + /** * \brief Enable or disable feature * @param netdev pointer to network device diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c index 166727be928f..40ac1fe88956 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.c +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.c @@ -19,7 +19,6 @@ * This file may also be available under a different license from Cavium. * Contact Cavium, Inc. for more information **********************************************************************/ -#include #include #include #include "liquidio_common.h" @@ -36,6 +35,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, u32 rdatasize) { struct octeon_soft_command *sc; + struct octeon_instr_ih3 *ih3; struct octeon_instr_ih2 *ih2; struct octeon_instr_irh *irh; struct octeon_instr_rdp *rdp; @@ -52,10 +52,19 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, /* Add in the response related fields. Opcode and Param are already * there. */ - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - ih2->fsz = 40; /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + /*pkiih3 + irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + ih3->fsz = LIO_SOFTCMDRESP_IH3; + } else { + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + /* irh + ossp[0] + ossp[1] + rdp + rptr = 40 bytes */ + ih2->fsz = LIO_SOFTCMDRESP_IH2; + } irh->rflag = 1; /* a response is required */ @@ -64,7 +73,10 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, *sc->status_word = COMPLETION_WORD_INIT; - sc->cmd.cmd2.rptr = sc->dmarptr; + if (OCTEON_CN23XX_PF(oct)) + sc->cmd.cmd3.rptr = sc->dmarptr; + else + sc->cmd.cmd2.rptr = sc->dmarptr; sc->wait_time = 1000; sc->timeout = jiffies + sc->wait_time; @@ -73,12 +85,9 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, } int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata, - u32 xmit_more) + struct octnic_data_pkt *ndata) { - int ring_doorbell; - - ring_doorbell = !xmit_more; + int ring_doorbell = 1; return octeon_send_command(oct, ndata->q_no, ring_doorbell, &ndata->cmd, ndata->buf, ndata->datasize, @@ -183,8 +192,8 @@ octnet_send_nic_ctrl_pkt(struct octeon_device *oct, retval = octeon_send_soft_command(oct, sc); if (retval == IQ_SEND_FAILED) { octeon_free_soft_command(oct, sc); - dev_err(&oct->pci_dev->dev, "%s soft command:%d send failed status: %x\n", - __func__, nctrl->ncmd.s.cmd, retval); + dev_err(&oct->pci_dev->dev, "%s pf_num:%d soft command:%d send failed status: %x\n", + __func__, oct->pf_num, nctrl->ncmd.s.cmd, retval); spin_unlock_bh(&oct->cmd_resp_wqlock); return -1; } diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h index b71a2bbe4bee..4b8da67b995f 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_nic.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_nic.h @@ -138,7 +138,7 @@ octnet_prepare_pci_cmd_o2(struct octeon_device *oct, /* assume that rflag is cleared so therefore front data will only have * irh and ossp[0], ossp[1] for a total of 32 bytes */ - ih2->fsz = 24; + ih2->fsz = LIO_PCICMD_O2; ih2->tagtype = ORDERED_TAG; ih2->grp = DEFAULT_POW_GRP; @@ -196,7 +196,7 @@ octnet_prepare_pci_cmd_o3(struct octeon_device *oct, */ ih3->pkind = oct->instr_queue[setup->s.iq_no]->txpciq.s.pkind; /*PKI IH*/ - ih3->fsz = 24 + 8; + ih3->fsz = LIO_PCICMD_O3; if (!setup->s.gather) { ih3->dlengsz = setup->s.u.datasize; @@ -278,7 +278,7 @@ octeon_alloc_soft_command_resp(struct octeon_device *oct, * queue should be stopped, and IQ_SEND_OK if it sent okay. */ int octnet_send_nic_data_pkt(struct octeon_device *oct, - struct octnic_data_pkt *ndata, u32 xmit_more); + struct octnic_data_pkt *ndata); /** Send a NIC control packet to the device * @param oct - octeon device pointer diff --git a/drivers/net/ethernet/cavium/liquidio/request_manager.c b/drivers/net/ethernet/cavium/liquidio/request_manager.c index d32492f185ff..90866bb50033 100644 --- a/drivers/net/ethernet/cavium/liquidio/request_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/request_manager.c @@ -30,6 +30,7 @@ #include "octeon_main.h" #include "octeon_network.h" #include "cn66xx_device.h" +#include "cn23xx_pf_device.h" #define INCR_INSTRQUEUE_PKT_COUNT(octeon_dev_ptr, iq_no, field, count) \ (octeon_dev_ptr->instr_queue[iq_no]->stats.field += count) @@ -71,7 +72,8 @@ int octeon_init_instr_queue(struct octeon_device *oct, if (OCTEON_CN6XXX(oct)) conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn6xxx, conf))); - + else if (OCTEON_CN23XX_PF(oct)) + conf = &(CFG_GET_IQ_CFG(CHIP_FIELD(oct, cn23xx_pf, conf))); if (!conf) { dev_err(&oct->pci_dev->dev, "Unsupported Chip %x\n", oct->chip_id); @@ -88,6 +90,7 @@ int octeon_init_instr_queue(struct octeon_device *oct, q_size = (u32)conf->instr_type * num_descs; iq = oct->instr_queue[iq_no]; + iq->oct_dev = oct; set_dev_node(&oct->pci_dev->dev, numa_node); @@ -181,6 +184,9 @@ int octeon_delete_instr_queue(struct octeon_device *oct, u32 iq_no) if (OCTEON_CN6XXX(oct)) desc_size = CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn6xxx, conf)); + else if (OCTEON_CN23XX_PF(oct)) + desc_size = + CFG_GET_IQ_INSTR_TYPE(CHIP_FIELD(oct, cn23xx_pf, conf)); vfree(iq->request_list); @@ -383,7 +389,12 @@ lio_process_iq_request_list(struct octeon_device *oct, case REQTYPE_SOFT_COMMAND: sc = buf; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + if (OCTEON_CN23XX_PF(oct)) + irh = (struct octeon_instr_irh *) + &sc->cmd.cmd3.irh; + else + irh = (struct octeon_instr_irh *) + &sc->cmd.cmd2.irh; if (irh->rflag) { /* We're expecting a response from Octeon. * It's up to lio_process_ordered_list() to @@ -499,6 +510,7 @@ static void __check_db_timeout(struct octeon_device *oct, u64 iq_no) if (!oct) return; + iq = oct->instr_queue[iq_no]; if (!iq) return; @@ -514,6 +526,8 @@ static void __check_db_timeout(struct octeon_device *oct, u64 iq_no) /* Flush the instruction queue */ octeon_flush_iq(oct, iq, 1, 0); + + lio_enable_irq(NULL, iq); } /* Called by the Poll thread at regular intervals to check the instruction @@ -580,6 +594,8 @@ octeon_prepare_soft_command(struct octeon_device *oct, { struct octeon_config *oct_cfg; struct octeon_instr_ih2 *ih2; + struct octeon_instr_ih3 *ih3; + struct octeon_instr_pki_ih3 *pki_ih3; struct octeon_instr_irh *irh; struct octeon_instr_rdp *rdp; @@ -588,36 +604,88 @@ octeon_prepare_soft_command(struct octeon_device *oct, oct_cfg = octeon_get_conf(oct); - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - ih2->tagtype = ATOMIC_TAG; - ih2->tag = LIO_CONTROL; - ih2->raw = 1; - ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg); + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; - if (sc->datasize) { - ih2->dlengsz = sc->datasize; - ih2->rs = 1; - } + ih3->pkind = oct->instr_queue[sc->iq_no]->txpciq.s.pkind; - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - irh->opcode = opcode; - irh->subcode = subcode; + pki_ih3 = (struct octeon_instr_pki_ih3 *)&sc->cmd.cmd3.pki_ih3; - /* opcode/subcode specific parameters (ossp) */ - irh->ossp = irh_ossp; - sc->cmd.cmd2.ossp[0] = ossp0; - sc->cmd.cmd2.ossp[1] = ossp1; + pki_ih3->w = 1; + pki_ih3->raw = 1; + pki_ih3->utag = 1; + pki_ih3->uqpg = + oct->instr_queue[sc->iq_no]->txpciq.s.use_qpg; + pki_ih3->utt = 1; + pki_ih3->tag = LIO_CONTROL; + pki_ih3->tagtype = ATOMIC_TAG; + pki_ih3->qpg = + oct->instr_queue[sc->iq_no]->txpciq.s.qpg; + pki_ih3->pm = 0x7; + pki_ih3->sl = 8; - if (sc->rdatasize) { - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - rdp->pcie_port = oct->pcie_port; - rdp->rlen = sc->rdatasize; + if (sc->datasize) + ih3->dlengsz = sc->datasize; + + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + irh->opcode = opcode; + irh->subcode = subcode; + + /* opcode/subcode specific parameters (ossp) */ + irh->ossp = irh_ossp; + sc->cmd.cmd3.ossp[0] = ossp0; + sc->cmd.cmd3.ossp[1] = ossp1; + + if (sc->rdatasize) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + rdp->pcie_port = oct->pcie_port; + rdp->rlen = sc->rdatasize; + + irh->rflag = 1; + /*PKI IH3*/ + /* pki_ih3 irh+ossp[0]+ossp[1]+rdp+rptr = 48 bytes */ + ih3->fsz = LIO_SOFTCMDRESP_IH3; + } else { + irh->rflag = 0; + /*PKI IH3*/ + /* pki_h3 + irh + ossp[0] + ossp[1] = 32 bytes */ + ih3->fsz = LIO_PCICMD_O3; + } - irh->rflag = 1; - ih2->fsz = 40; /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */ } else { - irh->rflag = 0; - ih2->fsz = 24; /* irh + ossp[0] + ossp[1] = 24 bytes */ + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + ih2->tagtype = ATOMIC_TAG; + ih2->tag = LIO_CONTROL; + ih2->raw = 1; + ih2->grp = CFG_GET_CTRL_Q_GRP(oct_cfg); + + if (sc->datasize) { + ih2->dlengsz = sc->datasize; + ih2->rs = 1; + } + + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + irh->opcode = opcode; + irh->subcode = subcode; + + /* opcode/subcode specific parameters (ossp) */ + irh->ossp = irh_ossp; + sc->cmd.cmd2.ossp[0] = ossp0; + sc->cmd.cmd2.ossp[1] = ossp1; + + if (sc->rdatasize) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + rdp->pcie_port = oct->pcie_port; + rdp->rlen = sc->rdatasize; + + irh->rflag = 1; + /* irh+ossp[0]+ossp[1]+rdp+rptr = 40 bytes */ + ih2->fsz = LIO_SOFTCMDRESP_IH2; + } else { + irh->rflag = 0; + /* irh + ossp[0] + ossp[1] = 24 bytes */ + ih2->fsz = LIO_PCICMD_O2; + } } } @@ -625,23 +693,39 @@ int octeon_send_soft_command(struct octeon_device *oct, struct octeon_soft_command *sc) { struct octeon_instr_ih2 *ih2; + struct octeon_instr_ih3 *ih3; struct octeon_instr_irh *irh; u32 len; - ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; - if (ih2->dlengsz) { - WARN_ON(!sc->dmadptr); - sc->cmd.cmd2.dptr = sc->dmadptr; + if (OCTEON_CN23XX_PF(oct)) { + ih3 = (struct octeon_instr_ih3 *)&sc->cmd.cmd3.ih3; + if (ih3->dlengsz) { + WARN_ON(!sc->dmadptr); + sc->cmd.cmd3.dptr = sc->dmadptr; + } + irh = (struct octeon_instr_irh *)&sc->cmd.cmd3.irh; + if (irh->rflag) { + WARN_ON(!sc->dmarptr); + WARN_ON(!sc->status_word); + *sc->status_word = COMPLETION_WORD_INIT; + sc->cmd.cmd3.rptr = sc->dmarptr; + } + len = (u32)ih3->dlengsz; + } else { + ih2 = (struct octeon_instr_ih2 *)&sc->cmd.cmd2.ih2; + if (ih2->dlengsz) { + WARN_ON(!sc->dmadptr); + sc->cmd.cmd2.dptr = sc->dmadptr; + } + irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; + if (irh->rflag) { + WARN_ON(!sc->dmarptr); + WARN_ON(!sc->status_word); + *sc->status_word = COMPLETION_WORD_INIT; + sc->cmd.cmd2.rptr = sc->dmarptr; + } + len = (u32)ih2->dlengsz; } - irh = (struct octeon_instr_irh *)&sc->cmd.cmd2.irh; - if (irh->rflag) { - WARN_ON(!sc->dmarptr); - WARN_ON(!sc->status_word); - *sc->status_word = COMPLETION_WORD_INIT; - - sc->cmd.cmd2.rptr = sc->dmarptr; - } - len = (u32)ih2->dlengsz; if (sc->wait_time) sc->timeout = jiffies + sc->wait_time; diff --git a/drivers/net/ethernet/cavium/liquidio/response_manager.c b/drivers/net/ethernet/cavium/liquidio/response_manager.c index 709049e36627..be52178d8cb6 100644 --- a/drivers/net/ethernet/cavium/liquidio/response_manager.c +++ b/drivers/net/ethernet/cavium/liquidio/response_manager.c @@ -91,8 +91,13 @@ int lio_process_ordered_list(struct octeon_device *octeon_dev, sc = (struct octeon_soft_command *)ordered_sc_list-> head.next; - rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; - rptr = sc->cmd.cmd2.rptr; + if (OCTEON_CN23XX_PF(octeon_dev)) { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd3.rdp; + rptr = sc->cmd.cmd3.rptr; + } else { + rdp = (struct octeon_instr_rdp *)&sc->cmd.cmd2.rdp; + rptr = sc->cmd.cmd2.rptr; + } status = OCTEON_REQUEST_PENDING; diff --git a/drivers/net/ethernet/cavium/thunder/Makefile b/drivers/net/ethernet/cavium/thunder/Makefile index 5c4615ccaa14..6b4d4add7353 100644 --- a/drivers/net/ethernet/cavium/thunder/Makefile +++ b/drivers/net/ethernet/cavium/thunder/Makefile @@ -2,6 +2,7 @@ # Makefile for Cavium's Thunder ethernet device # +obj-$(CONFIG_THUNDER_NIC_RGX) += thunder_xcv.o obj-$(CONFIG_THUNDER_NIC_BGX) += thunder_bgx.o obj-$(CONFIG_THUNDER_NIC_PF) += nicpf.o obj-$(CONFIG_THUNDER_NIC_VF) += nicvf.o diff --git a/drivers/net/ethernet/cavium/thunder/nic.h b/drivers/net/ethernet/cavium/thunder/nic.h index e29815d9e6f4..30426109711c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic.h +++ b/drivers/net/ethernet/cavium/thunder/nic.h @@ -20,6 +20,17 @@ #define PCI_DEVICE_ID_THUNDER_NIC_VF 0xA034 #define PCI_DEVICE_ID_THUNDER_BGX 0xA026 +/* Subsystem device IDs */ +#define PCI_SUBSYS_DEVID_88XX_NIC_PF 0xA11E +#define PCI_SUBSYS_DEVID_81XX_NIC_PF 0xA21E +#define PCI_SUBSYS_DEVID_83XX_NIC_PF 0xA31E + +#define PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF 0xA11E +#define PCI_SUBSYS_DEVID_88XX_NIC_VF 0xA134 +#define PCI_SUBSYS_DEVID_81XX_NIC_VF 0xA234 +#define PCI_SUBSYS_DEVID_83XX_NIC_VF 0xA334 + + /* PCI BAR nos */ #define PCI_CFG_REG_BAR_NUM 0 #define PCI_MSIX_REG_BAR_NUM 4 @@ -41,40 +52,8 @@ /* Max pkinds */ #define NIC_MAX_PKIND 16 -/* Rx Channels */ -/* Receive channel configuration in TNS bypass mode - * Below is configuration in TNS bypass mode - * BGX0-LMAC0-CHAN0 - VNIC CHAN0 - * BGX0-LMAC1-CHAN0 - VNIC CHAN16 - * ... - * BGX1-LMAC0-CHAN0 - VNIC CHAN128 - * ... - * BGX1-LMAC3-CHAN0 - VNIC CHAN174 - */ -#define NIC_INTF_COUNT 2 /* Interfaces btw VNIC and TNS/BGX */ -#define NIC_CHANS_PER_INF 128 -#define NIC_MAX_CHANS (NIC_INTF_COUNT * NIC_CHANS_PER_INF) -#define NIC_CPI_COUNT 2048 /* No of channel parse indices */ - -/* TNS bypass mode: 1-1 mapping between VNIC and BGX:LMAC */ -#define NIC_MAX_BGX MAX_BGX_PER_CN88XX -#define NIC_CPI_PER_BGX (NIC_CPI_COUNT / NIC_MAX_BGX) -#define NIC_MAX_CPI_PER_LMAC 64 /* Max when CPI_ALG is IP diffserv */ -#define NIC_RSSI_PER_BGX (NIC_RSSI_COUNT / NIC_MAX_BGX) - -/* Tx scheduling */ -#define NIC_MAX_TL4 1024 -#define NIC_MAX_TL4_SHAPERS 256 /* 1 shaper for 4 TL4s */ -#define NIC_MAX_TL3 256 -#define NIC_MAX_TL3_SHAPERS 64 /* 1 shaper for 4 TL3s */ -#define NIC_MAX_TL2 64 -#define NIC_MAX_TL2_SHAPERS 2 /* 1 shaper for 32 TL2s */ -#define NIC_MAX_TL1 2 - -/* TNS bypass mode */ -#define NIC_TL2_PER_BGX 32 -#define NIC_TL4_PER_BGX (NIC_MAX_TL4 / NIC_MAX_BGX) -#define NIC_TL4_PER_LMAC (NIC_MAX_TL4 / NIC_CHANS_PER_INF) +/* Max when CPI_ALG is IP diffserv */ +#define NIC_MAX_CPI_PER_LMAC 64 /* NIC VF Interrupts */ #define NICVF_INTR_CQ 0 @@ -148,7 +127,6 @@ struct nicvf_cq_poll { struct napi_struct napi; }; -#define NIC_RSSI_COUNT 4096 /* Total no of RSS indices */ #define NIC_MAX_RSS_HASH_BITS 8 #define NIC_MAX_RSS_IDR_TBL_SIZE (1 << NIC_MAX_RSS_HASH_BITS) #define RSS_HASH_KEY_SIZE 5 /* 320 bit key */ @@ -273,6 +251,7 @@ struct nicvf { struct net_device *netdev; struct pci_dev *pdev; void __iomem *reg_base; +#define MAX_QUEUES_PER_QSET 8 struct queue_set *qs; struct nicvf_cq_poll *napi[8]; u8 vf_id; @@ -326,7 +305,7 @@ struct nicvf { bool msix_enabled; u8 num_vec; struct msix_entry msix_entries[NIC_VF_MSIX_VECTORS]; - char irq_name[NIC_VF_MSIX_VECTORS][20]; + char irq_name[NIC_VF_MSIX_VECTORS][IFNAMSIZ + 15]; bool irq_allocated[NIC_VF_MSIX_VECTORS]; cpumask_var_t affinity_mask[NIC_VF_MSIX_VECTORS]; @@ -369,6 +348,7 @@ struct nicvf { #define NIC_MBOX_MSG_PNICVF_PTR 0x14 /* Get primary qset nicvf ptr */ #define NIC_MBOX_MSG_SNICVF_PTR 0x15 /* Send sqet nicvf ptr to PVF */ #define NIC_MBOX_MSG_LOOPBACK 0x16 /* Set interface in loopback */ +#define NIC_MBOX_MSG_RESET_STAT_COUNTER 0x17 /* Reset statistics counters */ #define NIC_MBOX_MSG_CFG_DONE 0xF0 /* VF configuration done */ #define NIC_MBOX_MSG_SHUTDOWN 0xF1 /* VF is being shutdown */ @@ -485,6 +465,31 @@ struct set_loopback { bool enable; }; +/* Reset statistics counters */ +struct reset_stat_cfg { + u8 msg; + /* Bitmap to select NIC_PF_VNIC(vf_id)_RX_STAT(0..13) */ + u16 rx_stat_mask; + /* Bitmap to select NIC_PF_VNIC(vf_id)_TX_STAT(0..4) */ + u8 tx_stat_mask; + /* Bitmap to select NIC_PF_QS(0..127)_RQ(0..7)_STAT(0..1) + * bit14, bit15 NIC_PF_QS(vf_id)_RQ7_STAT(0..1) + * bit12, bit13 NIC_PF_QS(vf_id)_RQ6_STAT(0..1) + * .. + * bit2, bit3 NIC_PF_QS(vf_id)_RQ1_STAT(0..1) + * bit0, bit1 NIC_PF_QS(vf_id)_RQ0_STAT(0..1) + */ + u16 rq_stat_mask; + /* Bitmap to select NIC_PF_QS(0..127)_SQ(0..7)_STAT(0..1) + * bit14, bit15 NIC_PF_QS(vf_id)_SQ7_STAT(0..1) + * bit12, bit13 NIC_PF_QS(vf_id)_SQ6_STAT(0..1) + * .. + * bit2, bit3 NIC_PF_QS(vf_id)_SQ1_STAT(0..1) + * bit0, bit1 NIC_PF_QS(vf_id)_SQ0_STAT(0..1) + */ + u16 sq_stat_mask; +}; + /* 128 bit shared memory between PF and each VF */ union nic_mbx { struct { u8 msg; } msg; @@ -502,6 +507,7 @@ union nic_mbx { struct sqs_alloc sqs_alloc; struct nicvf_ptr nicvf; struct set_loopback lbk; + struct reset_stat_cfg reset_stat; }; #define NIC_NODE_ID_MASK 0x03 @@ -515,7 +521,14 @@ static inline int nic_get_node_id(struct pci_dev *pdev) static inline bool pass1_silicon(struct pci_dev *pdev) { - return pdev->revision < 8; + return (pdev->revision < 8) && + (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF); +} + +static inline bool pass2_silicon(struct pci_dev *pdev) +{ + return (pdev->revision >= 8) && + (pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF); } int nicvf_set_real_num_queues(struct net_device *netdev, diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 85cc782b9060..2bbf4cbf08b2 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -20,8 +20,25 @@ #define DRV_NAME "thunder-nic" #define DRV_VERSION "1.0" +struct hw_info { + u8 bgx_cnt; + u8 chans_per_lmac; + u8 chans_per_bgx; /* Rx/Tx chans */ + u8 chans_per_rgx; + u8 chans_per_lbk; + u16 cpi_cnt; + u16 rssi_cnt; + u16 rss_ind_tbl_size; + u16 tl4_cnt; + u16 tl3_cnt; + u8 tl2_cnt; + u8 tl1_cnt; + bool tl1_per_bgx; /* TL1 per BGX or per LMAC */ +}; + struct nicpf { struct pci_dev *pdev; + struct hw_info *hw; u8 node; unsigned int flags; u8 num_vf_en; /* No of VF enabled */ @@ -36,22 +53,22 @@ struct nicpf { #define NIC_SET_VF_LMAC_MAP(bgx, lmac) (((bgx & 0xF) << 4) | (lmac & 0xF)) #define NIC_GET_BGX_FROM_VF_LMAC_MAP(map) ((map >> 4) & 0xF) #define NIC_GET_LMAC_FROM_VF_LMAC_MAP(map) (map & 0xF) - u8 vf_lmac_map[MAX_LMAC]; + u8 *vf_lmac_map; struct delayed_work dwork; struct workqueue_struct *check_link; - u8 link[MAX_LMAC]; - u8 duplex[MAX_LMAC]; - u32 speed[MAX_LMAC]; + u8 *link; + u8 *duplex; + u32 *speed; u16 cpi_base[MAX_NUM_VFS_SUPPORTED]; u16 rssi_base[MAX_NUM_VFS_SUPPORTED]; - u16 rss_ind_tbl_size; bool mbx_lock[MAX_NUM_VFS_SUPPORTED]; /* MSI-X */ bool msix_enabled; u8 num_vec; - struct msix_entry msix_entries[NIC_PF_MSIX_VECTORS]; + struct msix_entry *msix_entries; bool irq_allocated[NIC_PF_MSIX_VECTORS]; + char irq_name[NIC_PF_MSIX_VECTORS][20]; }; /* Supported devices */ @@ -89,9 +106,22 @@ static u64 nic_reg_read(struct nicpf *nic, u64 offset) /* PF -> VF mailbox communication APIs */ static void nic_enable_mbx_intr(struct nicpf *nic) { - /* Enable mailbox interrupt for all 128 VFs */ - nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, ~0ull); - nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), ~0ull); + int vf_cnt = pci_sriov_get_totalvfs(nic->pdev); + +#define INTR_MASK(vfs) ((vfs < 64) ? (BIT_ULL(vfs) - 1) : (~0ull)) + + /* Clear it, to avoid spurious interrupts (if any) */ + nic_reg_write(nic, NIC_PF_MAILBOX_INT, INTR_MASK(vf_cnt)); + + /* Enable mailbox interrupt for all VFs */ + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S, INTR_MASK(vf_cnt)); + /* One mailbox intr enable reg per 64 VFs */ + if (vf_cnt > 64) { + nic_reg_write(nic, NIC_PF_MAILBOX_INT + sizeof(u64), + INTR_MASK(vf_cnt - 64)); + nic_reg_write(nic, NIC_PF_MAILBOX_ENA_W1S + sizeof(u64), + INTR_MASK(vf_cnt - 64)); + } } static void nic_clear_mbx_intr(struct nicpf *nic, int vf, int mbx_reg) @@ -144,7 +174,7 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf) mbx.nic_cfg.tns_mode = NIC_TNS_BYPASS_MODE; - if (vf < MAX_LMAC) { + if (vf < nic->num_vf_en) { bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vf]); @@ -155,7 +185,7 @@ static void nic_mbx_send_ready(struct nicpf *nic, int vf) mbx.nic_cfg.sqs_mode = (vf >= nic->num_vf_en) ? true : false; mbx.nic_cfg.node_id = nic->node; - mbx.nic_cfg.loopback_supported = vf < MAX_LMAC; + mbx.nic_cfg.loopback_supported = vf < nic->num_vf_en; nic_send_msg_to_vf(nic, vf, &mbx); } @@ -248,7 +278,8 @@ static int nic_update_hw_frs(struct nicpf *nic, int new_frs, int vf) /* Set minimum transmit packet size */ static void nic_set_tx_pkt_pad(struct nicpf *nic, int size) { - int lmac; + int lmac, max_lmac; + u16 sdevid; u64 lmac_cfg; /* There is a issue in HW where-in while sending GSO sized @@ -260,7 +291,14 @@ static void nic_set_tx_pkt_pad(struct nicpf *nic, int size) if (size > 52) size = 52; - for (lmac = 0; lmac < (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX); lmac++) { + pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); + /* 81xx's RGX has only one LMAC */ + if (sdevid == PCI_SUBSYS_DEVID_81XX_NIC_PF) + max_lmac = ((nic->hw->bgx_cnt - 1) * MAX_LMAC_PER_BGX) + 1; + else + max_lmac = nic->hw->bgx_cnt * MAX_LMAC_PER_BGX; + + for (lmac = 0; lmac < max_lmac; lmac++) { lmac_cfg = nic_reg_read(nic, NIC_PF_LMAC_0_7_CFG | (lmac << 3)); lmac_cfg &= ~(0xF << 2); lmac_cfg |= ((size / 4) << 2); @@ -280,7 +318,7 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) nic->num_vf_en = 0; - for (bgx = 0; bgx < NIC_MAX_BGX; bgx++) { + for (bgx = 0; bgx < nic->hw->bgx_cnt; bgx++) { if (!(bgx_map & (1 << bgx))) continue; lmac_cnt = bgx_get_lmac_count(nic->node, bgx); @@ -300,28 +338,125 @@ static void nic_set_lmac_vf_mapping(struct nicpf *nic) nic_reg_write(nic, NIC_PF_LMAC_0_7_CREDIT + (lmac * 8), lmac_credit); + + /* On CN81XX there are only 8 VFs but max possible no of + * interfaces are 9. + */ + if (nic->num_vf_en >= pci_sriov_get_totalvfs(nic->pdev)) { + nic->num_vf_en = pci_sriov_get_totalvfs(nic->pdev); + break; + } } } +static void nic_free_lmacmem(struct nicpf *nic) +{ + kfree(nic->vf_lmac_map); + kfree(nic->link); + kfree(nic->duplex); + kfree(nic->speed); +} + +static int nic_get_hw_info(struct nicpf *nic) +{ + u8 max_lmac; + u16 sdevid; + struct hw_info *hw = nic->hw; + + pci_read_config_word(nic->pdev, PCI_SUBSYSTEM_ID, &sdevid); + + switch (sdevid) { + case PCI_SUBSYS_DEVID_88XX_NIC_PF: + hw->bgx_cnt = MAX_BGX_PER_CN88XX; + hw->chans_per_lmac = 16; + hw->chans_per_bgx = 128; + hw->cpi_cnt = 2048; + hw->rssi_cnt = 4096; + hw->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE; + hw->tl3_cnt = 256; + hw->tl2_cnt = 64; + hw->tl1_cnt = 2; + hw->tl1_per_bgx = true; + break; + case PCI_SUBSYS_DEVID_81XX_NIC_PF: + hw->bgx_cnt = MAX_BGX_PER_CN81XX; + hw->chans_per_lmac = 8; + hw->chans_per_bgx = 32; + hw->chans_per_rgx = 8; + hw->chans_per_lbk = 24; + hw->cpi_cnt = 512; + hw->rssi_cnt = 256; + hw->rss_ind_tbl_size = 32; /* Max RSSI / Max interfaces */ + hw->tl3_cnt = 64; + hw->tl2_cnt = 16; + hw->tl1_cnt = 10; + hw->tl1_per_bgx = false; + break; + case PCI_SUBSYS_DEVID_83XX_NIC_PF: + hw->bgx_cnt = MAX_BGX_PER_CN83XX; + hw->chans_per_lmac = 8; + hw->chans_per_bgx = 32; + hw->chans_per_lbk = 64; + hw->cpi_cnt = 2048; + hw->rssi_cnt = 1024; + hw->rss_ind_tbl_size = 64; /* Max RSSI / Max interfaces */ + hw->tl3_cnt = 256; + hw->tl2_cnt = 64; + hw->tl1_cnt = 18; + hw->tl1_per_bgx = false; + break; + } + hw->tl4_cnt = MAX_QUEUES_PER_QSET * pci_sriov_get_totalvfs(nic->pdev); + + /* Allocate memory for LMAC tracking elements */ + max_lmac = hw->bgx_cnt * MAX_LMAC_PER_BGX; + nic->vf_lmac_map = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->vf_lmac_map) + goto error; + nic->link = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->link) + goto error; + nic->duplex = kmalloc_array(max_lmac, sizeof(u8), GFP_KERNEL); + if (!nic->duplex) + goto error; + nic->speed = kmalloc_array(max_lmac, sizeof(u32), GFP_KERNEL); + if (!nic->speed) + goto error; + return 0; + +error: + nic_free_lmacmem(nic); + return -ENOMEM; +} + #define BGX0_BLOCK 8 #define BGX1_BLOCK 9 -static void nic_init_hw(struct nicpf *nic) +static int nic_init_hw(struct nicpf *nic) { - int i; + int i, err; u64 cqm_cfg; + /* Get HW capability info */ + err = nic_get_hw_info(nic); + if (err) + return err; + /* Enable NIC HW block */ nic_reg_write(nic, NIC_PF_CFG, 0x3); /* Enable backpressure */ nic_reg_write(nic, NIC_PF_BP_CFG, (1ULL << 6) | 0x03); - /* Disable TNS mode on both interfaces */ - nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, - (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK); - nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), - (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK); + /* TNS and TNS bypass modes are present only on 88xx */ + if (nic->pdev->subsystem_device == PCI_SUBSYS_DEVID_88XX_NIC_PF) { + /* Disable TNS mode on both interfaces */ + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG, + (NIC_TNS_BYPASS_MODE << 7) | BGX0_BLOCK); + nic_reg_write(nic, NIC_PF_INTF_0_1_SEND_CFG | (1 << 8), + (NIC_TNS_BYPASS_MODE << 7) | BGX1_BLOCK); + } + nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG, (1ULL << 63) | BGX0_BLOCK); nic_reg_write(nic, NIC_PF_INTF_0_1_BP_CFG + (1 << 8), @@ -351,11 +486,14 @@ static void nic_init_hw(struct nicpf *nic) cqm_cfg = nic_reg_read(nic, NIC_PF_CQM_CFG); if (cqm_cfg < NICPF_CQM_MIN_DROP_LEVEL) nic_reg_write(nic, NIC_PF_CQM_CFG, NICPF_CQM_MIN_DROP_LEVEL); + + return 0; } /* Channel parse index configuration */ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) { + struct hw_info *hw = nic->hw; u32 vnic, bgx, lmac, chan; u32 padd, cpi_count = 0; u64 cpi_base, cpi, rssi_base, rssi; @@ -365,9 +503,9 @@ static void nic_config_cpi(struct nicpf *nic, struct cpi_cfg_msg *cfg) bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[vnic]); - chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); - cpi_base = (lmac * NIC_MAX_CPI_PER_LMAC) + (bgx * NIC_CPI_PER_BGX); - rssi_base = (lmac * nic->rss_ind_tbl_size) + (bgx * NIC_RSSI_PER_BGX); + chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx); + cpi_base = vnic * NIC_MAX_CPI_PER_LMAC; + rssi_base = vnic * hw->rss_ind_tbl_size; /* Rx channel configuration */ nic_reg_write(nic, NIC_PF_CHAN_0_255_RX_BP_CFG | (chan << 3), @@ -439,7 +577,7 @@ static void nic_send_rss_size(struct nicpf *nic, int vf) msg = (u64 *)&mbx; mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE; - mbx.rss_size.ind_tbl_size = nic->rss_ind_tbl_size; + mbx.rss_size.ind_tbl_size = nic->hw->rss_ind_tbl_size; nic_send_msg_to_vf(nic, vf, &mbx); } @@ -486,7 +624,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) /* 4 level transmit side scheduler configutation * for TNS bypass mode * - * Sample configuration for SQ0 + * Sample configuration for SQ0 on 88xx * VNIC0-SQ0 -> TL4(0) -> TL3[0] -> TL2[0] -> TL1[0] -> BGX0 * VNIC1-SQ0 -> TL4(8) -> TL3[2] -> TL2[0] -> TL1[0] -> BGX0 * VNIC2-SQ0 -> TL4(16) -> TL3[4] -> TL2[1] -> TL1[0] -> BGX0 @@ -499,6 +637,7 @@ static void nic_config_rss(struct nicpf *nic, struct rss_cfg_msg *cfg) static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, struct sq_cfg_msg *sq) { + struct hw_info *hw = nic->hw; u32 bgx, lmac, chan; u32 tl2, tl3, tl4; u32 rr_quantum; @@ -517,21 +656,28 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, /* 24 bytes for FCS, IPG and preamble */ rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4); - if (!sq->sqs_mode) { - tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); - } else { - for (svf = 0; svf < MAX_SQS_PER_VF; svf++) { - if (nic->vf_sqs[pqs_vnic][svf] == vnic) - break; + /* For 88xx 0-511 TL4 transmits via BGX0 and + * 512-1023 TL4s transmit via BGX1. + */ + if (hw->tl1_per_bgx) { + tl4 = bgx * (hw->tl4_cnt / hw->bgx_cnt); + if (!sq->sqs_mode) { + tl4 += (lmac * MAX_QUEUES_PER_QSET); + } else { + for (svf = 0; svf < MAX_SQS_PER_VF; svf++) { + if (nic->vf_sqs[pqs_vnic][svf] == vnic) + break; + } + tl4 += (MAX_LMAC_PER_BGX * MAX_QUEUES_PER_QSET); + tl4 += (lmac * MAX_QUEUES_PER_QSET * MAX_SQS_PER_VF); + tl4 += (svf * MAX_QUEUES_PER_QSET); } - tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC); - tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF); - tl4 += (svf * NIC_TL4_PER_LMAC); - tl4 += (bgx * NIC_TL4_PER_BGX); + } else { + tl4 = (vnic * MAX_QUEUES_PER_QSET); } tl4 += sq_idx; - tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3); + tl3 = tl4 / (hw->tl4_cnt / hw->tl3_cnt); nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 | ((u64)vnic << NIC_QS_ID_SHIFT) | ((u32)sq_idx << NIC_Q_NUM_SHIFT), tl4); @@ -539,8 +685,19 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, ((u64)vnic << 27) | ((u32)sq_idx << 24) | rr_quantum); nic_reg_write(nic, NIC_PF_TL3_0_255_CFG | (tl3 << 3), rr_quantum); - chan = (lmac * MAX_BGX_CHANS_PER_LMAC) + (bgx * NIC_CHANS_PER_INF); - nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan); + + /* On 88xx 0-127 channels are for BGX0 and + * 127-255 channels for BGX1. + * + * On 81xx/83xx TL3_CHAN reg should be configured with channel + * within LMAC i.e 0-7 and not the actual channel number like on 88xx + */ + chan = (lmac * hw->chans_per_lmac) + (bgx * hw->chans_per_bgx); + if (hw->tl1_per_bgx) + nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), chan); + else + nic_reg_write(nic, NIC_PF_TL3_0_255_CHAN | (tl3 << 3), 0); + /* Enable backpressure on the channel */ nic_reg_write(nic, NIC_PF_CHAN_0_255_TX_CFG | (chan << 3), 1); @@ -549,6 +706,16 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, nic_reg_write(nic, NIC_PF_TL2_0_63_CFG | (tl2 << 3), rr_quantum); /* No priorities as of now */ nic_reg_write(nic, NIC_PF_TL2_0_63_PRI | (tl2 << 3), 0x00); + + /* Unlike 88xx where TL2s 0-31 transmits to TL1 '0' and rest to TL1 '1' + * on 81xx/83xx TL2 needs to be configured to transmit to one of the + * possible LMACs. + * + * This register doesn't exist on 88xx. + */ + if (!hw->tl1_per_bgx) + nic_reg_write(nic, NIC_PF_TL2_LMAC | (tl2 << 3), + lmac + (bgx * MAX_LMAC_PER_BGX)); } /* Send primary nicvf pointer to secondary QS's VF */ @@ -620,7 +787,7 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk) { int bgx_idx, lmac_idx; - if (lbk->vf_id > MAX_LMAC) + if (lbk->vf_id >= nic->num_vf_en) return -1; bgx_idx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lbk->vf_id]); @@ -631,6 +798,67 @@ static int nic_config_loopback(struct nicpf *nic, struct set_loopback *lbk) return 0; } +/* Reset statistics counters */ +static int nic_reset_stat_counters(struct nicpf *nic, + int vf, struct reset_stat_cfg *cfg) +{ + int i, stat, qnum; + u64 reg_addr; + + for (i = 0; i < RX_STATS_ENUM_LAST; i++) { + if (cfg->rx_stat_mask & BIT(i)) { + reg_addr = NIC_PF_VNIC_0_127_RX_STAT_0_13 | + (vf << NIC_QS_ID_SHIFT) | + (i << 3); + nic_reg_write(nic, reg_addr, 0); + } + } + + for (i = 0; i < TX_STATS_ENUM_LAST; i++) { + if (cfg->tx_stat_mask & BIT(i)) { + reg_addr = NIC_PF_VNIC_0_127_TX_STAT_0_4 | + (vf << NIC_QS_ID_SHIFT) | + (i << 3); + nic_reg_write(nic, reg_addr, 0); + } + } + + for (i = 0; i <= 15; i++) { + qnum = i >> 1; + stat = i & 1 ? 1 : 0; + reg_addr = (vf << NIC_QS_ID_SHIFT) | + (qnum << NIC_Q_NUM_SHIFT) | (stat << 3); + if (cfg->rq_stat_mask & BIT(i)) { + reg_addr |= NIC_PF_QSET_0_127_RQ_0_7_STAT_0_1; + nic_reg_write(nic, reg_addr, 0); + } + if (cfg->sq_stat_mask & BIT(i)) { + reg_addr |= NIC_PF_QSET_0_127_SQ_0_7_STAT_0_1; + nic_reg_write(nic, reg_addr, 0); + } + } + return 0; +} + +static void nic_enable_tunnel_parsing(struct nicpf *nic, int vf) +{ + u64 prot_def = (IPV6_PROT << 32) | (IPV4_PROT << 16) | ET_PROT; + u64 vxlan_prot_def = (IPV6_PROT_DEF << 32) | + (IPV4_PROT_DEF) << 16 | ET_PROT_DEF; + + /* Configure tunnel parsing parameters */ + nic_reg_write(nic, NIC_PF_RX_GENEVE_DEF, + (1ULL << 63 | UDP_GENEVE_PORT_NUM)); + nic_reg_write(nic, NIC_PF_RX_GENEVE_PROT_DEF, + ((7ULL << 61) | prot_def)); + nic_reg_write(nic, NIC_PF_RX_NVGRE_PROT_DEF, + ((7ULL << 61) | prot_def)); + nic_reg_write(nic, NIC_PF_RX_VXLAN_DEF_0_1, + ((1ULL << 63) | UDP_VXLAN_PORT_NUM)); + nic_reg_write(nic, NIC_PF_RX_VXLAN_PROT_DEF, + ((0xfULL << 60) | vxlan_prot_def)); +} + static void nic_enable_vf(struct nicpf *nic, int vf, bool enable) { int bgx, lmac; @@ -669,18 +897,17 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) mbx_addr += sizeof(u64); } - dev_dbg(&nic->pdev->dev, "%s: Mailbox msg %d from VF%d\n", + dev_dbg(&nic->pdev->dev, "%s: Mailbox msg 0x%02x from VF%d\n", __func__, mbx.msg.msg, vf); switch (mbx.msg.msg) { case NIC_MBOX_MSG_READY: nic_mbx_send_ready(nic, vf); - if (vf < MAX_LMAC) { + if (vf < nic->num_vf_en) { nic->link[vf] = 0; nic->duplex[vf] = 0; nic->speed[vf] = 0; } - ret = 1; - break; + goto unlock; case NIC_MBOX_MSG_QS_CFG: reg_addr = NIC_PF_QSET_0_127_CFG | (mbx.qs.num << NIC_QS_ID_SHIFT); @@ -698,6 +925,15 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) (mbx.rq.qs_num << NIC_QS_ID_SHIFT) | (mbx.rq.rq_num << NIC_Q_NUM_SHIFT); nic_reg_write(nic, reg_addr, mbx.rq.cfg); + /* Enable CQE_RX2_S extension in CQE_RX descriptor. + * This gets appended by default on 81xx/83xx chips, + * for consistency enabling the same on 88xx pass2 + * where this is introduced. + */ + if (pass2_silicon(nic->pdev)) + nic_reg_write(nic, NIC_PF_RX_CFG, 0x01); + if (!pass1_silicon(nic->pdev)) + nic_enable_tunnel_parsing(nic, vf); break; case NIC_MBOX_MSG_RQ_BP_CFG: reg_addr = NIC_PF_QSET_0_127_RQ_0_7_BP_CFG | @@ -722,8 +958,10 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) nic_tx_channel_cfg(nic, mbx.qs.num, &mbx.sq); break; case NIC_MBOX_MSG_SET_MAC: - if (vf >= nic->num_vf_en) + if (vf >= nic->num_vf_en) { + ret = -1; /* NACK */ break; + } lmac = mbx.mac.vf_id; bgx = NIC_GET_BGX_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); lmac = NIC_GET_LMAC_FROM_VF_LMAC_MAP(nic->vf_lmac_map[lmac]); @@ -772,25 +1010,38 @@ static void nic_handle_mbx_intr(struct nicpf *nic, int vf) case NIC_MBOX_MSG_LOOPBACK: ret = nic_config_loopback(nic, &mbx.lbk); break; + case NIC_MBOX_MSG_RESET_STAT_COUNTER: + ret = nic_reset_stat_counters(nic, vf, &mbx.reset_stat); + break; default: dev_err(&nic->pdev->dev, "Invalid msg from VF%d, msg 0x%x\n", vf, mbx.msg.msg); break; } - if (!ret) + if (!ret) { nic_mbx_send_ack(nic, vf); - else if (mbx.msg.msg != NIC_MBOX_MSG_READY) + } else if (mbx.msg.msg != NIC_MBOX_MSG_READY) { + dev_err(&nic->pdev->dev, "NACK for MBOX 0x%02x from VF %d\n", + mbx.msg.msg, vf); nic_mbx_send_nack(nic, vf); + } unlock: nic->mbx_lock[vf] = false; } -static void nic_mbx_intr_handler (struct nicpf *nic, int mbx) +static irqreturn_t nic_mbx_intr_handler(int irq, void *nic_irq) { + struct nicpf *nic = (struct nicpf *)nic_irq; + int mbx; u64 intr; u8 vf, vf_per_mbx_reg = 64; + if (irq == nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector) + mbx = 0; + else + mbx = 1; + intr = nic_reg_read(nic, NIC_PF_MAILBOX_INT + (mbx << 3)); dev_dbg(&nic->pdev->dev, "PF interrupt Mbox%d 0x%llx\n", mbx, intr); for (vf = 0; vf < vf_per_mbx_reg; vf++) { @@ -802,23 +1053,6 @@ static void nic_mbx_intr_handler (struct nicpf *nic, int mbx) nic_clear_mbx_intr(nic, vf, mbx); } } -} - -static irqreturn_t nic_mbx0_intr_handler (int irq, void *nic_irq) -{ - struct nicpf *nic = (struct nicpf *)nic_irq; - - nic_mbx_intr_handler(nic, 0); - - return IRQ_HANDLED; -} - -static irqreturn_t nic_mbx1_intr_handler (int irq, void *nic_irq) -{ - struct nicpf *nic = (struct nicpf *)nic_irq; - - nic_mbx_intr_handler(nic, 1); - return IRQ_HANDLED; } @@ -826,7 +1060,13 @@ static int nic_enable_msix(struct nicpf *nic) { int i, ret; - nic->num_vec = NIC_PF_MSIX_VECTORS; + nic->num_vec = pci_msix_vec_count(nic->pdev); + + nic->msix_entries = kmalloc_array(nic->num_vec, + sizeof(struct msix_entry), + GFP_KERNEL); + if (!nic->msix_entries) + return -ENOMEM; for (i = 0; i < nic->num_vec; i++) nic->msix_entries[i].entry = i; @@ -834,8 +1074,9 @@ static int nic_enable_msix(struct nicpf *nic) ret = pci_enable_msix(nic->pdev, nic->msix_entries, nic->num_vec); if (ret) { dev_err(&nic->pdev->dev, - "Request for #%d msix vectors failed\n", - nic->num_vec); + "Request for #%d msix vectors failed, returned %d\n", + nic->num_vec, ret); + kfree(nic->msix_entries); return ret; } @@ -847,6 +1088,7 @@ static void nic_disable_msix(struct nicpf *nic) { if (nic->msix_enabled) { pci_disable_msix(nic->pdev); + kfree(nic->msix_entries); nic->msix_enabled = 0; nic->num_vec = 0; } @@ -865,27 +1107,26 @@ static void nic_free_all_interrupts(struct nicpf *nic) static int nic_register_interrupts(struct nicpf *nic) { - int ret; + int i, ret; /* Enable MSI-X */ ret = nic_enable_msix(nic); if (ret) return ret; - /* Register mailbox interrupt handlers */ - ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX0].vector, - nic_mbx0_intr_handler, 0, "NIC Mbox0", nic); - if (ret) - goto fail; + /* Register mailbox interrupt handler */ + for (i = NIC_PF_INTR_ID_MBOX0; i < nic->num_vec; i++) { + sprintf(nic->irq_name[i], + "NICPF Mbox%d", (i - NIC_PF_INTR_ID_MBOX0)); - nic->irq_allocated[NIC_PF_INTR_ID_MBOX0] = true; + ret = request_irq(nic->msix_entries[i].vector, + nic_mbx_intr_handler, 0, + nic->irq_name[i], nic); + if (ret) + goto fail; - ret = request_irq(nic->msix_entries[NIC_PF_INTR_ID_MBOX1].vector, - nic_mbx1_intr_handler, 0, "NIC Mbox1", nic); - if (ret) - goto fail; - - nic->irq_allocated[NIC_PF_INTR_ID_MBOX1] = true; + nic->irq_allocated[i] = true; + } /* Enable mailbox interrupt */ nic_enable_mbx_intr(nic); @@ -894,6 +1135,7 @@ static int nic_register_interrupts(struct nicpf *nic) fail: dev_err(&nic->pdev->dev, "Request irq failed\n"); nic_free_all_interrupts(nic); + nic_disable_msix(nic); return ret; } @@ -908,6 +1150,12 @@ static int nic_num_sqs_en(struct nicpf *nic, int vf_en) int pos, sqs_per_vf = MAX_SQS_PER_VF_SINGLE_NODE; u16 total_vf; + /* Secondary Qsets are needed only if CPU count is + * morethan MAX_QUEUES_PER_QSET. + */ + if (num_online_cpus() <= MAX_QUEUES_PER_QSET) + return 0; + /* Check if its a multi-node environment */ if (nr_node_ids > 1) sqs_per_vf = MAX_SQS_PER_VF; @@ -1013,6 +1261,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!nic) return -ENOMEM; + nic->hw = devm_kzalloc(dev, sizeof(struct hw_info), GFP_KERNEL); + if (!nic->hw) { + devm_kfree(dev, nic); + return -ENOMEM; + } + pci_set_drvdata(pdev, nic); nic->pdev = pdev; @@ -1052,13 +1306,12 @@ static int nic_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->node = nic_get_node_id(pdev); - nic_set_lmac_vf_mapping(nic); - /* Initialize hardware */ - nic_init_hw(nic); + err = nic_init_hw(nic); + if (err) + goto err_release_regions; - /* Set RSS TBL size for each VF */ - nic->rss_ind_tbl_size = NIC_MAX_RSS_IDR_TBL_SIZE; + nic_set_lmac_vf_mapping(nic); /* Register interrupts */ err = nic_register_interrupts(nic); @@ -1091,6 +1344,9 @@ err_unregister_interrupts: err_release_regions: pci_release_regions(pdev); err_disable_device: + nic_free_lmacmem(nic); + devm_kfree(dev, nic->hw); + devm_kfree(dev, nic); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); return err; @@ -1111,6 +1367,11 @@ static void nic_remove(struct pci_dev *pdev) nic_unregister_interrupts(nic); pci_release_regions(pdev); + + nic_free_lmacmem(nic); + devm_kfree(&pdev->dev, nic->hw); + devm_kfree(&pdev->dev, nic); + pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } diff --git a/drivers/net/ethernet/cavium/thunder/nic_reg.h b/drivers/net/ethernet/cavium/thunder/nic_reg.h index fab35a593898..edf779f5a227 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_reg.h +++ b/drivers/net/ethernet/cavium/thunder/nic_reg.h @@ -36,6 +36,20 @@ #define NIC_PF_MAILBOX_ENA_W1C (0x0450) #define NIC_PF_MAILBOX_ENA_W1S (0x0470) #define NIC_PF_RX_ETYPE_0_7 (0x0500) +#define NIC_PF_RX_GENEVE_DEF (0x0580) +#define UDP_GENEVE_PORT_NUM 0x17C1ULL +#define NIC_PF_RX_GENEVE_PROT_DEF (0x0588) +#define IPV6_PROT 0x86DDULL +#define IPV4_PROT 0x800ULL +#define ET_PROT 0x6558ULL +#define NIC_PF_RX_NVGRE_PROT_DEF (0x0598) +#define NIC_PF_RX_VXLAN_DEF_0_1 (0x05A0) +#define UDP_VXLAN_PORT_NUM 0x12B5 +#define NIC_PF_RX_VXLAN_PROT_DEF (0x05B0) +#define IPV6_PROT_DEF 0x2ULL +#define IPV4_PROT_DEF 0x1ULL +#define ET_PROT_DEF 0x3ULL +#define NIC_PF_RX_CFG (0x05D0) #define NIC_PF_PKIND_0_15_CFG (0x0600) #define NIC_PF_ECC0_FLIP0 (0x1000) #define NIC_PF_ECC1_FLIP0 (0x1008) @@ -103,6 +117,7 @@ #define NIC_PF_SW_SYNC_RX_DONE (0x490008) #define NIC_PF_TL2_0_63_CFG (0x500000) #define NIC_PF_TL2_0_63_PRI (0x520000) +#define NIC_PF_TL2_LMAC (0x540000) #define NIC_PF_TL2_0_63_SH_STATUS (0x580000) #define NIC_PF_TL3A_0_63_CFG (0x5F0000) #define NIC_PF_TL3_0_255_CFG (0x600000) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 3240349615bd..45a13f718863 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -29,10 +29,20 @@ static const struct pci_device_id nicvf_id_table[] = { { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_NIC_VF, - PCI_VENDOR_ID_CAVIUM, 0xA134) }, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_88XX_NIC_VF) }, { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF, - PCI_VENDOR_ID_CAVIUM, 0xA11E) }, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_88XX_PASS1_NIC_VF) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_NIC_VF, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_81XX_NIC_VF) }, + { PCI_DEVICE_SUB(PCI_VENDOR_ID_CAVIUM, + PCI_DEVICE_ID_THUNDER_NIC_VF, + PCI_VENDOR_ID_CAVIUM, + PCI_SUBSYS_DEVID_83XX_NIC_VF) }, { 0, } /* end of table */ }; @@ -134,15 +144,19 @@ int nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx) /* Wait for previous message to be acked, timeout 2sec */ while (!nic->pf_acked) { - if (nic->pf_nacked) + if (nic->pf_nacked) { + netdev_err(nic->netdev, + "PF NACK to mbox msg 0x%02x from VF%d\n", + (mbx->msg.msg & 0xFF), nic->vf_id); return -EINVAL; + } msleep(sleep); if (nic->pf_acked) break; timeout -= sleep; if (!timeout) { netdev_err(nic->netdev, - "PF didn't ack to mbox msg %d from VF%d\n", + "PF didn't ACK to mbox msg 0x%02x from VF%d\n", (mbx->msg.msg & 0xFF), nic->vf_id); return -EBUSY; } @@ -352,13 +366,7 @@ static int nicvf_rss_init(struct nicvf *nic) rss->enable = true; - /* Using the HW reset value for now */ - rss->key[0] = 0xFEED0BADFEED0BADULL; - rss->key[1] = 0xFEED0BADFEED0BADULL; - rss->key[2] = 0xFEED0BADFEED0BADULL; - rss->key[3] = 0xFEED0BADFEED0BADULL; - rss->key[4] = 0xFEED0BADFEED0BADULL; - + netdev_rss_key_fill(rss->key, RSS_HASH_KEY_SIZE * sizeof(u64)); nicvf_set_rss_key(nic); rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA; @@ -507,7 +515,9 @@ static int nicvf_init_resources(struct nicvf *nic) static void nicvf_snd_pkt_handler(struct net_device *netdev, struct cmp_queue *cq, - struct cqe_send_t *cqe_tx, int cqe_type) + struct cqe_send_t *cqe_tx, + int cqe_type, int budget, + unsigned int *tx_pkts, unsigned int *tx_bytes) { struct sk_buff *skb = NULL; struct nicvf *nic = netdev_priv(netdev); @@ -538,7 +548,9 @@ static void nicvf_snd_pkt_handler(struct net_device *netdev, } nicvf_put_sq_desc(sq, hdr->subdesc_cnt + 1); prefetch(skb); - dev_consume_skb_any(skb); + (*tx_pkts)++; + *tx_bytes += skb->len; + napi_consume_skb(skb, budget); sq->skbuff[cqe_tx->sqe_ptr] = (u64)NULL; } else { /* In case of SW TSO on 88xx, only last segment will have @@ -653,6 +665,7 @@ static int nicvf_cq_intr_handler(struct net_device *netdev, u8 cq_idx, struct cmp_queue *cq = &qs->cq[cq_idx]; struct cqe_rx_t *cq_desc; struct netdev_queue *txq; + unsigned int tx_pkts = 0, tx_bytes = 0; spin_lock_bh(&cq->lock); loop: @@ -691,7 +704,8 @@ loop: break; case CQE_TYPE_SEND: nicvf_snd_pkt_handler(netdev, cq, - (void *)cq_desc, CQE_TYPE_SEND); + (void *)cq_desc, CQE_TYPE_SEND, + budget, &tx_pkts, &tx_bytes); tx_done++; break; case CQE_TYPE_INVALID: @@ -720,6 +734,9 @@ done: netdev = nic->pnicvf->netdev; txq = netdev_get_tx_queue(netdev, nicvf_netdev_qidx(nic, cq_idx)); + if (tx_pkts) + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + nic = nic->pnicvf; if (netif_tx_queue_stopped(txq) && netif_carrier_ok(netdev)) { netif_tx_start_queue(txq); @@ -933,16 +950,19 @@ static int nicvf_register_interrupts(struct nicvf *nic) int vector; for_each_cq_irq(irq) - sprintf(nic->irq_name[irq], "NICVF%d CQ%d", - nic->vf_id, irq); + sprintf(nic->irq_name[irq], "%s-rxtx-%d", + nic->pnicvf->netdev->name, + nicvf_netdev_qidx(nic, irq)); for_each_sq_irq(irq) - sprintf(nic->irq_name[irq], "NICVF%d SQ%d", - nic->vf_id, irq - NICVF_INTR_ID_SQ); + sprintf(nic->irq_name[irq], "%s-sq-%d", + nic->pnicvf->netdev->name, + nicvf_netdev_qidx(nic, irq - NICVF_INTR_ID_SQ)); for_each_rbdr_irq(irq) - sprintf(nic->irq_name[irq], "NICVF%d RBDR%d", - nic->vf_id, irq - NICVF_INTR_ID_RBDR); + sprintf(nic->irq_name[irq], "%s-rbdr-%d", + nic->pnicvf->netdev->name, + nic->sqs_mode ? (nic->sqs_id + 1) : 0); /* Register CQ interrupts */ for (irq = 0; irq < nic->qs->cq_cnt; irq++) { @@ -966,8 +986,9 @@ static int nicvf_register_interrupts(struct nicvf *nic) } /* Register QS error interrupt */ - sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], - "NICVF%d Qset error", nic->vf_id); + sprintf(nic->irq_name[NICVF_INTR_ID_QS_ERR], "%s-qset-err-%d", + nic->pnicvf->netdev->name, + nic->sqs_mode ? (nic->sqs_id + 1) : 0); irq = NICVF_INTR_ID_QS_ERR; ret = request_irq(nic->msix_entries[irq].vector, nicvf_qs_err_intr_handler, @@ -1146,6 +1167,9 @@ int nicvf_stop(struct net_device *netdev) netif_tx_disable(netdev); + for (qidx = 0; qidx < netdev->num_tx_queues; qidx++) + netdev_tx_reset_queue(netdev_get_tx_queue(netdev, qidx)); + /* Free resources */ nicvf_config_data_transfer(nic, false); @@ -1196,7 +1220,7 @@ int nicvf_open(struct net_device *netdev) } /* Check if we got MAC address from PF or else generate a radom MAC */ - if (is_zero_ether_addr(netdev->dev_addr)) { + if (!nic->sqs_mode && is_zero_ether_addr(netdev->dev_addr)) { eth_hw_addr_random(netdev); nicvf_hw_set_mac_addr(nic, netdev); } @@ -1533,14 +1557,13 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_release_regions; } - qcount = MAX_CMP_QUEUES_PER_QS; + qcount = netif_get_num_default_rss_queues(); /* Restrict multiqset support only for host bound VFs */ if (pdev->is_virtfn) { /* Set max number of queues per VF */ - qcount = roundup(num_online_cpus(), MAX_CMP_QUEUES_PER_QS); - qcount = min(qcount, - (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); + qcount = min_t(int, num_online_cpus(), + (MAX_SQS_PER_VF + 1) * MAX_CMP_QUEUES_PER_QS); } netdev = alloc_etherdev_mqs(sizeof(struct nicvf), qcount, qcount); diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c index dda3ea3f3bb6..a4fc50155881 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.c @@ -479,6 +479,16 @@ void nicvf_config_vlan_stripping(struct nicvf *nic, netdev_features_t features) NIC_QSET_RQ_GEN_CFG, 0, rq_cfg); } +static void nicvf_reset_rcv_queue_stats(struct nicvf *nic) +{ + union nic_mbx mbx = {}; + + /* Reset all RXQ's stats */ + mbx.reset_stat.msg = NIC_MBOX_MSG_RESET_STAT_COUNTER; + mbx.reset_stat.rq_stat_mask = 0xFFFF; + nicvf_send_msg_to_pf(nic, &mbx); +} + /* Configures receive queue */ static void nicvf_rcv_queue_config(struct nicvf *nic, struct queue_set *qs, int qidx, bool enable) @@ -762,10 +772,10 @@ int nicvf_set_qset_resources(struct nicvf *nic) nic->qs = qs; /* Set count of each queue */ - qs->rbdr_cnt = RBDR_CNT; - qs->rq_cnt = RCV_QUEUE_CNT; - qs->sq_cnt = SND_QUEUE_CNT; - qs->cq_cnt = CMP_QUEUE_CNT; + qs->rbdr_cnt = DEFAULT_RBDR_CNT; + qs->rq_cnt = min_t(u8, MAX_RCV_QUEUES_PER_QS, num_online_cpus()); + qs->sq_cnt = min_t(u8, MAX_SND_QUEUES_PER_QS, num_online_cpus()); + qs->cq_cnt = max_t(u8, qs->rq_cnt, qs->sq_cnt); /* Set queue lengths */ qs->rbdr_len = RCV_BUF_COUNT; @@ -812,6 +822,11 @@ int nicvf_config_data_transfer(struct nicvf *nic, bool enable) nicvf_free_resources(nic); } + /* Reset RXQ's stats. + * SQ's stats will get reset automatically once SQ is reset. + */ + nicvf_reset_rcv_queue_stats(nic); + return 0; } @@ -1067,6 +1082,24 @@ static inline void nicvf_sq_add_cqe_subdesc(struct snd_queue *sq, int qentry, imm->len = 1; } +static inline void nicvf_sq_doorbell(struct nicvf *nic, struct sk_buff *skb, + int sq_num, int desc_cnt) +{ + struct netdev_queue *txq; + + txq = netdev_get_tx_queue(nic->pnicvf->netdev, + skb_get_queue_mapping(skb)); + + netdev_tx_sent_queue(txq, skb->len); + + /* make sure all memory stores are done before ringing doorbell */ + smp_wmb(); + + /* Inform HW to xmit all TSO segments */ + nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, + sq_num, desc_cnt); +} + /* Segment a TSO packet into 'gso_size' segments and append * them to SQ for transfer */ @@ -1126,12 +1159,8 @@ static int nicvf_sq_append_tso(struct nicvf *nic, struct snd_queue *sq, /* Save SKB in the last segment for freeing */ sq->skbuff[hdr_qentry] = (u64)skb; - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, desc_cnt); - /* Inform HW to xmit all TSO segments */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, desc_cnt); nic->drv_stats.tx_tso++; return 1; } @@ -1204,12 +1233,8 @@ doorbell: nicvf_sq_add_cqe_subdesc(sq, qentry, tso_sqe, skb); } - /* make sure all memory stores are done before ringing doorbell */ - smp_wmb(); + nicvf_sq_doorbell(nic, skb, sq_num, subdesc_cnt); - /* Inform HW to xmit new packet */ - nicvf_queue_reg_write(nic, NIC_QSET_SQ_0_7_DOOR, - sq_num, subdesc_cnt); return 1; append_fail: @@ -1234,13 +1259,23 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) int frag; int payload_len = 0; struct sk_buff *skb = NULL; - struct sk_buff *skb_frag = NULL; - struct sk_buff *prev_frag = NULL; + struct page *page; + int offset; u16 *rb_lens = NULL; u64 *rb_ptrs = NULL; rb_lens = (void *)cqe_rx + (3 * sizeof(u64)); - rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64)); + /* Except 88xx pass1 on all other chips CQE_RX2_S is added to + * CQE_RX at word6, hence buffer pointers move by word + * + * Use existing 'hw_tso' flag which will be set for all chips + * except 88xx pass1 instead of a additional cache line + * access (or miss) by using pci dev's revision. + */ + if (!nic->hw_tso) + rb_ptrs = (void *)cqe_rx + (6 * sizeof(u64)); + else + rb_ptrs = (void *)cqe_rx + (7 * sizeof(u64)); netdev_dbg(nic->netdev, "%s rb_cnt %d rb0_ptr %llx rb0_sz %d\n", __func__, cqe_rx->rb_cnt, cqe_rx->rb0_ptr, cqe_rx->rb0_sz); @@ -1258,22 +1293,10 @@ struct sk_buff *nicvf_get_rcv_skb(struct nicvf *nic, struct cqe_rx_t *cqe_rx) skb_put(skb, payload_len); } else { /* Add fragments */ - skb_frag = nicvf_rb_ptr_to_skb(nic, *rb_ptrs, - payload_len); - if (!skb_frag) { - dev_kfree_skb(skb); - return NULL; - } - - if (!skb_shinfo(skb)->frag_list) - skb_shinfo(skb)->frag_list = skb_frag; - else - prev_frag->next = skb_frag; - - prev_frag = skb_frag; - skb->len += payload_len; - skb->data_len += payload_len; - skb_frag->len = payload_len; + page = virt_to_page(phys_to_virt(*rb_ptrs)); + offset = phys_to_virt(*rb_ptrs) - page_address(page); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + offset, payload_len, RCV_FRAG_LEN); } /* Next buffer pointer */ rb_ptrs++; diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h index 6673e1133523..869f3386028b 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_queues.h +++ b/drivers/net/ethernet/cavium/thunder/nicvf_queues.h @@ -57,10 +57,7 @@ #define CMP_QUEUE_SIZE6 6ULL /* 64K entries */ /* Default queue count per QS, its lengths and threshold values */ -#define RBDR_CNT 1 -#define RCV_QUEUE_CNT 8 -#define SND_QUEUE_CNT 8 -#define CMP_QUEUE_CNT 8 /* Max of RCV and SND qcount */ +#define DEFAULT_RBDR_CNT 1 #define SND_QSIZE SND_QUEUE_SIZE2 #define SND_QUEUE_LEN (1ULL << (SND_QSIZE + 10)) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 63a39ac97d53..8bbaedbb7b94 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -28,6 +28,9 @@ struct lmac { struct bgx *bgx; int dmac; u8 mac[ETH_ALEN]; + u8 lmac_type; + u8 lane_to_sds; + bool use_training; bool link_up; int lmacid; /* ID within BGX */ int lmacid_bd; /* ID on board */ @@ -43,14 +46,13 @@ struct lmac { struct bgx { u8 bgx_id; - u8 qlm_mode; struct lmac lmac[MAX_LMAC_PER_BGX]; int lmac_count; - int lmac_type; - int lane_to_sds; - int use_training; + u8 max_lmac; void __iomem *reg_base; struct pci_dev *pdev; + bool is_dlm; + bool is_rgx; }; static struct bgx *bgx_vnic[MAX_BGX_THUNDER]; @@ -61,6 +63,7 @@ static int bgx_xaui_check_link(struct lmac *lmac); /* Supported devices */ static const struct pci_device_id bgx_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_BGX) }, + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_RGX) }, { 0, } /* end of table */ }; @@ -124,8 +127,8 @@ unsigned bgx_get_map(int node) int i; unsigned map = 0; - for (i = 0; i < MAX_BGX_PER_CN88XX; i++) { - if (bgx_vnic[(node * MAX_BGX_PER_CN88XX) + i]) + for (i = 0; i < MAX_BGX_PER_NODE; i++) { + if (bgx_vnic[(node * MAX_BGX_PER_NODE) + i]) map |= (1 << i); } @@ -138,7 +141,7 @@ int bgx_get_lmac_count(int node, int bgx_idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (bgx) return bgx->lmac_count; @@ -153,7 +156,7 @@ void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status) struct bgx *bgx; struct lmac *lmac; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return; @@ -166,7 +169,7 @@ EXPORT_SYMBOL(bgx_get_lmac_link_state); const u8 *bgx_get_lmac_mac(int node, int bgx_idx, int lmacid) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (bgx) return bgx->lmac[lmacid].mac; @@ -177,7 +180,7 @@ EXPORT_SYMBOL(bgx_get_lmac_mac); void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return; @@ -188,11 +191,13 @@ EXPORT_SYMBOL(bgx_set_lmac_mac); void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) { - struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + struct bgx *bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; + struct lmac *lmac; u64 cfg; if (!bgx) return; + lmac = &bgx->lmac[lmacid]; cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); if (enable) @@ -200,6 +205,9 @@ void bgx_lmac_rx_tx_enable(int node, int bgx_idx, int lmacid, bool enable) else cfg &= ~(CMR_PKT_RX_EN | CMR_PKT_TX_EN); bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + if (bgx->is_rgx) + xcv_setup_link(enable ? lmac->link_up : 0, lmac->last_speed); } EXPORT_SYMBOL(bgx_lmac_rx_tx_enable); @@ -266,9 +274,12 @@ static void bgx_sgmii_change_link_state(struct lmac *lmac) port_cfg = bgx_reg_read(bgx, lmac->lmacid, BGX_GMP_GMI_PRTX_CFG); - /* renable lmac */ + /* Re-enable lmac */ cmr_cfg |= CMR_EN; bgx_reg_write(bgx, lmac->lmacid, BGX_CMRX_CFG, cmr_cfg); + + if (bgx->is_rgx && (cmr_cfg & (CMR_PKT_RX_EN | CMR_PKT_TX_EN))) + xcv_setup_link(lmac->link_up, lmac->last_speed); } static void bgx_lmac_handler(struct net_device *netdev) @@ -314,7 +325,7 @@ u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return 0; @@ -328,7 +339,7 @@ u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx) { struct bgx *bgx; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return 0; @@ -356,7 +367,7 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx, struct lmac *lmac; u64 cfg; - bgx = bgx_vnic[(node * MAX_BGX_PER_CN88XX) + bgx_idx]; + bgx = bgx_vnic[(node * MAX_BGX_PER_NODE) + bgx_idx]; if (!bgx) return; @@ -379,8 +390,9 @@ void bgx_lmac_internal_loopback(int node, int bgx_idx, } EXPORT_SYMBOL(bgx_lmac_internal_loopback); -static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid) +static int bgx_lmac_sgmii_init(struct bgx *bgx, struct lmac *lmac) { + int lmacid = lmac->lmacid; u64 cfg; bgx_reg_modify(bgx, lmacid, BGX_GMP_GMI_TXX_THRESH, 0x30); @@ -409,18 +421,29 @@ static int bgx_lmac_sgmii_init(struct bgx *bgx, int lmacid) cfg |= (PCS_MRX_CTL_RST_AN | PCS_MRX_CTL_AN_EN); bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MRX_CTL, cfg); - if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, - PCS_MRX_STATUS_AN_CPT, false)) { - dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); - return -1; + if (lmac->lmac_type == BGX_MODE_QSGMII) { + /* Disable disparity check for QSGMII */ + cfg = bgx_reg_read(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL); + cfg &= ~PCS_MISC_CTL_DISP_EN; + bgx_reg_write(bgx, lmacid, BGX_GMP_PCS_MISCX_CTL, cfg); + return 0; + } + + if (lmac->lmac_type == BGX_MODE_SGMII) { + if (bgx_poll_reg(bgx, lmacid, BGX_GMP_PCS_MRX_STATUS, + PCS_MRX_STATUS_AN_CPT, false)) { + dev_err(&bgx->pdev->dev, "BGX AN_CPT not completed\n"); + return -1; + } } return 0; } -static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) +static int bgx_lmac_xaui_init(struct bgx *bgx, struct lmac *lmac) { u64 cfg; + int lmacid = lmac->lmacid; /* Reset SPU */ bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_RESET); @@ -436,12 +459,14 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) bgx_reg_modify(bgx, lmacid, BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER); /* Set interleaved running disparity for RXAUI */ - if (bgx->lmac_type != BGX_MODE_RXAUI) - bgx_reg_modify(bgx, lmacid, - BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); - else + if (lmac->lmac_type == BGX_MODE_RXAUI) bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, - SPU_MISC_CTL_RX_DIS | SPU_MISC_CTL_INTLV_RDISP); + SPU_MISC_CTL_INTLV_RDISP); + + /* Clear receive packet disable */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); + cfg &= ~SPU_MISC_CTL_RX_DIS; + bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); /* clear all interrupts */ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_INT); @@ -451,7 +476,7 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); bgx_reg_write(bgx, lmacid, BGX_SPUX_INT, cfg); - if (bgx->use_training) { + if (lmac->use_training) { bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LP_CUP, 0x00); bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_CUP, 0x00); bgx_reg_write(bgx, lmacid, BGX_SPUX_BR_PMD_LD_REP, 0x00); @@ -474,9 +499,9 @@ static int bgx_lmac_xaui_init(struct bgx *bgx, int lmacid, int lmac_type) bgx_reg_write(bgx, lmacid, BGX_SPUX_AN_CONTROL, cfg); cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_AN_ADV); - if (bgx->lmac_type == BGX_MODE_10G_KR) + if (lmac->lmac_type == BGX_MODE_10G_KR) cfg |= (1 << 23); - else if (bgx->lmac_type == BGX_MODE_40G_KR) + else if (lmac->lmac_type == BGX_MODE_40G_KR) cfg |= (1 << 24); else cfg &= ~((1 << 23) | (1 << 24)); @@ -511,11 +536,10 @@ static int bgx_xaui_check_link(struct lmac *lmac) { struct bgx *bgx = lmac->bgx; int lmacid = lmac->lmacid; - int lmac_type = bgx->lmac_type; + int lmac_type = lmac->lmac_type; u64 cfg; - bgx_reg_modify(bgx, lmacid, BGX_SPUX_MISC_CONTROL, SPU_MISC_CTL_RX_DIS); - if (bgx->use_training) { + if (lmac->use_training) { cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); if (!(cfg & (1ull << 13))) { cfg = (1ull << 13) | (1ull << 14); @@ -556,7 +580,7 @@ static int bgx_xaui_check_link(struct lmac *lmac) BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { dev_err(&bgx->pdev->dev, "Receive fault, retry training\n"); - if (bgx->use_training) { + if (lmac->use_training) { cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_INT); if (!(cfg & (1ull << 13))) { cfg = (1ull << 13) | (1ull << 14); @@ -584,11 +608,6 @@ static int bgx_xaui_check_link(struct lmac *lmac) return -1; } - /* Clear receive packet disable */ - cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); - cfg &= ~SPU_MISC_CTL_RX_DIS; - bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); - /* Check for MAC RX faults */ cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL); /* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */ @@ -599,7 +618,7 @@ static int bgx_xaui_check_link(struct lmac *lmac) /* Rx local/remote fault seen. * Do lmac reinit to see if condition recovers */ - bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type); + bgx_lmac_xaui_init(bgx, lmac); return -1; } @@ -623,7 +642,7 @@ static void bgx_poll_for_link(struct work_struct *work) if ((spu_link & SPU_STATUS1_RCV_LNK) && !(smu_link & SMU_RX_CTL_STATUS)) { lmac->link_up = 1; - if (lmac->bgx->lmac_type == BGX_MODE_XLAUI) + if (lmac->lmac_type == BGX_MODE_XLAUI) lmac->last_speed = 40000; else lmac->last_speed = 10000; @@ -649,6 +668,16 @@ static void bgx_poll_for_link(struct work_struct *work) queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2); } +static int phy_interface_mode(u8 lmac_type) +{ + if (lmac_type == BGX_MODE_QSGMII) + return PHY_INTERFACE_MODE_QSGMII; + if (lmac_type == BGX_MODE_RGMII) + return PHY_INTERFACE_MODE_RGMII; + + return PHY_INTERFACE_MODE_SGMII; +} + static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) { struct lmac *lmac; @@ -657,13 +686,15 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) lmac = &bgx->lmac[lmacid]; lmac->bgx = bgx; - if (bgx->lmac_type == BGX_MODE_SGMII) { + if ((lmac->lmac_type == BGX_MODE_SGMII) || + (lmac->lmac_type == BGX_MODE_QSGMII) || + (lmac->lmac_type == BGX_MODE_RGMII)) { lmac->is_sgmii = 1; - if (bgx_lmac_sgmii_init(bgx, lmacid)) + if (bgx_lmac_sgmii_init(bgx, lmac)) return -1; } else { lmac->is_sgmii = 0; - if (bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type)) + if (bgx_lmac_xaui_init(bgx, lmac)) return -1; } @@ -685,10 +716,10 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) /* Restore default cfg, incase low level firmware changed it */ bgx_reg_write(bgx, lmacid, BGX_CMRX_RX_DMAC_CTL, 0x03); - if ((bgx->lmac_type != BGX_MODE_XFI) && - (bgx->lmac_type != BGX_MODE_XLAUI) && - (bgx->lmac_type != BGX_MODE_40G_KR) && - (bgx->lmac_type != BGX_MODE_10G_KR)) { + if ((lmac->lmac_type != BGX_MODE_XFI) && + (lmac->lmac_type != BGX_MODE_XLAUI) && + (lmac->lmac_type != BGX_MODE_40G_KR) && + (lmac->lmac_type != BGX_MODE_10G_KR)) { if (!lmac->phydev) return -ENODEV; @@ -696,7 +727,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) if (phy_connect_direct(&lmac->netdev, lmac->phydev, bgx_lmac_handler, - PHY_INTERFACE_MODE_SGMII)) + phy_interface_mode(lmac->lmac_type))) return -ENODEV; phy_start_aneg(lmac->phydev); @@ -753,76 +784,19 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) bgx_flush_dmac_addrs(bgx, lmacid); - if ((bgx->lmac_type != BGX_MODE_XFI) && - (bgx->lmac_type != BGX_MODE_XLAUI) && - (bgx->lmac_type != BGX_MODE_40G_KR) && - (bgx->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) + if ((lmac->lmac_type != BGX_MODE_XFI) && + (lmac->lmac_type != BGX_MODE_XLAUI) && + (lmac->lmac_type != BGX_MODE_40G_KR) && + (lmac->lmac_type != BGX_MODE_10G_KR) && lmac->phydev) phy_disconnect(lmac->phydev); lmac->phydev = NULL; } -static void bgx_set_num_ports(struct bgx *bgx) -{ - u64 lmac_count; - - switch (bgx->qlm_mode) { - case QLM_MODE_SGMII: - bgx->lmac_count = 4; - bgx->lmac_type = BGX_MODE_SGMII; - bgx->lane_to_sds = 0; - break; - case QLM_MODE_XAUI_1X4: - bgx->lmac_count = 1; - bgx->lmac_type = BGX_MODE_XAUI; - bgx->lane_to_sds = 0xE4; - break; - case QLM_MODE_RXAUI_2X2: - bgx->lmac_count = 2; - bgx->lmac_type = BGX_MODE_RXAUI; - bgx->lane_to_sds = 0xE4; - break; - case QLM_MODE_XFI_4X1: - bgx->lmac_count = 4; - bgx->lmac_type = BGX_MODE_XFI; - bgx->lane_to_sds = 0; - break; - case QLM_MODE_XLAUI_1X4: - bgx->lmac_count = 1; - bgx->lmac_type = BGX_MODE_XLAUI; - bgx->lane_to_sds = 0xE4; - break; - case QLM_MODE_10G_KR_4X1: - bgx->lmac_count = 4; - bgx->lmac_type = BGX_MODE_10G_KR; - bgx->lane_to_sds = 0; - bgx->use_training = 1; - break; - case QLM_MODE_40G_KR4_1X4: - bgx->lmac_count = 1; - bgx->lmac_type = BGX_MODE_40G_KR; - bgx->lane_to_sds = 0xE4; - bgx->use_training = 1; - break; - default: - bgx->lmac_count = 0; - break; - } - - /* Check if low level firmware has programmed LMAC count - * based on board type, if yes consider that otherwise - * the default static values - */ - lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; - if (lmac_count != 4) - bgx->lmac_count = lmac_count; -} - static void bgx_init_hw(struct bgx *bgx) { int i; - - bgx_set_num_ports(bgx); + struct lmac *lmac; bgx_reg_modify(bgx, 0, BGX_CMR_GLOBAL_CFG, CMR_GLOBAL_CFG_FCS_STRIP); if (bgx_reg_read(bgx, 0, BGX_CMR_BIST_STATUS)) @@ -830,17 +804,9 @@ static void bgx_init_hw(struct bgx *bgx) /* Set lmac type and lane2serdes mapping */ for (i = 0; i < bgx->lmac_count; i++) { - if (bgx->lmac_type == BGX_MODE_RXAUI) { - if (i) - bgx->lane_to_sds = 0x0e; - else - bgx->lane_to_sds = 0x04; - bgx_reg_write(bgx, i, BGX_CMRX_CFG, - (bgx->lmac_type << 8) | bgx->lane_to_sds); - continue; - } + lmac = &bgx->lmac[i]; bgx_reg_write(bgx, i, BGX_CMRX_CFG, - (bgx->lmac_type << 8) | (bgx->lane_to_sds + i)); + (lmac->lmac_type << 8) | lmac->lane_to_sds); bgx->lmac[i].lmacid_bd = lmac_count; lmac_count++; } @@ -863,55 +829,212 @@ static void bgx_init_hw(struct bgx *bgx) bgx_reg_write(bgx, 0, BGX_CMR_RX_STREERING + (i * 8), 0x00); } -static void bgx_get_qlm_mode(struct bgx *bgx) +static u8 bgx_get_lane2sds_cfg(struct bgx *bgx, struct lmac *lmac) +{ + return (u8)(bgx_reg_read(bgx, lmac->lmacid, BGX_CMRX_CFG) & 0xFF); +} + +static void bgx_print_qlm_mode(struct bgx *bgx, u8 lmacid) { struct device *dev = &bgx->pdev->dev; - int lmac_type; - int train_en; + struct lmac *lmac; + char str[20]; + u8 dlm; - /* Read LMAC0 type to figure out QLM mode - * This is configured by low level firmware - */ - lmac_type = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); - lmac_type = (lmac_type >> 8) & 0x07; + if (lmacid > bgx->max_lmac) + return; - train_en = bgx_reg_read(bgx, 0, BGX_SPUX_BR_PMD_CRTL) & - SPU_PMD_CRTL_TRAIN_EN; + lmac = &bgx->lmac[lmacid]; + dlm = (lmacid / 2) + (bgx->bgx_id * 2); + if (!bgx->is_dlm) + sprintf(str, "BGX%d QLM mode", bgx->bgx_id); + else + sprintf(str, "BGX%d DLM%d mode", bgx->bgx_id, dlm); - switch (lmac_type) { + switch (lmac->lmac_type) { case BGX_MODE_SGMII: - bgx->qlm_mode = QLM_MODE_SGMII; - dev_info(dev, "BGX%d QLM mode: SGMII\n", bgx->bgx_id); + dev_info(dev, "%s: SGMII\n", (char *)str); break; case BGX_MODE_XAUI: - bgx->qlm_mode = QLM_MODE_XAUI_1X4; - dev_info(dev, "BGX%d QLM mode: XAUI\n", bgx->bgx_id); + dev_info(dev, "%s: XAUI\n", (char *)str); break; case BGX_MODE_RXAUI: - bgx->qlm_mode = QLM_MODE_RXAUI_2X2; - dev_info(dev, "BGX%d QLM mode: RXAUI\n", bgx->bgx_id); + dev_info(dev, "%s: RXAUI\n", (char *)str); break; case BGX_MODE_XFI: - if (!train_en) { - bgx->qlm_mode = QLM_MODE_XFI_4X1; - dev_info(dev, "BGX%d QLM mode: XFI\n", bgx->bgx_id); - } else { - bgx->qlm_mode = QLM_MODE_10G_KR_4X1; - dev_info(dev, "BGX%d QLM mode: 10G_KR\n", bgx->bgx_id); - } + if (!lmac->use_training) + dev_info(dev, "%s: XFI\n", (char *)str); + else + dev_info(dev, "%s: 10G_KR\n", (char *)str); break; case BGX_MODE_XLAUI: - if (!train_en) { - bgx->qlm_mode = QLM_MODE_XLAUI_1X4; - dev_info(dev, "BGX%d QLM mode: XLAUI\n", bgx->bgx_id); - } else { - bgx->qlm_mode = QLM_MODE_40G_KR4_1X4; - dev_info(dev, "BGX%d QLM mode: 40G_KR4\n", bgx->bgx_id); - } + if (!lmac->use_training) + dev_info(dev, "%s: XLAUI\n", (char *)str); + else + dev_info(dev, "%s: 40G_KR4\n", (char *)str); + break; + case BGX_MODE_QSGMII: + if ((lmacid == 0) && + (bgx_get_lane2sds_cfg(bgx, lmac) != lmacid)) + return; + if ((lmacid == 2) && + (bgx_get_lane2sds_cfg(bgx, lmac) == lmacid)) + return; + dev_info(dev, "%s: QSGMII\n", (char *)str); + break; + case BGX_MODE_RGMII: + dev_info(dev, "%s: RGMII\n", (char *)str); + break; + case BGX_MODE_INVALID: + /* Nothing to do */ + break; + } +} + +static void lmac_set_lane2sds(struct bgx *bgx, struct lmac *lmac) +{ + switch (lmac->lmac_type) { + case BGX_MODE_SGMII: + case BGX_MODE_XFI: + lmac->lane_to_sds = lmac->lmacid; + break; + case BGX_MODE_XAUI: + case BGX_MODE_XLAUI: + case BGX_MODE_RGMII: + lmac->lane_to_sds = 0xE4; + break; + case BGX_MODE_RXAUI: + lmac->lane_to_sds = (lmac->lmacid) ? 0xE : 0x4; + break; + case BGX_MODE_QSGMII: + /* There is no way to determine if DLM0/2 is QSGMII or + * DLM1/3 is configured to QSGMII as bootloader will + * configure all LMACs, so take whatever is configured + * by low level firmware. + */ + lmac->lane_to_sds = bgx_get_lane2sds_cfg(bgx, lmac); break; default: - bgx->qlm_mode = QLM_MODE_SGMII; - dev_info(dev, "BGX%d QLM default mode: SGMII\n", bgx->bgx_id); + lmac->lane_to_sds = 0; + break; + } +} + +static void lmac_set_training(struct bgx *bgx, struct lmac *lmac, int lmacid) +{ + if ((lmac->lmac_type != BGX_MODE_10G_KR) && + (lmac->lmac_type != BGX_MODE_40G_KR)) { + lmac->use_training = 0; + return; + } + + lmac->use_training = bgx_reg_read(bgx, lmacid, BGX_SPUX_BR_PMD_CRTL) & + SPU_PMD_CRTL_TRAIN_EN; +} + +static void bgx_set_lmac_config(struct bgx *bgx, u8 idx) +{ + struct lmac *lmac; + struct lmac *olmac; + u64 cmr_cfg; + u8 lmac_type; + u8 lane_to_sds; + + lmac = &bgx->lmac[idx]; + + if (!bgx->is_dlm || bgx->is_rgx) { + /* Read LMAC0 type to figure out QLM mode + * This is configured by low level firmware + */ + cmr_cfg = bgx_reg_read(bgx, 0, BGX_CMRX_CFG); + lmac->lmac_type = (cmr_cfg >> 8) & 0x07; + if (bgx->is_rgx) + lmac->lmac_type = BGX_MODE_RGMII; + lmac_set_training(bgx, lmac, 0); + lmac_set_lane2sds(bgx, lmac); + return; + } + + /* On 81xx BGX can be split across 2 DLMs + * firmware programs lmac_type of LMAC0 and LMAC2 + */ + if ((idx == 0) || (idx == 2)) { + cmr_cfg = bgx_reg_read(bgx, idx, BGX_CMRX_CFG); + lmac_type = (u8)((cmr_cfg >> 8) & 0x07); + lane_to_sds = (u8)(cmr_cfg & 0xFF); + /* Check if config is not reset value */ + if ((lmac_type == 0) && (lane_to_sds == 0xE4)) + lmac->lmac_type = BGX_MODE_INVALID; + else + lmac->lmac_type = lmac_type; + lmac_set_training(bgx, lmac, lmac->lmacid); + lmac_set_lane2sds(bgx, lmac); + + /* Set LMAC type of other lmac on same DLM i.e LMAC 1/3 */ + olmac = &bgx->lmac[idx + 1]; + olmac->lmac_type = lmac->lmac_type; + lmac_set_training(bgx, olmac, olmac->lmacid); + lmac_set_lane2sds(bgx, olmac); + } +} + +static bool is_dlm0_in_bgx_mode(struct bgx *bgx) +{ + struct lmac *lmac; + + if (!bgx->is_dlm) + return true; + + lmac = &bgx->lmac[0]; + if (lmac->lmac_type == BGX_MODE_INVALID) + return false; + + return true; +} + +static void bgx_get_qlm_mode(struct bgx *bgx) +{ + struct lmac *lmac; + struct lmac *lmac01; + struct lmac *lmac23; + u8 idx; + + /* Init all LMAC's type to invalid */ + for (idx = 0; idx < bgx->max_lmac; idx++) { + lmac = &bgx->lmac[idx]; + lmac->lmacid = idx; + lmac->lmac_type = BGX_MODE_INVALID; + lmac->use_training = false; + } + + /* It is assumed that low level firmware sets this value */ + bgx->lmac_count = bgx_reg_read(bgx, 0, BGX_CMR_RX_LMACS) & 0x7; + if (bgx->lmac_count > bgx->max_lmac) + bgx->lmac_count = bgx->max_lmac; + + for (idx = 0; idx < bgx->max_lmac; idx++) + bgx_set_lmac_config(bgx, idx); + + if (!bgx->is_dlm || bgx->is_rgx) { + bgx_print_qlm_mode(bgx, 0); + return; + } + + if (bgx->lmac_count) { + bgx_print_qlm_mode(bgx, 0); + bgx_print_qlm_mode(bgx, 2); + } + + /* If DLM0 is not in BGX mode then LMAC0/1 have + * to be configured with serdes lanes of DLM1 + */ + if (is_dlm0_in_bgx_mode(bgx) || (bgx->lmac_count > 2)) + return; + for (idx = 0; idx < bgx->lmac_count; idx++) { + lmac01 = &bgx->lmac[idx]; + lmac23 = &bgx->lmac[idx + 2]; + lmac01->lmac_type = lmac23->lmac_type; + lmac01->lane_to_sds = lmac23->lane_to_sds; } } @@ -1042,7 +1165,7 @@ static int bgx_init_of_phy(struct bgx *bgx) } lmac++; - if (lmac == MAX_LMAC_PER_BGX) { + if (lmac == bgx->max_lmac) { of_node_put(node); break; } @@ -1087,6 +1210,7 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct device *dev = &pdev->dev; struct bgx *bgx = NULL; u8 lmac; + u16 sdevid; bgx = devm_kzalloc(dev, sizeof(*bgx), GFP_KERNEL); if (!bgx) @@ -1115,10 +1239,30 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto err_release_regions; } - bgx->bgx_id = (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; - bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_CN88XX; - bgx_vnic[bgx->bgx_id] = bgx; + pci_read_config_word(pdev, PCI_DEVICE_ID, &sdevid); + if (sdevid != PCI_DEVICE_ID_THUNDER_RGX) { + bgx->bgx_id = + (pci_resource_start(pdev, PCI_CFG_REG_BAR_NUM) >> 24) & 1; + bgx->bgx_id += nic_get_node_id(pdev) * MAX_BGX_PER_NODE; + bgx->max_lmac = MAX_LMAC_PER_BGX; + bgx_vnic[bgx->bgx_id] = bgx; + } else { + bgx->is_rgx = true; + bgx->max_lmac = 1; + bgx->bgx_id = MAX_BGX_PER_CN81XX - 1; + bgx_vnic[bgx->bgx_id] = bgx; + xcv_init_hw(); + } + + /* On 81xx all are DLMs and on 83xx there are 3 BGX QLMs and one + * BGX i.e BGX2 can be split across 2 DLMs. + */ + pci_read_config_word(pdev, PCI_SUBSYSTEM_ID, &sdevid); + if ((sdevid == PCI_SUBSYS_DEVID_81XX_BGX) || + ((sdevid == PCI_SUBSYS_DEVID_83XX_BGX) && (bgx->bgx_id == 2))) + bgx->is_dlm = true; + bgx_get_qlm_mode(bgx); err = bgx_init_phy(bgx); @@ -1133,6 +1277,8 @@ static int bgx_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (err) { dev_err(dev, "BGX%d failed to enable lmac%d\n", bgx->bgx_id, lmac); + while (lmac) + bgx_lmac_disable(bgx, --lmac); goto err_enable; } } diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 42010d2e5ddf..d59c71e4a000 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -9,8 +9,20 @@ #ifndef THUNDER_BGX_H #define THUNDER_BGX_H -#define MAX_BGX_THUNDER 8 /* Max 4 nodes, 2 per node */ +/* PCI device ID */ +#define PCI_DEVICE_ID_THUNDER_BGX 0xA026 +#define PCI_DEVICE_ID_THUNDER_RGX 0xA054 + +/* Subsystem device IDs */ +#define PCI_SUBSYS_DEVID_88XX_BGX 0xA126 +#define PCI_SUBSYS_DEVID_81XX_BGX 0xA226 +#define PCI_SUBSYS_DEVID_83XX_BGX 0xA326 + +#define MAX_BGX_THUNDER 8 /* Max 2 nodes, 4 per node */ #define MAX_BGX_PER_CN88XX 2 +#define MAX_BGX_PER_CN81XX 3 /* 2 BGXs + 1 RGX */ +#define MAX_BGX_PER_CN83XX 4 +#define MAX_BGX_PER_NODE 4 #define MAX_LMAC_PER_BGX 4 #define MAX_BGX_CHANS_PER_LMAC 16 #define MAX_DMAC_PER_LMAC 8 @@ -18,8 +30,6 @@ #define MAX_DMAC_PER_LMAC_TNS_BYPASS_MODE 2 -#define MAX_LMAC (MAX_BGX_PER_CN88XX * MAX_LMAC_PER_BGX) - /* Registers */ #define BGX_CMRX_CFG 0x00 #define CMR_PKT_TX_EN BIT_ULL(13) @@ -136,6 +146,7 @@ #define BGX_GMP_PCS_ANX_AN_RESULTS 0x30020 #define BGX_GMP_PCS_SGM_AN_ADV 0x30068 #define BGX_GMP_PCS_MISCX_CTL 0x30078 +#define PCS_MISC_CTL_DISP_EN BIT_ULL(13) #define PCS_MISC_CTL_GMX_ENO BIT_ULL(11) #define PCS_MISC_CTL_SAMP_PT_MASK 0x7Full #define BGX_GMP_GMI_PRTX_CFG 0x38020 @@ -194,6 +205,9 @@ void bgx_set_lmac_mac(int node, int bgx_idx, int lmacid, const u8 *mac); void bgx_get_lmac_link_state(int node, int bgx_idx, int lmacid, void *status); void bgx_lmac_internal_loopback(int node, int bgx_idx, int lmac_idx, bool enable); +void xcv_init_hw(void); +void xcv_setup_link(bool link_up, int link_speed); + u64 bgx_get_rx_stats(int node, int bgx_idx, int lmac, int idx); u64 bgx_get_tx_stats(int node, int bgx_idx, int lmac, int idx); #define BGX_RX_STATS_COUNT 11 @@ -213,16 +227,9 @@ enum LMAC_TYPE { BGX_MODE_XLAUI = 4, /* 4 lanes, 10.3125 Gbaud */ BGX_MODE_10G_KR = 3,/* 1 lane, 10.3125 Gbaud */ BGX_MODE_40G_KR = 4,/* 4 lanes, 10.3125 Gbaud */ -}; - -enum qlm_mode { - QLM_MODE_SGMII, /* SGMII, each lane independent */ - QLM_MODE_XAUI_1X4, /* 1 XAUI or DXAUI, 4 lanes */ - QLM_MODE_RXAUI_2X2, /* 2 RXAUI, 2 lanes each */ - QLM_MODE_XFI_4X1, /* 4 XFI, 1 lane each */ - QLM_MODE_XLAUI_1X4, /* 1 XLAUI, 4 lanes each */ - QLM_MODE_10G_KR_4X1, /* 4 10GBASE-KR, 1 lane each */ - QLM_MODE_40G_KR4_1X4, /* 1 40GBASE-KR4, 4 lanes each */ + BGX_MODE_RGMII = 5, + BGX_MODE_QSGMII = 6, + BGX_MODE_INVALID = 7, }; #endif /* THUNDER_BGX_H */ diff --git a/drivers/net/ethernet/cavium/thunder/thunder_xcv.c b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c new file mode 100644 index 000000000000..67befedef709 --- /dev/null +++ b/drivers/net/ethernet/cavium/thunder/thunder_xcv.c @@ -0,0 +1,235 @@ +/* + * Copyright (C) 2016 Cavium, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "nic.h" +#include "thunder_bgx.h" + +#define DRV_NAME "thunder-xcv" +#define DRV_VERSION "1.0" + +/* Register offsets */ +#define XCV_RESET 0x00 +#define PORT_EN BIT_ULL(63) +#define CLK_RESET BIT_ULL(15) +#define DLL_RESET BIT_ULL(11) +#define COMP_EN BIT_ULL(7) +#define TX_PKT_RESET BIT_ULL(3) +#define TX_DATA_RESET BIT_ULL(2) +#define RX_PKT_RESET BIT_ULL(1) +#define RX_DATA_RESET BIT_ULL(0) +#define XCV_DLL_CTL 0x10 +#define CLKRX_BYP BIT_ULL(23) +#define CLKTX_BYP BIT_ULL(15) +#define XCV_COMP_CTL 0x20 +#define DRV_BYP BIT_ULL(63) +#define XCV_CTL 0x30 +#define XCV_INT 0x40 +#define XCV_INT_W1S 0x48 +#define XCV_INT_ENA_W1C 0x50 +#define XCV_INT_ENA_W1S 0x58 +#define XCV_INBND_STATUS 0x80 +#define XCV_BATCH_CRD_RET 0x100 + +struct xcv { + void __iomem *reg_base; + struct pci_dev *pdev; +}; + +static struct xcv *xcv; + +/* Supported devices */ +static const struct pci_device_id xcv_id_table[] = { + { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xA056) }, + { 0, } /* end of table */ +}; + +MODULE_AUTHOR("Cavium Inc"); +MODULE_DESCRIPTION("Cavium Thunder RGX/XCV Driver"); +MODULE_LICENSE("GPL v2"); +MODULE_VERSION(DRV_VERSION); +MODULE_DEVICE_TABLE(pci, xcv_id_table); + +void xcv_init_hw(void) +{ + u64 cfg; + + /* Take DLL out of reset */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg &= ~DLL_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + /* Take clock tree out of reset */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg &= ~CLK_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + /* Wait for DLL to lock */ + msleep(1); + + /* Configure DLL - enable or bypass + * TX no bypass, RX bypass + */ + cfg = readq_relaxed(xcv->reg_base + XCV_DLL_CTL); + cfg &= ~0xFF03; + cfg |= CLKRX_BYP; + writeq_relaxed(cfg, xcv->reg_base + XCV_DLL_CTL); + + /* Enable compensation controller and force the + * write to be visible to HW by readig back. + */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= COMP_EN; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + readq_relaxed(xcv->reg_base + XCV_RESET); + /* Wait for compensation state machine to lock */ + msleep(10); + + /* enable the XCV block */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= PORT_EN; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= CLK_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); +} +EXPORT_SYMBOL(xcv_init_hw); + +void xcv_setup_link(bool link_up, int link_speed) +{ + u64 cfg; + int speed = 2; + + if (!xcv) { + dev_err(&xcv->pdev->dev, + "XCV init not done, probe may have failed\n"); + return; + } + + if (link_speed == 100) + speed = 1; + else if (link_speed == 10) + speed = 0; + + if (link_up) { + /* set operating speed */ + cfg = readq_relaxed(xcv->reg_base + XCV_CTL); + cfg &= ~0x03; + cfg |= speed; + writeq_relaxed(cfg, xcv->reg_base + XCV_CTL); + + /* Reset datapaths */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= TX_DATA_RESET | RX_DATA_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + /* Enable the packet flow */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg |= TX_PKT_RESET | RX_PKT_RESET; + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + + /* Return credits to RGX */ + writeq_relaxed(0x01, xcv->reg_base + XCV_BATCH_CRD_RET); + } else { + /* Disable packet flow */ + cfg = readq_relaxed(xcv->reg_base + XCV_RESET); + cfg &= ~(TX_PKT_RESET | RX_PKT_RESET); + writeq_relaxed(cfg, xcv->reg_base + XCV_RESET); + readq_relaxed(xcv->reg_base + XCV_RESET); + } +} +EXPORT_SYMBOL(xcv_setup_link); + +static int xcv_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + int err; + struct device *dev = &pdev->dev; + + xcv = devm_kzalloc(dev, sizeof(struct xcv), GFP_KERNEL); + if (!xcv) + return -ENOMEM; + xcv->pdev = pdev; + + pci_set_drvdata(pdev, xcv); + + err = pci_enable_device(pdev); + if (err) { + dev_err(dev, "Failed to enable PCI device\n"); + goto err_kfree; + } + + err = pci_request_regions(pdev, DRV_NAME); + if (err) { + dev_err(dev, "PCI request regions failed 0x%x\n", err); + goto err_disable_device; + } + + /* MAP configuration registers */ + xcv->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0); + if (!xcv->reg_base) { + dev_err(dev, "XCV: Cannot map CSR memory space, aborting\n"); + err = -ENOMEM; + goto err_release_regions; + } + + return 0; + +err_release_regions: + pci_release_regions(pdev); +err_disable_device: + pci_disable_device(pdev); +err_kfree: + devm_kfree(dev, xcv); + xcv = NULL; + return err; +} + +static void xcv_remove(struct pci_dev *pdev) +{ + struct device *dev = &pdev->dev; + + if (xcv) { + devm_kfree(dev, xcv); + xcv = NULL; + } + + pci_release_regions(pdev); + pci_disable_device(pdev); +} + +static struct pci_driver xcv_driver = { + .name = DRV_NAME, + .id_table = xcv_id_table, + .probe = xcv_probe, + .remove = xcv_remove, +}; + +static int __init xcv_init_module(void) +{ + pr_info("%s, ver %s\n", DRV_NAME, DRV_VERSION); + + return pci_register_driver(&xcv_driver); +} + +static void __exit xcv_cleanup_module(void) +{ + pci_unregister_driver(&xcv_driver); +} + +module_init(xcv_init_module); +module_exit(xcv_cleanup_module); diff --git a/drivers/net/ethernet/chelsio/cxgb4/Makefile b/drivers/net/ethernet/chelsio/cxgb4/Makefile index ace0ab98d0f1..c6b71f656992 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/Makefile +++ b/drivers/net/ethernet/chelsio/cxgb4/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_CHELSIO_T4) += cxgb4.o -cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o +cxgb4-objs := cxgb4_main.o l2t.o t4_hw.o sge.o clip_tbl.o cxgb4_ethtool.o cxgb4_uld.o sched.o cxgb4_filter.o cxgb4_tc_u32.o cxgb4-$(CONFIG_CHELSIO_T4_DCB) += cxgb4_dcb.o cxgb4-$(CONFIG_CHELSIO_T4_FCOE) += cxgb4_fcoe.o cxgb4-$(CONFIG_DEBUG_FS) += cxgb4_debugfs.o diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index edd23386b47d..28e653e9c856 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -53,6 +53,8 @@ #include "cxgb4_uld.h" #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__) +extern struct list_head adapter_list; +extern struct mutex uld_mutex; enum { MAX_NPORTS = 4, /* max # of ports */ @@ -338,12 +340,14 @@ struct adapter_params { enum chip_type chip; /* chip code */ struct arch_specific_params arch; /* chip specific params */ unsigned char offload; + unsigned char crypto; /* HW capability for crypto */ unsigned char bypass; unsigned int ofldq_wr_cred; bool ulptx_memwrite_dsgl; /* use of T5 DSGL allowed */ + unsigned int nsched_cls; /* number of traffic classes */ unsigned int max_ordird_qp; /* Max read depth per RDMA QP */ unsigned int max_ird_adapter; /* Max read depth per adapter */ }; @@ -403,7 +407,6 @@ struct fw_info { struct fw_hdr fw_hdr; }; - struct trace_params { u32 data[TRACE_LEN / 4]; u32 mask[TRACE_LEN / 4]; @@ -434,11 +437,6 @@ enum { MAX_ETH_QSETS = 32, /* # of Ethernet Tx/Rx queue sets */ MAX_OFLD_QSETS = 16, /* # of offload Tx, iscsi Rx queue sets */ MAX_CTRL_QUEUES = NCHAN, /* # of control Tx queues */ - MAX_RDMA_QUEUES = NCHAN, /* # of streaming RDMA Rx queues */ - MAX_RDMA_CIQS = 32, /* # of RDMA concentrator IQs */ - - /* # of streaming iSCSIT Rx queues */ - MAX_ISCSIT_QUEUES = MAX_OFLD_QSETS, }; enum { @@ -455,8 +453,7 @@ enum { enum { INGQ_EXTRAS = 2, /* firmware event queue and */ /* forwarded interrupts */ - MAX_INGQ = MAX_ETH_QSETS + MAX_OFLD_QSETS + MAX_RDMA_QUEUES + - MAX_RDMA_CIQS + MAX_ISCSIT_QUEUES + INGQ_EXTRAS, + MAX_INGQ = MAX_ETH_QSETS + INGQ_EXTRAS, }; struct adapter; @@ -493,6 +490,7 @@ struct port_info { #endif /* CONFIG_CHELSIO_T4_FCOE */ bool rxtstamp; /* Enable TS */ struct hwtstamp_config tstamp_config; + struct sched_table *sched_tbl; }; struct dentry; @@ -510,6 +508,10 @@ enum { /* adapter flags */ FW_OFLD_CONN = (1 << 9), }; +enum { + ULP_CRYPTO_LOOKASIDE = 1 << 0, +}; + struct rx_sw_desc; struct sge_fl { /* SGE free-buffer queue state */ @@ -680,17 +682,24 @@ struct sge_ctrl_txq { /* state for an SGE control Tx queue */ u8 full; /* the Tx ring is full */ } ____cacheline_aligned_in_smp; +struct sge_uld_rxq_info { + char name[IFNAMSIZ]; /* name of ULD driver */ + struct sge_ofld_rxq *uldrxq; /* Rxq's for ULD */ + u16 *msix_tbl; /* msix_tbl for uld */ + u16 *rspq_id; /* response queue id's of rxq */ + u16 nrxq; /* # of ingress uld queues */ + u16 nciq; /* # of completion queues */ + u8 uld; /* uld type */ +}; + struct sge { struct sge_eth_txq ethtxq[MAX_ETH_QSETS]; struct sge_ofld_txq ofldtxq[MAX_OFLD_QSETS]; struct sge_ctrl_txq ctrlq[MAX_CTRL_QUEUES]; struct sge_eth_rxq ethrxq[MAX_ETH_QSETS]; - struct sge_ofld_rxq iscsirxq[MAX_OFLD_QSETS]; - struct sge_ofld_rxq iscsitrxq[MAX_ISCSIT_QUEUES]; - struct sge_ofld_rxq rdmarxq[MAX_RDMA_QUEUES]; - struct sge_ofld_rxq rdmaciq[MAX_RDMA_CIQS]; struct sge_rspq fw_evtq ____cacheline_aligned_in_smp; + struct sge_uld_rxq_info **uld_rxq_info; struct sge_rspq intrq ____cacheline_aligned_in_smp; spinlock_t intrq_lock; @@ -698,14 +707,8 @@ struct sge { u16 max_ethqsets; /* # of available Ethernet queue sets */ u16 ethqsets; /* # of active Ethernet queue sets */ u16 ethtxq_rover; /* Tx queue to clean up next */ - u16 iscsiqsets; /* # of active iSCSI queue sets */ - u16 niscsitq; /* # of available iSCST Rx queues */ - u16 rdmaqs; /* # of available RDMA Rx queues */ - u16 rdmaciqs; /* # of available RDMA concentrator IQs */ - u16 iscsi_rxq[MAX_OFLD_QSETS]; - u16 iscsit_rxq[MAX_ISCSIT_QUEUES]; - u16 rdma_rxq[MAX_RDMA_QUEUES]; - u16 rdma_ciq[MAX_RDMA_CIQS]; + u16 ofldqsets; /* # of active ofld queue sets */ + u16 nqs_per_uld; /* # of Rx queues per ULD */ u16 timer_val[SGE_NTIMERS]; u8 counter_val[SGE_NCOUNTERS]; u32 fl_pg_order; /* large page allocation size */ @@ -729,10 +732,7 @@ struct sge { }; #define for_each_ethrxq(sge, i) for (i = 0; i < (sge)->ethqsets; i++) -#define for_each_iscsirxq(sge, i) for (i = 0; i < (sge)->iscsiqsets; i++) -#define for_each_iscsitrxq(sge, i) for (i = 0; i < (sge)->niscsitq; i++) -#define for_each_rdmarxq(sge, i) for (i = 0; i < (sge)->rdmaqs; i++) -#define for_each_rdmaciq(sge, i) for (i = 0; i < (sge)->rdmaciqs; i++) +#define for_each_ofldtxq(sge, i) for (i = 0; i < (sge)->ofldqsets; i++) struct l2t_data; @@ -757,6 +757,23 @@ struct hash_mac_addr { u8 addr[ETH_ALEN]; }; +struct uld_msix_bmap { + unsigned long *msix_bmap; + unsigned int mapsize; + spinlock_t lock; /* lock for acquiring bitmap */ +}; + +struct uld_msix_info { + unsigned short vec; + char desc[IFNAMSIZ + 10]; + unsigned int idx; +}; + +struct vf_info { + unsigned char vf_mac_addr[ETH_ALEN]; + bool pf_set_mac; +}; + struct adapter { void __iomem *regs; void __iomem *bar2; @@ -767,6 +784,7 @@ struct adapter { unsigned int mbox; unsigned int pf; unsigned int flags; + unsigned int adap_idx; enum chip_type chip; int msg_enable; @@ -779,6 +797,9 @@ struct adapter { unsigned short vec; char desc[IFNAMSIZ + 10]; } msix_info[MAX_INGQ + 1]; + struct uld_msix_info *msix_info_ulds; /* msix info for uld's */ + struct uld_msix_bmap msix_bmap_ulds; /* msix bitmap for all uld */ + int msi_idx; struct doorbell_stats db_stats; struct sge sge; @@ -786,6 +807,9 @@ struct adapter { struct net_device *port[MAX_NPORTS]; u8 chan_map[NCHAN]; /* channel -> port map */ + struct vf_info *vfinfo; + u8 num_vfs; + u32 filter_mode; unsigned int l2t_start; unsigned int l2t_end; @@ -793,7 +817,10 @@ struct adapter { unsigned int clipt_start; unsigned int clipt_end; struct clip_tbl *clipt; + struct cxgb4_uld_info *uld; void *uld_handle[CXGB4_ULD_MAX]; + unsigned int num_uld; + unsigned int num_ofld_uld; struct list_head list_node; struct list_head rcu_node; struct list_head mac_hlist; /* list of MAC addresses in MPS Hash */ @@ -813,6 +840,8 @@ struct adapter { #define T4_OS_LOG_MBOX_CMDS 256 struct mbox_cmd_log *mbox_log; + struct mutex uld_mutex; + struct dentry *debugfs_root; bool use_bd; /* Use SGE Back Door intfc for reading SGE Contexts */ bool trace_rss; /* 1 implies that different RSS flit per filter is @@ -822,6 +851,58 @@ struct adapter { spinlock_t stats_lock; spinlock_t win0_lock ____cacheline_aligned_in_smp; + + /* TC u32 offload */ + struct cxgb4_tc_u32_table *tc_u32; +}; + +/* Support for "sched-class" command to allow a TX Scheduling Class to be + * programmed with various parameters. + */ +struct ch_sched_params { + s8 type; /* packet or flow */ + union { + struct { + s8 level; /* scheduler hierarchy level */ + s8 mode; /* per-class or per-flow */ + s8 rateunit; /* bit or packet rate */ + s8 ratemode; /* %port relative or kbps absolute */ + s8 channel; /* scheduler channel [0..N] */ + s8 class; /* scheduler class [0..N] */ + s32 minrate; /* minimum rate */ + s32 maxrate; /* maximum rate */ + s16 weight; /* percent weight */ + s16 pktsize; /* average packet size */ + } params; + } u; +}; + +enum { + SCHED_CLASS_TYPE_PACKET = 0, /* class type */ +}; + +enum { + SCHED_CLASS_LEVEL_CL_RL = 0, /* class rate limiter */ +}; + +enum { + SCHED_CLASS_MODE_CLASS = 0, /* per-class scheduling */ +}; + +enum { + SCHED_CLASS_RATEUNIT_BITS = 0, /* bit rate scheduling */ +}; + +enum { + SCHED_CLASS_RATEMODE_ABS = 1, /* Kb/s */ +}; + +/* Support for "sched_queue" command to allow one or more NIC TX Queues + * to be bound to a TX Scheduling Class. + */ +struct ch_sched_queue { + s8 queue; /* queue index */ + s8 class; /* class index */ }; /* Defined bit width of user definable filter tuples @@ -947,11 +1028,47 @@ enum { VLAN_REWRITE }; +/* Host shadow copy of ingress filter entry. This is in host native format + * and doesn't match the ordering or bit order, etc. of the hardware of the + * firmware command. The use of bit-field structure elements is purely to + * remind ourselves of the field size limitations and save memory in the case + * where the filter table is large. + */ +struct filter_entry { + /* Administrative fields for filter. */ + u32 valid:1; /* filter allocated and valid */ + u32 locked:1; /* filter is administratively locked */ + + u32 pending:1; /* filter action is pending firmware reply */ + u32 smtidx:8; /* Source MAC Table index for smac */ + struct filter_ctx *ctx; /* Caller's completion hook */ + struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ + struct net_device *dev; /* Associated net device */ + u32 tid; /* This will store the actual tid */ + + /* The filter itself. Most of this is a straight copy of information + * provided by the extended ioctl(). Some fields are translated to + * internal forms -- for instance the Ingress Queue ID passed in from + * the ioctl() is translated into the Absolute Ingress Queue ID. + */ + struct ch_filter_specification fs; +}; + static inline int is_offload(const struct adapter *adap) { return adap->params.offload; } +static inline int is_pci_uld(const struct adapter *adap) +{ + return adap->params.crypto; +} + +static inline int is_uld(const struct adapter *adap) +{ + return (adap->params.offload || adap->params.crypto); +} + static inline u32 t4_read_reg(struct adapter *adap, u32 reg_addr) { return readl(adap->regs + reg_addr); @@ -1178,6 +1295,8 @@ int t4_sge_alloc_eth_txq(struct adapter *adap, struct sge_eth_txq *txq, int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, struct net_device *dev, unsigned int iqid, unsigned int cmplqid); +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid); int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid); irqreturn_t t4_sge_intr_msix(int irq, void *cookie); @@ -1185,8 +1304,6 @@ int t4_sge_init(struct adapter *adap); void t4_sge_start(struct adapter *adap); void t4_sge_stop(struct adapter *adap); int cxgb_busy_poll(struct napi_struct *napi); -int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us, - unsigned int cnt); void cxgb4_set_ethtool_ops(struct net_device *netdev); int cxgb4_write_rss(const struct port_info *pi, const u16 *queues); extern int dbfifo_int_thresh; @@ -1289,6 +1406,18 @@ static inline int hash_mac_addr(const u8 *addr) return a & 0x3f; } +int cxgb4_set_rspq_intr_params(struct sge_rspq *q, unsigned int us, + unsigned int cnt); +static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, + unsigned int us, unsigned int cnt, + unsigned int size, unsigned int iqe_size) +{ + q->adap = adap; + cxgb4_set_rspq_intr_params(q, us, cnt); + q->iqe_len = iqe_size; + q->size = size; +} + void t4_write_indirect(struct adapter *adap, unsigned int addr_reg, unsigned int data_reg, const u32 *vals, unsigned int nregs, unsigned int start_idx); @@ -1514,6 +1643,9 @@ void t4_get_trace_filter(struct adapter *adapter, struct trace_params *tp, int filter_index, int *enabled); int t4_fwaddrspace_write(struct adapter *adap, unsigned int mbox, u32 addr, u32 val); +int t4_sched_params(struct adapter *adapter, int type, int level, int mode, + int rateunit, int ratemode, int channel, int class, + int minrate, int maxrate, int weight, int pktsize); void t4_sge_decode_idma_state(struct adapter *adapter, int state); void t4_free_mem(void *addr); void t4_idma_monitor_init(struct adapter *adapter, @@ -1521,4 +1653,11 @@ void t4_idma_monitor_init(struct adapter *adapter, void t4_idma_monitor(struct adapter *adapter, struct sge_idma_monitor_state *idma, int hz, int ticks); +int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, + unsigned int naddr, u8 *addr); +void t4_uld_mem_free(struct adapter *adap); +int t4_uld_mem_alloc(struct adapter *adap); +void t4_uld_clean_up(struct adapter *adap); +void t4_register_netevent_notifier(void); +void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, struct sge_fl *fl); #endif /* __CXGB4_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c index 91fb50850fff..20455d082cb8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c @@ -2432,17 +2432,11 @@ static int sge_qinfo_show(struct seq_file *seq, void *v) { struct adapter *adap = seq->private; int eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4); - int iscsi_entries = DIV_ROUND_UP(adap->sge.iscsiqsets, 4); - int iscsit_entries = DIV_ROUND_UP(adap->sge.niscsitq, 4); - int rdma_entries = DIV_ROUND_UP(adap->sge.rdmaqs, 4); - int ciq_entries = DIV_ROUND_UP(adap->sge.rdmaciqs, 4); + int ofld_entries = DIV_ROUND_UP(adap->sge.ofldqsets, 4); int ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4); int i, r = (uintptr_t)v - 1; - int iscsi_idx = r - eth_entries; - int iscsit_idx = iscsi_idx - iscsi_entries; - int rdma_idx = iscsit_idx - iscsit_entries; - int ciq_idx = rdma_idx - rdma_entries; - int ctrl_idx = ciq_idx - ciq_entries; + int ofld_idx = r - eth_entries; + int ctrl_idx = ofld_idx - ofld_entries; int fq_idx = ctrl_idx - ctrl_entries; if (r) @@ -2518,119 +2512,17 @@ do { \ RL("FLLow:", fl.low); RL("FLStarving:", fl.starving); - } else if (iscsi_idx < iscsi_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsirxq[iscsi_idx * 4]; + } else if (ofld_idx < ofld_entries) { const struct sge_ofld_txq *tx = - &adap->sge.ofldtxq[iscsi_idx * 4]; - int n = min(4, adap->sge.iscsiqsets - 4 * iscsi_idx); + &adap->sge.ofldtxq[ofld_idx * 4]; + int n = min(4, adap->sge.ofldqsets - 4 * ofld_idx); - S("QType:", "iSCSI"); + S("QType:", "OFLD-Txq"); T("TxQ ID:", q.cntxt_id); T("TxQ size:", q.size); T("TxQ inuse:", q.in_use); T("TxQ CIDX:", q.cidx); T("TxQ PIDX:", q.pidx); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (iscsit_idx < iscsit_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.iscsitrxq[iscsit_idx * 4]; - int n = min(4, adap->sge.niscsitq - 4 * iscsit_idx); - - S("QType:", "iSCSIT"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (rdma_idx < rdma_entries) { - const struct sge_ofld_rxq *rx = - &adap->sge.rdmarxq[rdma_idx * 4]; - int n = min(4, adap->sge.rdmaqs - 4 * rdma_idx); - - S("QType:", "RDMA-CPL"); - S("Interface:", - rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - R("FL ID:", fl.cntxt_id); - R("FL size:", fl.size - 8); - R("FL pend:", fl.pend_cred); - R("FL avail:", fl.avail); - R("FL PIDX:", fl.pidx); - R("FL CIDX:", fl.cidx); - RL("RxPackets:", stats.pkts); - RL("RxImmPkts:", stats.imm); - RL("RxNoMem:", stats.nomem); - RL("FLAllocErr:", fl.alloc_failed); - RL("FLLrgAlcErr:", fl.large_alloc_failed); - RL("FLMapErr:", fl.mapping_err); - RL("FLLow:", fl.low); - RL("FLStarving:", fl.starving); - - } else if (ciq_idx < ciq_entries) { - const struct sge_ofld_rxq *rx = &adap->sge.rdmaciq[ciq_idx * 4]; - int n = min(4, adap->sge.rdmaciqs - 4 * ciq_idx); - - S("QType:", "RDMA-CIQ"); - S("Interface:", - rx[i].rspq.netdev ? rx[i].rspq.netdev->name : "N/A"); - R("RspQ ID:", rspq.abs_id); - R("RspQ size:", rspq.size); - R("RspQE size:", rspq.iqe_len); - R("RspQ CIDX:", rspq.cidx); - R("RspQ Gen:", rspq.gen); - S3("u", "Intr delay:", qtimer_val(adap, &rx[i].rspq)); - S3("u", "Intr pktcnt:", - adap->sge.counter_val[rx[i].rspq.pktcnt_idx]); - RL("RxAN:", stats.an); - RL("RxNoMem:", stats.nomem); } else if (ctrl_idx < ctrl_entries) { const struct sge_ctrl_txq *tx = &adap->sge.ctrlq[ctrl_idx * 4]; @@ -2672,10 +2564,7 @@ do { \ static int sge_queue_entries(const struct adapter *adap) { return DIV_ROUND_UP(adap->sge.ethqsets, 4) + - DIV_ROUND_UP(adap->sge.iscsiqsets, 4) + - DIV_ROUND_UP(adap->sge.niscsitq, 4) + - DIV_ROUND_UP(adap->sge.rdmaqs, 4) + - DIV_ROUND_UP(adap->sge.rdmaciqs, 4) + + DIV_ROUND_UP(adap->sge.ofldqsets, 4) + DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1; } @@ -2859,12 +2748,6 @@ static void add_debugfs_mem(struct adapter *adap, const char *name, size_mb << 20); } -static int blocked_fl_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t blocked_fl_read(struct file *filp, char __user *ubuf, size_t count, loff_t *ppos) { @@ -2908,7 +2791,7 @@ static ssize_t blocked_fl_write(struct file *filp, const char __user *ubuf, static const struct file_operations blocked_fl_fops = { .owner = THIS_MODULE, - .open = blocked_fl_open, + .open = simple_open, .read = blocked_fl_read, .write = blocked_fl_write, .llseek = generic_file_llseek, diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c new file mode 100644 index 000000000000..10736738ff30 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -0,0 +1,721 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "cxgb4.h" +#include "t4_regs.h" +#include "l2t.h" +#include "t4fw_api.h" +#include "cxgb4_filter.h" + +static inline bool is_field_set(u32 val, u32 mask) +{ + return val || mask; +} + +static inline bool unsupported(u32 conf, u32 conf_mask, u32 val, u32 mask) +{ + return !(conf & conf_mask) && is_field_set(val, mask); +} + +/* Validate filter spec against configuration done on the card. */ +static int validate_filter(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + u32 fconf, iconf; + + /* Check for unconfigured fields being used. */ + fconf = adapter->params.tp.vlan_pri_map; + iconf = adapter->params.tp.ingress_config; + + if (unsupported(fconf, FCOE_F, fs->val.fcoe, fs->mask.fcoe) || + unsupported(fconf, PORT_F, fs->val.iport, fs->mask.iport) || + unsupported(fconf, TOS_F, fs->val.tos, fs->mask.tos) || + unsupported(fconf, ETHERTYPE_F, fs->val.ethtype, + fs->mask.ethtype) || + unsupported(fconf, MACMATCH_F, fs->val.macidx, fs->mask.macidx) || + unsupported(fconf, MPSHITTYPE_F, fs->val.matchtype, + fs->mask.matchtype) || + unsupported(fconf, FRAGMENTATION_F, fs->val.frag, fs->mask.frag) || + unsupported(fconf, PROTOCOL_F, fs->val.proto, fs->mask.proto) || + unsupported(fconf, VNIC_ID_F, fs->val.pfvf_vld, + fs->mask.pfvf_vld) || + unsupported(fconf, VNIC_ID_F, fs->val.ovlan_vld, + fs->mask.ovlan_vld) || + unsupported(fconf, VLAN_F, fs->val.ivlan_vld, fs->mask.ivlan_vld)) + return -EOPNOTSUPP; + + /* T4 inconveniently uses the same FT_VNIC_ID_W bits for both the Outer + * VLAN Tag and PF/VF/VFvld fields based on VNIC_F being set + * in TP_INGRESS_CONFIG. Hense the somewhat crazy checks + * below. Additionally, since the T4 firmware interface also + * carries that overlap, we need to translate any PF/VF + * specification into that internal format below. + */ + if (is_field_set(fs->val.pfvf_vld, fs->mask.pfvf_vld) && + is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld)) + return -EOPNOTSUPP; + if (unsupported(iconf, VNIC_F, fs->val.pfvf_vld, fs->mask.pfvf_vld) || + (is_field_set(fs->val.ovlan_vld, fs->mask.ovlan_vld) && + (iconf & VNIC_F))) + return -EOPNOTSUPP; + if (fs->val.pf > 0x7 || fs->val.vf > 0x7f) + return -ERANGE; + fs->mask.pf &= 0x7; + fs->mask.vf &= 0x7f; + + /* If the user is requesting that the filter action loop + * matching packets back out one of our ports, make sure that + * the egress port is in range. + */ + if (fs->action == FILTER_SWITCH && + fs->eport >= adapter->params.nports) + return -ERANGE; + + /* Don't allow various trivially obvious bogus out-of-range values... */ + if (fs->val.iport >= adapter->params.nports) + return -ERANGE; + + /* T4 doesn't support removing VLAN Tags for loop back filters. */ + if (is_t4(adapter->params.chip) && + fs->action == FILTER_SWITCH && + (fs->newvlan == VLAN_REMOVE || + fs->newvlan == VLAN_REWRITE)) + return -EOPNOTSUPP; + + return 0; +} + +static int get_filter_steerq(struct net_device *dev, + struct ch_filter_specification *fs) +{ + struct adapter *adapter = netdev2adap(dev); + int iq; + + /* If the user has requested steering matching Ingress Packets + * to a specific Queue Set, we need to make sure it's in range + * for the port and map that into the Absolute Queue ID of the + * Queue Set's Response Queue. + */ + if (!fs->dirsteer) { + if (fs->iq) + return -EINVAL; + iq = 0; + } else { + struct port_info *pi = netdev_priv(dev); + + /* If the iq id is greater than the number of qsets, + * then assume it is an absolute qid. + */ + if (fs->iq < pi->nqsets) + iq = adapter->sge.ethrxq[pi->first_qset + + fs->iq].rspq.abs_id; + else + iq = fs->iq; + } + + return iq; +} + +static int cxgb4_set_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + + if (test_bit(fidx, t->ftid_bmap)) { + spin_unlock_bh(&t->ftid_lock); + return -EBUSY; + } + + if (family == PF_INET) + __set_bit(fidx, t->ftid_bmap); + else + bitmap_allocate_region(t->ftid_bmap, fidx, 2); + + spin_unlock_bh(&t->ftid_lock); + return 0; +} + +static void cxgb4_clear_ftid(struct tid_info *t, int fidx, int family) +{ + spin_lock_bh(&t->ftid_lock); + if (family == PF_INET) + __clear_bit(fidx, t->ftid_bmap); + else + bitmap_release_region(t->ftid_bmap, fidx, 2); + spin_unlock_bh(&t->ftid_lock); +} + +/* Delete the filter at a specified index. */ +static int del_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + struct sk_buff *skb; + unsigned int len; + + len = sizeof(*fwr); + + skb = alloc_skb(len, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + fwr = (struct fw_filter_wr *)__skb_put(skb, len); + t4_mk_filtdelwr(f->tid, fwr, adapter->sge.fw_evtq.abs_id); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + t4_mgmt_tx(adapter, skb); + return 0; +} + +/* Send a Work Request to write the filter at a specified index. We construct + * a Firmware Filter Work Request to have the work done and put the indicated + * filter into "pending" mode which will prevent any further actions against + * it till we get a reply from the firmware on the completion status of the + * request. + */ +int set_filter_wr(struct adapter *adapter, int fidx) +{ + struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; + struct fw_filter_wr *fwr; + struct sk_buff *skb; + + skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); + if (!skb) + return -ENOMEM; + + /* If the new filter requires loopback Destination MAC and/or VLAN + * rewriting then we need to allocate a Layer 2 Table (L2T) entry for + * the filter. + */ + if (f->fs.newdmac || f->fs.newvlan) { + /* allocate L2T entry for new filter */ + f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, + f->fs.eport, f->fs.dmac); + if (!f->l2t) { + kfree_skb(skb); + return -ENOMEM; + } + } + + fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); + memset(fwr, 0, sizeof(*fwr)); + + /* It would be nice to put most of the following in t4_hw.c but most + * of the work is translating the cxgbtool ch_filter_specification + * into the Work Request and the definition of that structure is + * currently in cxgbtool.h which isn't appropriate to pull into the + * common code. We may eventually try to come up with a more neutral + * filter specification structure but for now it's easiest to simply + * put this fairly direct code in line ... + */ + fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); + fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr) / 16)); + fwr->tid_to_iq = + htonl(FW_FILTER_WR_TID_V(f->tid) | + FW_FILTER_WR_RQTYPE_V(f->fs.type) | + FW_FILTER_WR_NOREPLY_V(0) | + FW_FILTER_WR_IQ_V(f->fs.iq)); + fwr->del_filter_to_l2tix = + htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | + FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | + FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | + FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | + FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | + FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | + FW_FILTER_WR_DMAC_V(f->fs.newdmac) | + FW_FILTER_WR_SMAC_V(f->fs.newsmac) | + FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || + f->fs.newvlan == VLAN_REWRITE) | + FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | + FW_FILTER_WR_TXCHAN_V(f->fs.eport) | + FW_FILTER_WR_PRIO_V(f->fs.prio) | + FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); + fwr->ethtype = htons(f->fs.val.ethtype); + fwr->ethtypem = htons(f->fs.mask.ethtype); + fwr->frag_to_ovlan_vldm = + (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | + FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | + FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | + FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | + FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); + fwr->smac_sel = 0; + fwr->rx_chan_rx_rpl_iq = + htons(FW_FILTER_WR_RX_CHAN_V(0) | + FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); + fwr->maci_to_matchtypem = + htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | + FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | + FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | + FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | + FW_FILTER_WR_PORT_V(f->fs.val.iport) | + FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | + FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | + FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); + fwr->ptcl = f->fs.val.proto; + fwr->ptclm = f->fs.mask.proto; + fwr->ttyp = f->fs.val.tos; + fwr->ttypm = f->fs.mask.tos; + fwr->ivlan = htons(f->fs.val.ivlan); + fwr->ivlanm = htons(f->fs.mask.ivlan); + fwr->ovlan = htons(f->fs.val.ovlan); + fwr->ovlanm = htons(f->fs.mask.ovlan); + memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); + memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); + memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); + memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); + fwr->lp = htons(f->fs.val.lport); + fwr->lpm = htons(f->fs.mask.lport); + fwr->fp = htons(f->fs.val.fport); + fwr->fpm = htons(f->fs.mask.fport); + if (f->fs.newsmac) + memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); + + /* Mark the filter as "pending" and ship off the Filter Work Request. + * When we get the Work Request Reply we'll clear the pending status. + */ + f->pending = 1; + set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); + t4_ofld_send(adapter, skb); + return 0; +} + +/* Return an error number if the indicated filter isn't writable ... */ +int writable_filter(struct filter_entry *f) +{ + if (f->locked) + return -EPERM; + if (f->pending) + return -EBUSY; + + return 0; +} + +/* Delete the filter at the specified index (if valid). The checks for all + * the common problems with doing this like the filter being locked, currently + * pending in another operation, etc. + */ +int delete_filter(struct adapter *adapter, unsigned int fidx) +{ + struct filter_entry *f; + int ret; + + if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) + return -EINVAL; + + f = &adapter->tids.ftid_tab[fidx]; + ret = writable_filter(f); + if (ret) + return ret; + if (f->valid) + return del_filter_wr(adapter, fidx); + + return 0; +} + +/* Clear a filter and release any of its resources that we own. This also + * clears the filter's "pending" status. + */ +void clear_filter(struct adapter *adap, struct filter_entry *f) +{ + /* If the new or old filter have loopback rewriteing rules then we'll + * need to free any existing Layer Two Table (L2T) entries of the old + * filter rule. The firmware will handle freeing up any Source MAC + * Table (SMT) entries used for rewriting Source MAC Addresses in + * loopback rules. + */ + if (f->l2t) + cxgb4_l2t_release(f->l2t); + + /* The zeroing of the filter rule below clears the filter valid, + * pending, locked flags, l2t pointer, etc. so it's all we need for + * this operation. + */ + memset(f, 0, sizeof(*f)); +} + +void clear_all_filters(struct adapter *adapter) +{ + unsigned int i; + + if (adapter->tids.ftid_tab) { + struct filter_entry *f = &adapter->tids.ftid_tab[0]; + unsigned int max_ftid = adapter->tids.nftids + + adapter->tids.nsftids; + + for (i = 0; i < max_ftid; i++, f++) + if (f->valid || f->pending) + clear_filter(adapter, f); + } +} + +/* Fill up default masks for set match fields. */ +static void fill_default_mask(struct ch_filter_specification *fs) +{ + unsigned int lip = 0, lip_mask = 0; + unsigned int fip = 0, fip_mask = 0; + unsigned int i; + + if (fs->val.iport && !fs->mask.iport) + fs->mask.iport |= ~0; + if (fs->val.fcoe && !fs->mask.fcoe) + fs->mask.fcoe |= ~0; + if (fs->val.matchtype && !fs->mask.matchtype) + fs->mask.matchtype |= ~0; + if (fs->val.macidx && !fs->mask.macidx) + fs->mask.macidx |= ~0; + if (fs->val.ethtype && !fs->mask.ethtype) + fs->mask.ethtype |= ~0; + if (fs->val.ivlan && !fs->mask.ivlan) + fs->mask.ivlan |= ~0; + if (fs->val.ovlan && !fs->mask.ovlan) + fs->mask.ovlan |= ~0; + if (fs->val.frag && !fs->mask.frag) + fs->mask.frag |= ~0; + if (fs->val.tos && !fs->mask.tos) + fs->mask.tos |= ~0; + if (fs->val.proto && !fs->mask.proto) + fs->mask.proto |= ~0; + + for (i = 0; i < ARRAY_SIZE(fs->val.lip); i++) { + lip |= fs->val.lip[i]; + lip_mask |= fs->mask.lip[i]; + fip |= fs->val.fip[i]; + fip_mask |= fs->mask.fip[i]; + } + + if (lip && !lip_mask) + memset(fs->mask.lip, ~0, sizeof(fs->mask.lip)); + + if (fip && !fip_mask) + memset(fs->mask.fip, ~0, sizeof(fs->mask.lip)); + + if (fs->val.lport && !fs->mask.lport) + fs->mask.lport = ~0; + if (fs->val.fport && !fs->mask.fport) + fs->mask.fport = ~0; +} + +/* Check a Chelsio Filter Request for validity, convert it into our internal + * format and send it to the hardware. Return 0 on success, an error number + * otherwise. We attach any provided filter operation context to the internal + * filter specification in order to facilitate signaling completion of the + * operation. + */ +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int max_fidx, fidx; + struct filter_entry *f; + u32 iconf; + int iq, ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + fill_default_mask(fs); + + ret = validate_filter(dev, fs); + if (ret) + return ret; + + iq = get_filter_steerq(dev, fs); + if (iq < 0) + return iq; + + /* IPv6 filters occupy four slots and must be aligned on + * four-slot boundaries. IPv4 filters only occupy a single + * slot and have no alignment requirements but writing a new + * IPv4 filter into the middle of an existing IPv6 filter + * requires clearing the old IPv6 filter and hence we prevent + * insertion. + */ + if (fs->type == 0) { /* IPv4 */ + /* If our IPv4 filter isn't being written to a + * multiple of four filter index and there's an IPv6 + * filter at the multiple of 4 base slot, then we + * prevent insertion. + */ + fidx = filter_id & ~0x3; + if (fidx != filter_id && + adapter->tids.ftid_tab[fidx].fs.type) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and is occupying slots %u to %u\n", + fidx, fidx + 3); + return -EINVAL; + } + } + } else { /* IPv6 */ + /* Ensure that the IPv6 filter is aligned on a + * multiple of 4 boundary. + */ + if (filter_id & 0x3) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 must be aligned on a 4-slot boundary\n"); + return -EINVAL; + } + + /* Check all except the base overlapping IPv4 filter slots. */ + for (fidx = filter_id + 1; fidx < filter_id + 4; fidx++) { + f = &adapter->tids.ftid_tab[fidx]; + if (f->valid) { + dev_err(adapter->pdev_dev, + "Invalid location. IPv6 requires 4 slots and an IPv4 filter exists at %u\n", + fidx); + return -EINVAL; + } + } + } + + /* Check to make sure that provided filter index is not + * already in use by someone else + */ + f = &adapter->tids.ftid_tab[filter_id]; + if (f->valid) + return -EBUSY; + + fidx = filter_id + adapter->tids.ftid_base; + ret = cxgb4_set_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + if (ret) + return ret; + + /* Check to make sure the filter requested is writable ... */ + ret = writable_filter(f); + if (ret) { + /* Clear the bits we have set above */ + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + return ret; + } + + /* Clear out any old resources being used by the filter before + * we start constructing the new filter. + */ + if (f->valid) + clear_filter(adapter, f); + + /* Convert the filter specification into our internal format. + * We copy the PF/VF specification into the Outer VLAN field + * here so the rest of the code -- including the interface to + * the firmware -- doesn't have to constantly do these checks. + */ + f->fs = *fs; + f->fs.iq = iq; + f->dev = dev; + + iconf = adapter->params.tp.ingress_config; + if (iconf & VNIC_F) { + f->fs.val.ovlan = (fs->val.pf << 13) | fs->val.vf; + f->fs.mask.ovlan = (fs->mask.pf << 13) | fs->mask.vf; + f->fs.val.ovlan_vld = fs->val.pfvf_vld; + f->fs.mask.ovlan_vld = fs->mask.pfvf_vld; + } + + /* Attempt to set the filter. If we don't succeed, we clear + * it and return the failure. + */ + f->ctx = ctx; + f->tid = fidx; /* Save the actual tid */ + ret = set_filter_wr(adapter, filter_id); + if (ret) { + cxgb4_clear_ftid(&adapter->tids, filter_id, + fs->type ? PF_INET6 : PF_INET); + clear_filter(adapter, f); + } + + return ret; +} + +/* Check a delete filter request for validity and send it to the hardware. + * Return 0 on success, an error number otherwise. We attach any provided + * filter operation context to the internal filter specification in order to + * facilitate signaling completion of the operation. + */ +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx) +{ + struct adapter *adapter = netdev2adap(dev); + struct filter_entry *f; + unsigned int max_fidx; + int ret; + + max_fidx = adapter->tids.nftids; + if (filter_id != (max_fidx + adapter->tids.nsftids - 1) && + filter_id >= max_fidx) + return -E2BIG; + + f = &adapter->tids.ftid_tab[filter_id]; + ret = writable_filter(f); + if (ret) + return ret; + + if (f->valid) { + f->ctx = ctx; + cxgb4_clear_ftid(&adapter->tids, filter_id, + f->fs.type ? PF_INET6 : PF_INET); + return del_filter_wr(adapter, filter_id); + } + + /* If the caller has passed in a Completion Context then we need to + * mark it as a successful completion so they don't stall waiting + * for it. + */ + if (ctx) { + ctx->result = 0; + complete(&ctx->completion); + } + return ret; +} + +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_set_filter(dev, filter_id, fs, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + +int cxgb4_del_filter(struct net_device *dev, int filter_id) +{ + struct filter_ctx ctx; + int ret; + + init_completion(&ctx.completion); + + ret = __cxgb4_del_filter(dev, filter_id, &ctx); + if (ret) + goto out; + + /* Wait for reply */ + ret = wait_for_completion_timeout(&ctx.completion, 10 * HZ); + if (!ret) + return -ETIMEDOUT; + + ret = ctx.result; +out: + return ret; +} + +/* Handle a filter write/deletion reply. */ +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) +{ + unsigned int tid = GET_TID(rpl); + struct filter_entry *f = NULL; + unsigned int max_fidx; + int idx; + + max_fidx = adap->tids.nftids + adap->tids.nsftids; + /* Get the corresponding filter entry for this tid */ + if (adap->tids.ftid_tab) { + /* Check this in normal filter region */ + idx = tid - adap->tids.ftid_base; + if (idx >= max_fidx) + return; + f = &adap->tids.ftid_tab[idx]; + if (f->tid != tid) + return; + } + + /* We found the filter entry for this tid */ + if (f) { + unsigned int ret = TCB_COOKIE_G(rpl->cookie); + struct filter_ctx *ctx; + + /* Pull off any filter operation context attached to the + * filter. + */ + ctx = f->ctx; + f->ctx = NULL; + + if (ret == FW_FILTER_WR_FLT_DELETED) { + /* Clear the filter when we get confirmation from the + * hardware that the filter has been deleted. + */ + clear_filter(adap, f); + if (ctx) + ctx->result = 0; + } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { + dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", + idx); + clear_filter(adap, f); + if (ctx) + ctx->result = -ENOMEM; + } else if (ret == FW_FILTER_WR_FLT_ADDED) { + f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; + f->pending = 0; /* asynchronous setup completed */ + f->valid = 1; + if (ctx) { + ctx->result = 0; + ctx->tid = idx; + } + } else { + /* Something went wrong. Issue a warning about the + * problem and clear everything out. + */ + dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", + idx, ret); + clear_filter(adap, f); + if (ctx) + ctx->result = -EINVAL; + } + if (ctx) + complete(&ctx->completion); + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h new file mode 100644 index 000000000000..23742cb1c69f --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.h @@ -0,0 +1,48 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_FILTER_H +#define __CXGB4_FILTER_H + +#include "t4_msg.h" + +void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl); +void clear_filter(struct adapter *adap, struct filter_entry *f); + +int set_filter_wr(struct adapter *adapter, int fidx); +int delete_filter(struct adapter *adapter, unsigned int fidx); + +int writable_filter(struct filter_entry *f); +void clear_all_filters(struct adapter *adapter); +#endif /* __CXGB4_FILTER_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c index 3ceafb55d6da..cf147ca419a8 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -67,6 +67,7 @@ #include #include "cxgb4.h" +#include "cxgb4_filter.h" #include "t4_regs.h" #include "t4_values.h" #include "t4_msg.h" @@ -76,6 +77,8 @@ #include "cxgb4_debugfs.h" #include "clip_tbl.h" #include "l2t.h" +#include "sched.h" +#include "cxgb4_tc_u32.h" char cxgb4_driver_name[] = KBUILD_MODNAME; @@ -86,30 +89,6 @@ char cxgb4_driver_name[] = KBUILD_MODNAME; const char cxgb4_driver_version[] = DRV_VERSION; #define DRV_DESC "Chelsio T4/T5/T6 Network Driver" -/* Host shadow copy of ingress filter entry. This is in host native format - * and doesn't match the ordering or bit order, etc. of the hardware of the - * firmware command. The use of bit-field structure elements is purely to - * remind ourselves of the field size limitations and save memory in the case - * where the filter table is large. - */ -struct filter_entry { - /* Administrative fields for filter. - */ - u32 valid:1; /* filter allocated and valid */ - u32 locked:1; /* filter is administratively locked */ - - u32 pending:1; /* filter action is pending firmware reply */ - u32 smtidx:8; /* Source MAC Table index for smac */ - struct l2t_entry *l2t; /* Layer Two Table entry for dmac */ - - /* The filter itself. Most of this is a straight copy of information - * provided by the extended ioctl(). Some fields are translated to - * internal forms -- for instance the Ingress Queue ID passed in from - * the ioctl() is translated into the Absolute Ingress Queue ID. - */ - struct ch_filter_specification fs; -}; - #define DFLT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP |\ NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -223,13 +202,8 @@ MODULE_PARM_DESC(select_queue, static struct dentry *cxgb4_debugfs_root; -static LIST_HEAD(adapter_list); -static DEFINE_MUTEX(uld_mutex); -/* Adapter list to be accessed from atomic context */ -static LIST_HEAD(adap_rcu_list); -static DEFINE_SPINLOCK(adap_rcu_lock); -static struct cxgb4_uld_info ulds[CXGB4_ULD_MAX]; -static const char *const uld_str[] = { "RDMA", "iSCSI", "iSCSIT" }; +LIST_HEAD(adapter_list); +DEFINE_MUTEX(uld_mutex); static void link_report(struct net_device *dev) { @@ -303,11 +277,9 @@ static void dcb_tx_queue_prio_enable(struct net_device *dev, int enable) txq->dcb_prio = value; } } -#endif /* CONFIG_CHELSIO_T4_DCB */ -int cxgb4_dcb_enabled(const struct net_device *dev) +static int cxgb4_dcb_enabled(const struct net_device *dev) { -#ifdef CONFIG_CHELSIO_T4_DCB struct port_info *pi = netdev_priv(dev); if (!pi->dcb.enabled) @@ -315,11 +287,8 @@ int cxgb4_dcb_enabled(const struct net_device *dev) return ((pi->dcb.state == CXGB4_DCB_STATE_FW_ALLSYNCED) || (pi->dcb.state == CXGB4_DCB_STATE_HOST)); -#else - return 0; -#endif } -EXPORT_SYMBOL(cxgb4_dcb_enabled); +#endif /* CONFIG_CHELSIO_T4_DCB */ void t4_os_link_changed(struct adapter *adapter, int port_id, int link_stat) { @@ -531,66 +500,6 @@ static void dcb_rpl(struct adapter *adap, const struct fw_port_cmd *pcmd) } #endif /* CONFIG_CHELSIO_T4_DCB */ -/* Clear a filter and release any of its resources that we own. This also - * clears the filter's "pending" status. - */ -static void clear_filter(struct adapter *adap, struct filter_entry *f) -{ - /* If the new or old filter have loopback rewriteing rules then we'll - * need to free any existing Layer Two Table (L2T) entries of the old - * filter rule. The firmware will handle freeing up any Source MAC - * Table (SMT) entries used for rewriting Source MAC Addresses in - * loopback rules. - */ - if (f->l2t) - cxgb4_l2t_release(f->l2t); - - /* The zeroing of the filter rule below clears the filter valid, - * pending, locked flags, l2t pointer, etc. so it's all we need for - * this operation. - */ - memset(f, 0, sizeof(*f)); -} - -/* Handle a filter write/deletion reply. - */ -static void filter_rpl(struct adapter *adap, const struct cpl_set_tcb_rpl *rpl) -{ - unsigned int idx = GET_TID(rpl); - unsigned int nidx = idx - adap->tids.ftid_base; - unsigned int ret; - struct filter_entry *f; - - if (idx >= adap->tids.ftid_base && nidx < - (adap->tids.nftids + adap->tids.nsftids)) { - idx = nidx; - ret = TCB_COOKIE_G(rpl->cookie); - f = &adap->tids.ftid_tab[idx]; - - if (ret == FW_FILTER_WR_FLT_DELETED) { - /* Clear the filter when we get confirmation from the - * hardware that the filter has been deleted. - */ - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_SMT_TBL_FULL) { - dev_err(adap->pdev_dev, "filter %u setup failed due to full SMT\n", - idx); - clear_filter(adap, f); - } else if (ret == FW_FILTER_WR_FLT_ADDED) { - f->smtidx = (be64_to_cpu(rpl->oldval) >> 24) & 0xff; - f->pending = 0; /* asynchronous setup completed */ - f->valid = 1; - } else { - /* Something went wrong. Issue a warning about the - * problem and clear everything out. - */ - dev_err(adap->pdev_dev, "filter %u setup failed with error %u\n", - idx, ret); - clear_filter(adap, f); - } - } -} - /* Response queue handler for the FW event queue. */ static int fwevtq_handler(struct sge_rspq *q, const __be64 *rsp, @@ -677,56 +586,6 @@ out: return 0; } -/* Flush the aggregated lro sessions */ -static void uldrx_flush_handler(struct sge_rspq *q) -{ - if (ulds[q->uld].lro_flush) - ulds[q->uld].lro_flush(&q->lro_mgr); -} - -/** - * uldrx_handler - response queue handler for ULD queues - * @q: the response queue that received the packet - * @rsp: the response queue descriptor holding the offload message - * @gl: the gather list of packet fragments - * - * Deliver an ingress offload packet to a ULD. All processing is done by - * the ULD, we just maintain statistics. - */ -static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, - const struct pkt_gl *gl) -{ - struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); - int ret; - - /* FW can send CPLs encapsulated in a CPL_FW4_MSG. - */ - if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && - ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) - rsp += 2; - - if (q->flush_handler) - ret = ulds[q->uld].lro_rx_handler(q->adap->uld_handle[q->uld], - rsp, gl, &q->lro_mgr, - &q->napi); - else - ret = ulds[q->uld].rx_handler(q->adap->uld_handle[q->uld], - rsp, gl); - - if (ret) { - rxq->stats.nomem++; - return -1; - } - - if (gl == NULL) - rxq->stats.imm++; - else if (gl == CXGB4_MSG_AN) - rxq->stats.an++; - else - rxq->stats.pkts++; - return 0; -} - static void disable_msi(struct adapter *adapter) { if (adapter->flags & USING_MSIX) { @@ -778,30 +637,12 @@ static void name_msix_vecs(struct adapter *adap) snprintf(adap->msix_info[msi_idx].desc, n, "%s-Rx%d", d->name, i); } - - /* offload queues */ - for_each_iscsirxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iscsi%d", - adap->port[0]->name, i); - - for_each_iscsitrxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-iSCSIT%d", - adap->port[0]->name, i); - - for_each_rdmarxq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma%d", - adap->port[0]->name, i); - - for_each_rdmaciq(&adap->sge, i) - snprintf(adap->msix_info[msi_idx++].desc, n, "%s-rdma-ciq%d", - adap->port[0]->name, i); } static int request_msix_queue_irqs(struct adapter *adap) { struct sge *s = &adap->sge; - int err, ethqidx, iscsiqidx = 0, rdmaqidx = 0, rdmaciqqidx = 0; - int iscsitqidx = 0; + int err, ethqidx; int msi_index = 2; err = request_irq(adap->msix_info[1].vec, t4_sge_intr_msix, 0, @@ -818,57 +659,9 @@ static int request_msix_queue_irqs(struct adapter *adap) goto unwind; msi_index++; } - for_each_iscsirxq(s, iscsiqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsirxq[iscsiqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_iscsitrxq(s, iscsitqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->iscsitrxq[iscsitqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmarxq(s, rdmaqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmarxq[rdmaqidx].rspq); - if (err) - goto unwind; - msi_index++; - } - for_each_rdmaciq(s, rdmaciqqidx) { - err = request_irq(adap->msix_info[msi_index].vec, - t4_sge_intr_msix, 0, - adap->msix_info[msi_index].desc, - &s->rdmaciq[rdmaciqqidx].rspq); - if (err) - goto unwind; - msi_index++; - } return 0; unwind: - while (--rdmaciqqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmaciq[rdmaciqqidx].rspq); - while (--rdmaqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->rdmarxq[rdmaqidx].rspq); - while (--iscsitqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsitrxq[iscsitqidx].rspq); - while (--iscsiqidx >= 0) - free_irq(adap->msix_info[--msi_index].vec, - &s->iscsirxq[iscsiqidx].rspq); while (--ethqidx >= 0) free_irq(adap->msix_info[--msi_index].vec, &s->ethrxq[ethqidx].rspq); @@ -884,16 +677,6 @@ static void free_msix_queue_irqs(struct adapter *adap) free_irq(adap->msix_info[1].vec, &s->fw_evtq); for_each_ethrxq(s, i) free_irq(adap->msix_info[msi_index++].vec, &s->ethrxq[i].rspq); - for_each_iscsirxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsirxq[i].rspq); - for_each_iscsitrxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, - &s->iscsitrxq[i].rspq); - for_each_rdmarxq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmarxq[i].rspq); - for_each_rdmaciq(s, i) - free_irq(adap->msix_info[msi_index++].vec, &s->rdmaciq[i].rspq); } /** @@ -1032,28 +815,30 @@ static void enable_rx(struct adapter *adap) } } -static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, - unsigned int nq, unsigned int per_chan, int msi_idx, - u16 *ids, bool lro) -{ - int i, err; - for (i = 0; i < nq; i++, q++) { - if (msi_idx > 0) - msi_idx++; - err = t4_sge_alloc_rxq(adap, &q->rspq, false, - adap->port[i / per_chan], - msi_idx, q->fl.size ? &q->fl : NULL, - uldrx_handler, - lro ? uldrx_flush_handler : NULL, - 0); +static int setup_fw_sge_queues(struct adapter *adap) +{ + struct sge *s = &adap->sge; + int err = 0; + + bitmap_zero(s->starving_fl, s->egr_sz); + bitmap_zero(s->txq_maperr, s->egr_sz); + + if (adap->flags & USING_MSIX) + adap->msi_idx = 1; /* vector 0 is for non-queue interrupts */ + else { + err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, + NULL, NULL, NULL, -1); if (err) return err; - memset(&q->stats, 0, sizeof(q->stats)); - if (ids) - ids[i] = q->rspq.abs_id; + adap->msi_idx = -((int)s->intrq.abs_id + 1); } - return 0; + + err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], + adap->msi_idx, NULL, fwevtq_handler, NULL, -1); + if (err) + t4_free_sge_resources(adap); + return err; } /** @@ -1066,41 +851,10 @@ static int alloc_ofld_rxqs(struct adapter *adap, struct sge_ofld_rxq *q, */ static int setup_sge_queues(struct adapter *adap) { - int err, msi_idx, i, j; + int err, i, j; struct sge *s = &adap->sge; - - bitmap_zero(s->starving_fl, s->egr_sz); - bitmap_zero(s->txq_maperr, s->egr_sz); - - if (adap->flags & USING_MSIX) - msi_idx = 1; /* vector 0 is for non-queue interrupts */ - else { - err = t4_sge_alloc_rxq(adap, &s->intrq, false, adap->port[0], 0, - NULL, NULL, NULL, -1); - if (err) - return err; - msi_idx = -((int)s->intrq.abs_id + 1); - } - - /* NOTE: If you add/delete any Ingress/Egress Queue allocations in here, - * don't forget to update the following which need to be - * synchronized to and changes here. - * - * 1. The calculations of MAX_INGQ in cxgb4.h. - * - * 2. Update enable_msix/name_msix_vecs/request_msix_queue_irqs - * to accommodate any new/deleted Ingress Queues - * which need MSI-X Vectors. - * - * 3. Update sge_qinfo_show() to include information on the - * new/deleted queues. - */ - err = t4_sge_alloc_rxq(adap, &s->fw_evtq, true, adap->port[0], - msi_idx, NULL, fwevtq_handler, NULL, -1); - if (err) { -freeout: t4_free_sge_resources(adap); - return err; - } + struct sge_uld_rxq_info *rxq_info = s->uld_rxq_info[CXGB4_ULD_RDMA]; + unsigned int cmplqid = 0; for_each_port(adap, i) { struct net_device *dev = adap->port[i]; @@ -1109,10 +863,10 @@ freeout: t4_free_sge_resources(adap); struct sge_eth_txq *t = &s->ethtxq[pi->first_qset]; for (j = 0; j < pi->nqsets; j++, q++) { - if (msi_idx > 0) - msi_idx++; + if (adap->msi_idx > 0) + adap->msi_idx++; err = t4_sge_alloc_rxq(adap, &q->rspq, false, dev, - msi_idx, &q->fl, + adap->msi_idx, &q->fl, t4_ethrx_handler, NULL, t4_get_mps_bg_map(adap, @@ -1131,8 +885,8 @@ freeout: t4_free_sge_resources(adap); } } - j = s->iscsiqsets / adap->params.nports; /* iscsi queues per channel */ - for_each_iscsirxq(s, i) { + j = s->ofldqsets / adap->params.nports; /* iscsi queues per channel */ + for_each_ofldtxq(s, i) { err = t4_sge_alloc_ofld_txq(adap, &s->ofldtxq[i], adap->port[i / j], s->fw_evtq.cntxt_id); @@ -1140,30 +894,15 @@ freeout: t4_free_sge_resources(adap); goto freeout; } -#define ALLOC_OFLD_RXQS(firstq, nq, per_chan, ids, lro) do { \ - err = alloc_ofld_rxqs(adap, firstq, nq, per_chan, msi_idx, ids, lro); \ - if (err) \ - goto freeout; \ - if (msi_idx > 0) \ - msi_idx += nq; \ -} while (0) - - ALLOC_OFLD_RXQS(s->iscsirxq, s->iscsiqsets, j, s->iscsi_rxq, false); - ALLOC_OFLD_RXQS(s->iscsitrxq, s->niscsitq, j, s->iscsit_rxq, true); - ALLOC_OFLD_RXQS(s->rdmarxq, s->rdmaqs, 1, s->rdma_rxq, false); - j = s->rdmaciqs / adap->params.nports; /* rdmaq queues per channel */ - ALLOC_OFLD_RXQS(s->rdmaciq, s->rdmaciqs, j, s->rdma_ciq, false); - -#undef ALLOC_OFLD_RXQS - for_each_port(adap, i) { - /* - * Note that ->rdmarxq[i].rspq.cntxt_id below is 0 if we don't + /* Note that cmplqid below is 0 if we don't * have RDMA queues, and that's the right value. */ + if (rxq_info) + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + err = t4_sge_alloc_ctrl_txq(adap, &s->ctrlq[i], adap->port[i], - s->fw_evtq.cntxt_id, - s->rdmarxq[i].rspq.cntxt_id); + s->fw_evtq.cntxt_id, cmplqid); if (err) goto freeout; } @@ -1174,6 +913,9 @@ freeout: t4_free_sge_resources(adap); RSSCONTROL_V(netdev2pinfo(adap->port[0])->tx_chan) | QUEUENUMBER_V(s->ethrxq[0].rspq.abs_id)); return 0; +freeout: + t4_free_sge_resources(adap); + return err; } /* @@ -1197,151 +939,6 @@ void t4_free_mem(void *addr) kvfree(addr); } -/* Send a Work Request to write the filter at a specified index. We construct - * a Firmware Filter Work Request to have the work done and put the indicated - * filter into "pending" mode which will prevent any further actions against - * it till we get a reply from the firmware on the completion status of the - * request. - */ -static int set_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int ftid; - - skb = alloc_skb(sizeof(*fwr), GFP_KERNEL); - if (!skb) - return -ENOMEM; - - /* If the new filter requires loopback Destination MAC and/or VLAN - * rewriting then we need to allocate a Layer 2 Table (L2T) entry for - * the filter. - */ - if (f->fs.newdmac || f->fs.newvlan) { - /* allocate L2T entry for new filter */ - f->l2t = t4_l2t_alloc_switching(adapter, f->fs.vlan, - f->fs.eport, f->fs.dmac); - if (f->l2t == NULL) { - kfree_skb(skb); - return -ENOMEM; - } - } - - ftid = adapter->tids.ftid_base + fidx; - - fwr = (struct fw_filter_wr *)__skb_put(skb, sizeof(*fwr)); - memset(fwr, 0, sizeof(*fwr)); - - /* It would be nice to put most of the following in t4_hw.c but most - * of the work is translating the cxgbtool ch_filter_specification - * into the Work Request and the definition of that structure is - * currently in cxgbtool.h which isn't appropriate to pull into the - * common code. We may eventually try to come up with a more neutral - * filter specification structure but for now it's easiest to simply - * put this fairly direct code in line ... - */ - fwr->op_pkd = htonl(FW_WR_OP_V(FW_FILTER_WR)); - fwr->len16_pkd = htonl(FW_WR_LEN16_V(sizeof(*fwr)/16)); - fwr->tid_to_iq = - htonl(FW_FILTER_WR_TID_V(ftid) | - FW_FILTER_WR_RQTYPE_V(f->fs.type) | - FW_FILTER_WR_NOREPLY_V(0) | - FW_FILTER_WR_IQ_V(f->fs.iq)); - fwr->del_filter_to_l2tix = - htonl(FW_FILTER_WR_RPTTID_V(f->fs.rpttid) | - FW_FILTER_WR_DROP_V(f->fs.action == FILTER_DROP) | - FW_FILTER_WR_DIRSTEER_V(f->fs.dirsteer) | - FW_FILTER_WR_MASKHASH_V(f->fs.maskhash) | - FW_FILTER_WR_DIRSTEERHASH_V(f->fs.dirsteerhash) | - FW_FILTER_WR_LPBK_V(f->fs.action == FILTER_SWITCH) | - FW_FILTER_WR_DMAC_V(f->fs.newdmac) | - FW_FILTER_WR_SMAC_V(f->fs.newsmac) | - FW_FILTER_WR_INSVLAN_V(f->fs.newvlan == VLAN_INSERT || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_RMVLAN_V(f->fs.newvlan == VLAN_REMOVE || - f->fs.newvlan == VLAN_REWRITE) | - FW_FILTER_WR_HITCNTS_V(f->fs.hitcnts) | - FW_FILTER_WR_TXCHAN_V(f->fs.eport) | - FW_FILTER_WR_PRIO_V(f->fs.prio) | - FW_FILTER_WR_L2TIX_V(f->l2t ? f->l2t->idx : 0)); - fwr->ethtype = htons(f->fs.val.ethtype); - fwr->ethtypem = htons(f->fs.mask.ethtype); - fwr->frag_to_ovlan_vldm = - (FW_FILTER_WR_FRAG_V(f->fs.val.frag) | - FW_FILTER_WR_FRAGM_V(f->fs.mask.frag) | - FW_FILTER_WR_IVLAN_VLD_V(f->fs.val.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLD_V(f->fs.val.ovlan_vld) | - FW_FILTER_WR_IVLAN_VLDM_V(f->fs.mask.ivlan_vld) | - FW_FILTER_WR_OVLAN_VLDM_V(f->fs.mask.ovlan_vld)); - fwr->smac_sel = 0; - fwr->rx_chan_rx_rpl_iq = - htons(FW_FILTER_WR_RX_CHAN_V(0) | - FW_FILTER_WR_RX_RPL_IQ_V(adapter->sge.fw_evtq.abs_id)); - fwr->maci_to_matchtypem = - htonl(FW_FILTER_WR_MACI_V(f->fs.val.macidx) | - FW_FILTER_WR_MACIM_V(f->fs.mask.macidx) | - FW_FILTER_WR_FCOE_V(f->fs.val.fcoe) | - FW_FILTER_WR_FCOEM_V(f->fs.mask.fcoe) | - FW_FILTER_WR_PORT_V(f->fs.val.iport) | - FW_FILTER_WR_PORTM_V(f->fs.mask.iport) | - FW_FILTER_WR_MATCHTYPE_V(f->fs.val.matchtype) | - FW_FILTER_WR_MATCHTYPEM_V(f->fs.mask.matchtype)); - fwr->ptcl = f->fs.val.proto; - fwr->ptclm = f->fs.mask.proto; - fwr->ttyp = f->fs.val.tos; - fwr->ttypm = f->fs.mask.tos; - fwr->ivlan = htons(f->fs.val.ivlan); - fwr->ivlanm = htons(f->fs.mask.ivlan); - fwr->ovlan = htons(f->fs.val.ovlan); - fwr->ovlanm = htons(f->fs.mask.ovlan); - memcpy(fwr->lip, f->fs.val.lip, sizeof(fwr->lip)); - memcpy(fwr->lipm, f->fs.mask.lip, sizeof(fwr->lipm)); - memcpy(fwr->fip, f->fs.val.fip, sizeof(fwr->fip)); - memcpy(fwr->fipm, f->fs.mask.fip, sizeof(fwr->fipm)); - fwr->lp = htons(f->fs.val.lport); - fwr->lpm = htons(f->fs.mask.lport); - fwr->fp = htons(f->fs.val.fport); - fwr->fpm = htons(f->fs.mask.fport); - if (f->fs.newsmac) - memcpy(fwr->sma, f->fs.smac, sizeof(fwr->sma)); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - set_wr_txq(skb, CPL_PRIORITY_CONTROL, f->fs.val.iport & 0x3); - t4_ofld_send(adapter, skb); - return 0; -} - -/* Delete the filter at a specified index. - */ -static int del_filter_wr(struct adapter *adapter, int fidx) -{ - struct filter_entry *f = &adapter->tids.ftid_tab[fidx]; - struct sk_buff *skb; - struct fw_filter_wr *fwr; - unsigned int len, ftid; - - len = sizeof(*fwr); - ftid = adapter->tids.ftid_base + fidx; - - skb = alloc_skb(len, GFP_KERNEL); - if (!skb) - return -ENOMEM; - - fwr = (struct fw_filter_wr *)__skb_put(skb, len); - t4_mk_filtdelwr(ftid, fwr, adapter->sge.fw_evtq.abs_id); - - /* Mark the filter as "pending" and ship off the Filter Work Request. - * When we get the Work Request Reply we'll clear the pending status. - */ - f->pending = 1; - t4_mgmt_tx(adapter, skb); - return 0; -} - static u16 cxgb_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback) { @@ -1723,19 +1320,22 @@ EXPORT_SYMBOL(cxgb4_remove_tid); */ static int tid_init(struct tid_info *t) { - size_t size; - unsigned int stid_bmap_size; - unsigned int natids = t->natids; struct adapter *adap = container_of(t, struct adapter, tids); + unsigned int max_ftids = t->nftids + t->nsftids; + unsigned int natids = t->natids; + unsigned int stid_bmap_size; + unsigned int ftid_bmap_size; + size_t size; stid_bmap_size = BITS_TO_LONGS(t->nstids + t->nsftids); + ftid_bmap_size = BITS_TO_LONGS(t->nftids); size = t->ntids * sizeof(*t->tid_tab) + natids * sizeof(*t->atid_tab) + t->nstids * sizeof(*t->stid_tab) + t->nsftids * sizeof(*t->stid_tab) + stid_bmap_size * sizeof(long) + - t->nftids * sizeof(*t->ftid_tab) + - t->nsftids * sizeof(*t->ftid_tab); + max_ftids * sizeof(*t->ftid_tab) + + ftid_bmap_size * sizeof(long); t->tid_tab = t4_alloc_mem(size); if (!t->tid_tab) @@ -1745,8 +1345,10 @@ static int tid_init(struct tid_info *t) t->stid_tab = (struct serv_entry *)&t->atid_tab[natids]; t->stid_bmap = (unsigned long *)&t->stid_tab[t->nstids + t->nsftids]; t->ftid_tab = (struct filter_entry *)&t->stid_bmap[stid_bmap_size]; + t->ftid_bmap = (unsigned long *)&t->ftid_tab[max_ftids]; spin_lock_init(&t->stid_lock); spin_lock_init(&t->atid_lock); + spin_lock_init(&t->ftid_lock); t->stids_in_use = 0; t->sftids_in_use = 0; @@ -1761,12 +1363,16 @@ static int tid_init(struct tid_info *t) t->atid_tab[natids - 1].next = &t->atid_tab[natids]; t->afree = t->atid_tab; } - bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); - /* Reserve stid 0 for T4/T5 adapters */ - if (!t->stid_base && - (CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5)) - __set_bit(0, t->stid_bmap); + if (is_offload(adap)) { + bitmap_zero(t->stid_bmap, t->nstids + t->nsftids); + /* Reserve stid 0 for T4/T5 adapters */ + if (!t->stid_base && + CHELSIO_CHIP_VERSION(adap->params.chip) <= CHELSIO_T5) + __set_bit(0, t->stid_bmap); + } + + bitmap_zero(t->ftid_bmap, t->nftids); return 0; } @@ -2316,7 +1922,7 @@ static void disable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) disable_txq_db(&adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) disable_txq_db(&adap->sge.ofldtxq[i].q); for_each_port(adap, i) disable_txq_db(&adap->sge.ctrlq[i].q); @@ -2328,7 +1934,7 @@ static void enable_dbs(struct adapter *adap) for_each_ethrxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) enable_txq_db(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) enable_txq_db(adap, &adap->sge.ctrlq[i].q); @@ -2336,9 +1942,10 @@ static void enable_dbs(struct adapter *adap) static void notify_rdma_uld(struct adapter *adap, enum cxgb4_control cmd) { - if (adap->uld_handle[CXGB4_ULD_RDMA]) - ulds[CXGB4_ULD_RDMA].control(adap->uld_handle[CXGB4_ULD_RDMA], - cmd); + enum cxgb4_uld type = CXGB4_ULD_RDMA; + + if (adap->uld && adap->uld[type].handle) + adap->uld[type].control(adap->uld[type].handle, cmd); } static void process_db_full(struct work_struct *work) @@ -2392,13 +1999,14 @@ out: if (ret) CH_WARN(adap, "DB drop recovery failed.\n"); } + static void recover_all_queues(struct adapter *adap) { int i; for_each_ethrxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ethtxq[i].q); - for_each_iscsirxq(&adap->sge, i) + for_each_ofldtxq(&adap->sge, i) sync_txq_pidx(adap, &adap->sge.ofldtxq[i].q); for_each_port(adap, i) sync_txq_pidx(adap, &adap->sge.ctrlq[i].q); @@ -2463,94 +2071,12 @@ void t4_db_dropped(struct adapter *adap) queue_work(adap->workq, &adap->db_drop_task); } -static void uld_attach(struct adapter *adap, unsigned int uld) +void t4_register_netevent_notifier(void) { - void *handle; - struct cxgb4_lld_info lli; - unsigned short i; - - lli.pdev = adap->pdev; - lli.pf = adap->pf; - lli.l2t = adap->l2t; - lli.tids = &adap->tids; - lli.ports = adap->port; - lli.vr = &adap->vres; - lli.mtus = adap->params.mtus; - if (uld == CXGB4_ULD_RDMA) { - lli.rxq_ids = adap->sge.rdma_rxq; - lli.ciq_ids = adap->sge.rdma_ciq; - lli.nrxq = adap->sge.rdmaqs; - lli.nciq = adap->sge.rdmaciqs; - } else if (uld == CXGB4_ULD_ISCSI) { - lli.rxq_ids = adap->sge.iscsi_rxq; - lli.nrxq = adap->sge.iscsiqsets; - } else if (uld == CXGB4_ULD_ISCSIT) { - lli.rxq_ids = adap->sge.iscsit_rxq; - lli.nrxq = adap->sge.niscsitq; - } - lli.ntxq = adap->sge.iscsiqsets; - lli.nchan = adap->params.nports; - lli.nports = adap->params.nports; - lli.wr_cred = adap->params.ofldq_wr_cred; - lli.adapter_type = adap->params.chip; - lli.iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); - lli.iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); - lli.iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); - lli.iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); - lli.iscsi_ppm = &adap->iscsi_ppm; - lli.cclk_ps = 1000000000 / adap->params.vpd.cclk; - lli.udb_density = 1 << adap->params.sge.eq_qpp; - lli.ucq_density = 1 << adap->params.sge.iq_qpp; - lli.filt_mode = adap->params.tp.vlan_pri_map; - /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ - for (i = 0; i < NCHAN; i++) - lli.tx_modq[i] = i; - lli.gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); - lli.db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); - lli.fw_vers = adap->params.fw_vers; - lli.dbfifo_int_thresh = dbfifo_int_thresh; - lli.sge_ingpadboundary = adap->sge.fl_align; - lli.sge_egrstatuspagesize = adap->sge.stat_len; - lli.sge_pktshift = adap->sge.pktshift; - lli.enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; - lli.max_ordird_qp = adap->params.max_ordird_qp; - lli.max_ird_adapter = adap->params.max_ird_adapter; - lli.ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; - lli.nodeid = dev_to_node(adap->pdev_dev); - - handle = ulds[uld].add(&lli); - if (IS_ERR(handle)) { - dev_warn(adap->pdev_dev, - "could not attach to the %s driver, error %ld\n", - uld_str[uld], PTR_ERR(handle)); - return; - } - - adap->uld_handle[uld] = handle; - if (!netevent_registered) { register_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = true; } - - if (adap->flags & FULL_INIT_DONE) - ulds[uld].state_change(handle, CXGB4_STATE_UP); -} - -static void attach_ulds(struct adapter *adap) -{ - unsigned int i; - - spin_lock(&adap_rcu_lock); - list_add_tail_rcu(&adap->rcu_node, &adap_rcu_list); - spin_unlock(&adap_rcu_lock); - - mutex_lock(&uld_mutex); - list_add_tail(&adap->list_node, &adapter_list); - for (i = 0; i < CXGB4_ULD_MAX; i++) - if (ulds[i].add) - uld_attach(adap, i); - mutex_unlock(&uld_mutex); } static void detach_ulds(struct adapter *adap) @@ -2560,20 +2086,16 @@ static void detach_ulds(struct adapter *adap) mutex_lock(&uld_mutex); list_del(&adap->list_node); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) { - ulds[i].state_change(adap->uld_handle[i], + if (adap->uld && adap->uld[i].handle) { + adap->uld[i].state_change(adap->uld[i].handle, CXGB4_STATE_DETACH); - adap->uld_handle[i] = NULL; + adap->uld[i].handle = NULL; } if (netevent_registered && list_empty(&adapter_list)) { unregister_netevent_notifier(&cxgb4_netevent_nb); netevent_registered = false; } mutex_unlock(&uld_mutex); - - spin_lock(&adap_rcu_lock); - list_del_rcu(&adap->rcu_node); - spin_unlock(&adap_rcu_lock); } static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) @@ -2582,61 +2104,12 @@ static void notify_ulds(struct adapter *adap, enum cxgb4_state new_state) mutex_lock(&uld_mutex); for (i = 0; i < CXGB4_ULD_MAX; i++) - if (adap->uld_handle[i]) - ulds[i].state_change(adap->uld_handle[i], new_state); + if (adap->uld && adap->uld[i].handle) + adap->uld[i].state_change(adap->uld[i].handle, + new_state); mutex_unlock(&uld_mutex); } -/** - * cxgb4_register_uld - register an upper-layer driver - * @type: the ULD type - * @p: the ULD methods - * - * Registers an upper-layer driver with this driver and notifies the ULD - * about any presently available devices that support its type. Returns - * %-EBUSY if a ULD of the same type is already registered. - */ -int cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p) -{ - int ret = 0; - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - if (ulds[type].add) { - ret = -EBUSY; - goto out; - } - ulds[type] = *p; - list_for_each_entry(adap, &adapter_list, list_node) - uld_attach(adap, type); -out: mutex_unlock(&uld_mutex); - return ret; -} -EXPORT_SYMBOL(cxgb4_register_uld); - -/** - * cxgb4_unregister_uld - unregister an upper-layer driver - * @type: the ULD type - * - * Unregisters an existing upper-layer driver. - */ -int cxgb4_unregister_uld(enum cxgb4_uld type) -{ - struct adapter *adap; - - if (type >= CXGB4_ULD_MAX) - return -EINVAL; - mutex_lock(&uld_mutex); - list_for_each_entry(adap, &adapter_list, list_node) - adap->uld_handle[type] = NULL; - ulds[type].add = NULL; - mutex_unlock(&uld_mutex); - return 0; -} -EXPORT_SYMBOL(cxgb4_unregister_uld); - #if IS_ENABLED(CONFIG_IPV6) static int cxgb4_inet6addr_handler(struct notifier_block *this, unsigned long event, void *data) @@ -2741,7 +2214,6 @@ static int cxgb_up(struct adapter *adap) adap->msix_info[0].desc, adap); if (err) goto irq_err; - err = request_msix_queue_irqs(adap); if (err) { free_irq(adap->msix_info[0].vec, adap); @@ -2819,40 +2291,6 @@ static int cxgb_close(struct net_device *dev) return t4_enable_vi(adapter, adapter->pf, pi->viid, false, false); } -/* Return an error number if the indicated filter isn't writable ... - */ -static int writable_filter(struct filter_entry *f) -{ - if (f->locked) - return -EPERM; - if (f->pending) - return -EBUSY; - - return 0; -} - -/* Delete the filter at the specified index (if valid). The checks for all - * the common problems with doing this like the filter being locked, currently - * pending in another operation, etc. - */ -static int delete_filter(struct adapter *adapter, unsigned int fidx) -{ - struct filter_entry *f; - int ret; - - if (fidx >= adapter->tids.nftids + adapter->tids.nsftids) - return -EINVAL; - - f = &adapter->tids.ftid_tab[fidx]; - ret = writable_filter(f); - if (ret) - return ret; - if (f->valid) - return del_filter_wr(adapter, fidx); - - return 0; -} - int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, __be32 sip, __be16 sport, __be16 vlan, unsigned int queue, unsigned char port, unsigned char mask) @@ -2922,7 +2360,6 @@ EXPORT_SYMBOL(cxgb4_create_server_filter); int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6) { - int ret; struct filter_entry *f; struct adapter *adap; @@ -2936,11 +2373,7 @@ int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, /* Unlock the filter */ f->locked = 0; - ret = delete_filter(adap, stid); - if (ret) - return ret; - - return 0; + return delete_filter(adap, stid); } EXPORT_SYMBOL(cxgb4_remove_server_filter); @@ -3078,6 +2511,85 @@ static int cxgb_change_mtu(struct net_device *dev, int new_mtu) return ret; } +#ifdef CONFIG_PCI_IOV +static int dummy_open(struct net_device *dev) +{ + /* Turn carrier off since we don't have to transmit anything on this + * interface. + */ + netif_carrier_off(dev); + return 0; +} + +/* Fill MAC address that will be assigned by the FW */ +static void fill_vf_station_mac_addr(struct adapter *adap) +{ + unsigned int i; + u8 hw_addr[ETH_ALEN], macaddr[ETH_ALEN]; + int err; + u8 *na; + u16 a, b; + + err = t4_get_raw_vpd_params(adap, &adap->params.vpd); + if (!err) { + na = adap->params.vpd.na; + for (i = 0; i < ETH_ALEN; i++) + hw_addr[i] = (hex2val(na[2 * i + 0]) * 16 + + hex2val(na[2 * i + 1])); + a = (hw_addr[0] << 8) | hw_addr[1]; + b = (hw_addr[1] << 8) | hw_addr[2]; + a ^= b; + a |= 0x0200; /* locally assigned Ethernet MAC address */ + a &= ~0x0100; /* not a multicast Ethernet MAC address */ + macaddr[0] = a >> 8; + macaddr[1] = a & 0xff; + + for (i = 2; i < 5; i++) + macaddr[i] = hw_addr[i + 1]; + + for (i = 0; i < adap->num_vfs; i++) { + macaddr[5] = adap->pf * 16 + i; + ether_addr_copy(adap->vfinfo[i].vf_mac_addr, macaddr); + } + } +} + +static int cxgb_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + int ret; + + /* verify MAC addr is valid */ + if (!is_valid_ether_addr(mac)) { + dev_err(pi->adapter->pdev_dev, + "Invalid Ethernet address %pM for VF %d\n", + mac, vf); + return -EINVAL; + } + + dev_info(pi->adapter->pdev_dev, + "Setting MAC %pM on VF %d\n", mac, vf); + ret = t4_set_vf_mac_acl(adap, vf + 1, 1, mac); + if (!ret) + ether_addr_copy(adap->vfinfo[vf].vf_mac_addr, mac); + return ret; +} + +static int cxgb_get_vf_config(struct net_device *dev, + int vf, struct ifla_vf_info *ivi) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + + if (vf >= adap->num_vfs) + return -EINVAL; + ivi->vf = vf; + ether_addr_copy(ivi->mac, adap->vfinfo[vf].vf_mac_addr); + return 0; +} +#endif + static int cxgb_set_mac_addr(struct net_device *dev, void *p) { int ret; @@ -3114,6 +2626,116 @@ static void cxgb_netpoll(struct net_device *dev) } #endif +static int cxgb_set_tx_maxrate(struct net_device *dev, int index, u32 rate) +{ + struct port_info *pi = netdev_priv(dev); + struct adapter *adap = pi->adapter; + struct sched_class *e; + struct ch_sched_params p; + struct ch_sched_queue qe; + u32 req_rate; + int err = 0; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (index < 0 || index > pi->nqsets - 1) + return -EINVAL; + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to rate limit on queue %d. Link Down?\n", + index); + return -EINVAL; + } + + /* Convert from Mbps to Kbps */ + req_rate = rate << 10; + + /* Max rate is 10 Gbps */ + if (req_rate >= SCHED_MAX_RATE_KBPS) { + dev_err(adap->pdev_dev, + "Invalid rate %u Mbps, Max rate is %u Gbps\n", + rate, SCHED_MAX_RATE_KBPS); + return -ERANGE; + } + + /* First unbind the queue from any existing class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = SCHED_CLS_NONE; + + err = cxgb4_sched_class_unbind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) { + dev_err(adap->pdev_dev, + "Unbinding Queue %d on port %d fail. Err: %d\n", + index, pi->port_id, err); + return err; + } + + /* Queue already unbound */ + if (!req_rate) + return 0; + + /* Fetch any available unused or matching scheduling class */ + memset(&p, 0, sizeof(p)); + p.type = SCHED_CLASS_TYPE_PACKET; + p.u.params.level = SCHED_CLASS_LEVEL_CL_RL; + p.u.params.mode = SCHED_CLASS_MODE_CLASS; + p.u.params.rateunit = SCHED_CLASS_RATEUNIT_BITS; + p.u.params.ratemode = SCHED_CLASS_RATEMODE_ABS; + p.u.params.channel = pi->tx_chan; + p.u.params.class = SCHED_CLS_NONE; + p.u.params.minrate = 0; + p.u.params.maxrate = req_rate; + p.u.params.weight = 0; + p.u.params.pktsize = dev->mtu; + + e = cxgb4_sched_class_alloc(dev, &p); + if (!e) + return -ENOMEM; + + /* Bind the queue to a scheduling class */ + memset(&qe, 0, sizeof(qe)); + qe.queue = index; + qe.class = e->idx; + + err = cxgb4_sched_class_bind(dev, (void *)(&qe), SCHED_QUEUE); + if (err) + dev_err(adap->pdev_dev, + "Queue rate limiting failed. Err: %d\n", err); + return err; +} + +static int cxgb_setup_tc(struct net_device *dev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct port_info *pi = netdev2pinfo(dev); + struct adapter *adap = netdev2adap(dev); + + if (!(adap->flags & FULL_INIT_DONE)) { + dev_err(adap->pdev_dev, + "Failed to setup tc on port %d. Link Down?\n", + pi->port_id); + return -EINVAL; + } + + if (TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS) && + tc->type == TC_SETUP_CLSU32) { + switch (tc->cls_u32->command) { + case TC_CLSU32_NEW_KNODE: + case TC_CLSU32_REPLACE_KNODE: + return cxgb4_config_knode(dev, proto, tc->cls_u32); + case TC_CLSU32_DELETE_KNODE: + return cxgb4_delete_knode(dev, proto, tc->cls_u32); + default: + return -EOPNOTSUPP; + } + } + + return -EOPNOTSUPP; +} + static const struct net_device_ops cxgb4_netdev_ops = { .ndo_open = cxgb_open, .ndo_stop = cxgb_close, @@ -3136,7 +2758,31 @@ static const struct net_device_ops cxgb4_netdev_ops = { #ifdef CONFIG_NET_RX_BUSY_POLL .ndo_busy_poll = cxgb_busy_poll, #endif + .ndo_set_tx_maxrate = cxgb_set_tx_maxrate, + .ndo_setup_tc = cxgb_setup_tc, +}; +#ifdef CONFIG_PCI_IOV +static const struct net_device_ops cxgb4_mgmt_netdev_ops = { + .ndo_open = dummy_open, + .ndo_set_vf_mac = cxgb_set_vf_mac, + .ndo_get_vf_config = cxgb_get_vf_config, +}; +#endif + +static void get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct adapter *adapter = netdev2adap(dev); + + strlcpy(info->driver, cxgb4_driver_name, sizeof(info->driver)); + strlcpy(info->version, cxgb4_driver_version, + sizeof(info->version)); + strlcpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); +} + +static const struct ethtool_ops cxgb4_mgmt_ethtool_ops = { + .get_drvinfo = get_drvinfo, }; void t4_fatal_err(struct adapter *adap) @@ -3979,6 +3625,12 @@ static int adap_init0(struct adapter *adap) adap->clipt_start = val[0]; adap->clipt_end = val[1]; + /* We don't yet have a PARAMs calls to retrieve the number of Traffic + * Classes supported by the hardware/firmware so we hard code it here + * for now. + */ + adap->params.nsched_cls = is_t4(adap->params.chip) ? 15 : 16; + /* query params related to active filter region */ params[0] = FW_PARAM_PFVF(ACTIVE_FILTER_START); params[1] = FW_PARAM_PFVF(ACTIVE_FILTER_END); @@ -4067,6 +3719,7 @@ static int adap_init0(struct adapter *adap) adap->params.ofldq_wr_cred = val[5]; adap->params.offload = 1; + adap->num_ofld_uld += 1; } if (caps_cmd.rdmacaps) { params[0] = FW_PARAM_PFVF(STAG_START); @@ -4119,6 +3772,7 @@ static int adap_init0(struct adapter *adap) "max_ordird_qp %d max_ird_adapter %d\n", adap->params.max_ordird_qp, adap->params.max_ird_adapter); + adap->num_ofld_uld += 2; } if (caps_cmd.iscsicaps) { params[0] = FW_PARAM_PFVF(ISCSI_START); @@ -4129,6 +3783,13 @@ static int adap_init0(struct adapter *adap) goto bye; adap->vres.iscsi.start = val[0]; adap->vres.iscsi.size = val[1] - val[0] + 1; + /* LIO target and cxgb4i initiaitor */ + adap->num_ofld_uld += 2; + } + if (caps_cmd.cryptocaps) { + /* Should query params here...TODO */ + adap->params.crypto |= ULP_CRYPTO_LOOKASIDE; + adap->num_uld += 1; } #undef FW_PARAM_PFVF #undef FW_PARAM_DEV @@ -4318,16 +3979,6 @@ static inline bool is_x_10g_port(const struct link_config *lc) return high_speeds != 0; } -static inline void init_rspq(struct adapter *adap, struct sge_rspq *q, - unsigned int us, unsigned int cnt, - unsigned int size, unsigned int iqe_size) -{ - q->adap = adap; - cxgb4_set_rspq_intr_params(q, us, cnt); - q->iqe_len = iqe_size; - q->size = size; -} - /* * Perform default configuration of DMA queues depending on the number and type * of ports we found and the number of available CPUs. Most settings can be @@ -4340,12 +3991,16 @@ static void cfg_queues(struct adapter *adap) #ifndef CONFIG_CHELSIO_T4_DCB int q10g = 0; #endif - int ciq_size; /* Reduce memory usage in kdump environment, disable all offload. */ - if (is_kdump_kernel()) + if (is_kdump_kernel()) { adap->params.offload = 0; + adap->params.crypto = 0; + } else if (is_uld(adap) && t4_uld_mem_alloc(adap)) { + adap->params.offload = 0; + adap->params.crypto = 0; + } for_each_port(adap, i) n10g += is_x_10g_port(&adap2pinfo(adap, i)->link_cfg); @@ -4389,33 +4044,18 @@ static void cfg_queues(struct adapter *adap) s->ethqsets = qidx; s->max_ethqsets = qidx; /* MSI-X may lower it later */ - if (is_offload(adap)) { + if (is_uld(adap)) { /* * For offload we use 1 queue/channel if all ports are up to 1G, * otherwise we divide all available queues amongst the channels * capped by the number of available cores. */ if (n10g) { - i = min_t(int, ARRAY_SIZE(s->iscsirxq), - num_online_cpus()); - s->iscsiqsets = roundup(i, adap->params.nports); - } else - s->iscsiqsets = adap->params.nports; - /* For RDMA one Rx queue per channel suffices */ - s->rdmaqs = adap->params.nports; - /* Try and allow at least 1 CIQ per cpu rounding down - * to the number of ports, with a minimum of 1 per port. - * A 2 port card in a 6 cpu system: 6 CIQs, 3 / port. - * A 4 port card in a 6 cpu system: 4 CIQs, 1 / port. - * A 4 port card in a 2 cpu system: 4 CIQs, 1 / port. - */ - s->rdmaciqs = min_t(int, MAX_RDMA_CIQS, num_online_cpus()); - s->rdmaciqs = (s->rdmaciqs / adap->params.nports) * - adap->params.nports; - s->rdmaciqs = max_t(int, s->rdmaciqs, adap->params.nports); - - if (!is_t4(adap->params.chip)) - s->niscsitq = s->iscsiqsets; + i = num_online_cpus(); + s->ofldqsets = roundup(i, adap->params.nports); + } else { + s->ofldqsets = adap->params.nports; + } } for (i = 0; i < ARRAY_SIZE(s->ethrxq); i++) { @@ -4434,47 +4074,8 @@ static void cfg_queues(struct adapter *adap) for (i = 0; i < ARRAY_SIZE(s->ofldtxq); i++) s->ofldtxq[i].q.size = 1024; - for (i = 0; i < ARRAY_SIZE(s->iscsirxq); i++) { - struct sge_ofld_rxq *r = &s->iscsirxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSI; - r->fl.size = 72; - } - - if (!is_t4(adap->params.chip)) { - for (i = 0; i < ARRAY_SIZE(s->iscsitrxq); i++) { - struct sge_ofld_rxq *r = &s->iscsitrxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 1024, 64); - r->rspq.uld = CXGB4_ULD_ISCSIT; - r->fl.size = 72; - } - } - - for (i = 0; i < ARRAY_SIZE(s->rdmarxq); i++) { - struct sge_ofld_rxq *r = &s->rdmarxq[i]; - - init_rspq(adap, &r->rspq, 5, 1, 511, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - r->fl.size = 72; - } - - ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; - if (ciq_size > SGE_MAX_IQ_SIZE) { - CH_WARN(adap, "CIQ size too small for available IQs\n"); - ciq_size = SGE_MAX_IQ_SIZE; - } - - for (i = 0; i < ARRAY_SIZE(s->rdmaciq); i++) { - struct sge_ofld_rxq *r = &s->rdmaciq[i]; - - init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); - r->rspq.uld = CXGB4_ULD_RDMA; - } - init_rspq(adap, &s->fw_evtq, 0, 1, 1024, 64); - init_rspq(adap, &s->intrq, 0, 1, 2 * MAX_INGQ, 64); + init_rspq(adap, &s->intrq, 0, 1, 512, 64); } /* @@ -4505,42 +4106,90 @@ static void reduce_ethqs(struct adapter *adap, int n) } } +static int get_msix_info(struct adapter *adap) +{ + struct uld_msix_info *msix_info; + unsigned int max_ingq = 0; + + if (is_offload(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_ofld_uld; + if (is_pci_uld(adap)) + max_ingq += MAX_OFLD_QSETS * adap->num_uld; + + if (!max_ingq) + goto out; + + msix_info = kcalloc(max_ingq, sizeof(*msix_info), GFP_KERNEL); + if (!msix_info) + return -ENOMEM; + + adap->msix_bmap_ulds.msix_bmap = kcalloc(BITS_TO_LONGS(max_ingq), + sizeof(long), GFP_KERNEL); + if (!adap->msix_bmap_ulds.msix_bmap) { + kfree(msix_info); + return -ENOMEM; + } + spin_lock_init(&adap->msix_bmap_ulds.lock); + adap->msix_info_ulds = msix_info; +out: + return 0; +} + +static void free_msix_info(struct adapter *adap) +{ + if (!(adap->num_uld && adap->num_ofld_uld)) + return; + + kfree(adap->msix_info_ulds); + kfree(adap->msix_bmap_ulds.msix_bmap); +} + /* 2 MSI-X vectors needed for the FW queue and non-data interrupts */ #define EXTRA_VECS 2 static int enable_msix(struct adapter *adap) { - int ofld_need = 0; - int i, want, need, allocated; + int ofld_need = 0, uld_need = 0; + int i, j, want, need, allocated; struct sge *s = &adap->sge; unsigned int nchan = adap->params.nports; struct msix_entry *entries; + int max_ingq = MAX_INGQ; - entries = kmalloc(sizeof(*entries) * (MAX_INGQ + 1), + if (is_pci_uld(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_uld); + if (is_offload(adap)) + max_ingq += (MAX_OFLD_QSETS * adap->num_ofld_uld); + entries = kmalloc(sizeof(*entries) * (max_ingq + 1), GFP_KERNEL); if (!entries) return -ENOMEM; - for (i = 0; i < MAX_INGQ + 1; ++i) + /* map for msix */ + if (get_msix_info(adap)) { + adap->params.offload = 0; + adap->params.crypto = 0; + } + + for (i = 0; i < max_ingq + 1; ++i) entries[i].entry = i; want = s->max_ethqsets + EXTRA_VECS; if (is_offload(adap)) { - want += s->rdmaqs + s->rdmaciqs + s->iscsiqsets + - s->niscsitq; - /* need nchan for each possible ULD */ - if (is_t4(adap->params.chip)) - ofld_need = 3 * nchan; - else - ofld_need = 4 * nchan; + want += adap->num_ofld_uld * s->ofldqsets; + ofld_need = adap->num_ofld_uld * nchan; + } + if (is_pci_uld(adap)) { + want += adap->num_uld * s->ofldqsets; + uld_need = adap->num_uld * nchan; } #ifdef CONFIG_CHELSIO_T4_DCB /* For Data Center Bridging we need 8 Ethernet TX Priority Queues for * each port. */ - need = 8 * adap->params.nports + EXTRA_VECS + ofld_need; + need = 8 * adap->params.nports + EXTRA_VECS + ofld_need + uld_need; #else - need = adap->params.nports + EXTRA_VECS + ofld_need; + need = adap->params.nports + EXTRA_VECS + ofld_need + uld_need; #endif allocated = pci_enable_msix_range(adap->pdev, entries, need, want); if (allocated < 0) { @@ -4554,33 +4203,31 @@ static int enable_msix(struct adapter *adap) * Every group gets its minimum requirement and NIC gets top * priority for leftovers. */ - i = allocated - EXTRA_VECS - ofld_need; + i = allocated - EXTRA_VECS - ofld_need - uld_need; if (i < s->max_ethqsets) { s->max_ethqsets = i; if (i < s->ethqsets) reduce_ethqs(adap, i); } - if (is_offload(adap)) { - if (allocated < want) { - s->rdmaqs = nchan; - s->rdmaciqs = nchan; - - if (!is_t4(adap->params.chip)) - s->niscsitq = nchan; - } - - /* leftovers go to OFLD */ - i = allocated - EXTRA_VECS - s->max_ethqsets - - s->rdmaqs - s->rdmaciqs - s->niscsitq; - s->iscsiqsets = (i / nchan) * nchan; /* round down */ - + if (is_uld(adap)) { + if (allocated < want) + s->nqs_per_uld = nchan; + else + s->nqs_per_uld = s->ofldqsets; } - for (i = 0; i < allocated; ++i) + + for (i = 0; i < (s->max_ethqsets + EXTRA_VECS); ++i) adap->msix_info[i].vec = entries[i].vector; + if (is_uld(adap)) { + for (j = 0 ; i < allocated; ++i, j++) { + adap->msix_info_ulds[j].vec = entries[i].vector; + adap->msix_info_ulds[j].idx = i; + } + adap->msix_bmap_ulds.mapsize = j; + } dev_info(adap->pdev_dev, "%d MSI-X vectors allocated, " - "nic %d iscsi %d rdma cpl %d rdma ciq %d\n", - allocated, s->max_ethqsets, s->iscsiqsets, s->rdmaqs, - s->rdmaciqs); + "nic %d per uld %d\n", + allocated, s->max_ethqsets, s->nqs_per_uld); kfree(entries); return 0; @@ -4794,7 +4441,9 @@ static void free_some_resources(struct adapter *adapter) unsigned int i; t4_free_mem(adapter->l2t); + t4_cleanup_sched(adapter); t4_free_mem(adapter->tids.tid_tab); + cxgb4_cleanup_tc_u32(adapter); kfree(adapter->sge.egr_map); kfree(adapter->sge.ingr_map); kfree(adapter->sge.starving_fl); @@ -4845,21 +4494,59 @@ static int get_chip_type(struct pci_dev *pdev, u32 pl_rev) } #ifdef CONFIG_PCI_IOV +static void dummy_setup(struct net_device *dev) +{ + dev->type = ARPHRD_NONE; + dev->mtu = 0; + dev->hard_header_len = 0; + dev->addr_len = 0; + dev->tx_queue_len = 0; + dev->flags |= IFF_NOARP; + dev->priv_flags |= IFF_NO_QUEUE; + + /* Initialize the device structure. */ + dev->netdev_ops = &cxgb4_mgmt_netdev_ops; + dev->ethtool_ops = &cxgb4_mgmt_ethtool_ops; + dev->destructor = free_netdev; +} + +static int config_mgmt_dev(struct pci_dev *pdev) +{ + struct adapter *adap = pci_get_drvdata(pdev); + struct net_device *netdev; + struct port_info *pi; + char name[IFNAMSIZ]; + int err; + + snprintf(name, IFNAMSIZ, "mgmtpf%d%d", adap->adap_idx, adap->pf); + netdev = alloc_netdev(0, name, NET_NAME_UNKNOWN, dummy_setup); + if (!netdev) + return -ENOMEM; + + pi = netdev_priv(netdev); + pi->adapter = adap; + SET_NETDEV_DEV(netdev, &pdev->dev); + + adap->port[0] = netdev; + + err = register_netdev(adap->port[0]); + if (err) { + pr_info("Unable to register VF mgmt netdev %s\n", name); + free_netdev(adap->port[0]); + adap->port[0] = NULL; + return err; + } + return 0; +} + static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) { + struct adapter *adap = pci_get_drvdata(pdev); int err = 0; int current_vfs = pci_num_vf(pdev); u32 pcie_fw; - void __iomem *regs; - regs = pci_ioremap_bar(pdev, 0); - if (!regs) { - dev_err(&pdev->dev, "cannot map device registers\n"); - return -ENOMEM; - } - - pcie_fw = readl(regs + PCIE_FW_A); - iounmap(regs); + pcie_fw = readl(adap->regs + PCIE_FW_A); /* Check if cxgb4 is the MASTER and fw is initialized */ if (!(pcie_fw & PCIE_FW_INIT_F) || !(pcie_fw & PCIE_FW_MASTER_VLD_F) || @@ -4886,6 +4573,14 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) */ if (!num_vfs) { pci_disable_sriov(pdev); + if (adap->port[0]) { + unregister_netdev(adap->port[0]); + adap->port[0] = NULL; + } + /* free VF resources */ + kfree(adap->vfinfo); + adap->vfinfo = NULL; + adap->num_vfs = 0; return num_vfs; } @@ -4893,7 +4588,17 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs) err = pci_enable_sriov(pdev, num_vfs); if (err) return err; + + adap->num_vfs = num_vfs; + err = config_mgmt_dev(pdev); + if (err) + return err; } + + adap->vfinfo = kcalloc(adap->num_vfs, + sizeof(struct vf_info), GFP_KERNEL); + if (adap->vfinfo) + fill_vf_station_mac_addr(adap); return num_vfs; } #endif @@ -4904,9 +4609,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct port_info *pi; bool highdma = false; struct adapter *adapter = NULL; + struct net_device *netdev; void __iomem *regs; u32 whoami, pl_rev; enum chip_type chip; + static int adap_idx = 1; printk_once(KERN_INFO "%s - version %s\n", DRV_DESC, DRV_VERSION); @@ -4941,7 +4648,9 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) func = CHELSIO_CHIP_VERSION(chip) <= CHELSIO_T5 ? SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); if (func != ent->driver_data) { +#ifndef CONFIG_PCI_IOV iounmap(regs); +#endif pci_disable_device(pdev); pci_save_state(pdev); /* to restore SR-IOV later */ goto sriov; @@ -4973,6 +4682,7 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) err = -ENOMEM; goto out_unmap_bar0; } + adap_idx++; adapter->workq = create_singlethread_workqueue("cxgb4"); if (!adapter->workq) { @@ -5059,8 +4769,6 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) T6_STATMODE_V(0))); for_each_port(adapter, i) { - struct net_device *netdev; - netdev = alloc_etherdev_mq(sizeof(struct port_info), MAX_ETH_QSETS); if (!netdev) { @@ -5080,7 +4788,8 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) netdev->hw_features = NETIF_F_SG | TSO_FLAGS | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM | NETIF_F_RXHASH | - NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX; + NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_TC; if (highdma) netdev->hw_features |= NETIF_F_HIGHDMA; netdev->features |= netdev->hw_features; @@ -5154,10 +4863,26 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) } } #endif - if (is_offload(adapter) && tid_init(&adapter->tids) < 0) { + + for_each_port(adapter, i) { + pi = adap2pinfo(adapter, i); + pi->sched_tbl = t4_init_sched(adapter->params.nsched_cls); + if (!pi->sched_tbl) + dev_warn(&pdev->dev, + "could not activate scheduling on port %d\n", + i); + } + + if (tid_init(&adapter->tids) < 0) { dev_warn(&pdev->dev, "could not allocate TID table, " "continuing\n"); adapter->params.offload = 0; + } else { + adapter->tc_u32 = cxgb4_init_tc_u32(adapter, + CXGB4_MAX_LINK_HANDLE); + if (!adapter->tc_u32) + dev_warn(&pdev->dev, + "could not offload tc u32, continuing\n"); } if (is_offload(adapter)) { @@ -5179,8 +4904,11 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* See what interrupts we'll be using */ if (msi > 1 && enable_msix(adapter) == 0) adapter->flags |= USING_MSIX; - else if (msi > 0 && pci_enable_msi(pdev) == 0) + else if (msi > 0 && pci_enable_msi(pdev) == 0) { adapter->flags |= USING_MSI; + if (msi > 1) + free_msix_info(adapter); + } /* check for PCI Express bandwidth capabiltites */ cxgb4_check_pcie_caps(adapter); @@ -5224,10 +4952,15 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* PCIe EEH recovery on powerpc platforms needs fundamental reset */ pdev->needs_freset = 1; - if (is_offload(adapter)) - attach_ulds(adapter); + if (is_uld(adapter)) { + mutex_lock(&uld_mutex); + list_add_tail(&adapter->list_node, &adapter_list); + mutex_unlock(&uld_mutex); + } print_adapter_info(adapter); + setup_fw_sge_queues(adapter); + return 0; sriov: #ifdef CONFIG_PCI_IOV @@ -5241,11 +4974,48 @@ sriov: "instantiated %u virtual functions\n", num_vf[func]); } -#endif + + adapter = kzalloc(sizeof(*adapter), GFP_KERNEL); + if (!adapter) { + err = -ENOMEM; + goto free_pci_region; + } + + adapter->pdev = pdev; + adapter->pdev_dev = &pdev->dev; + adapter->name = pci_name(pdev); + adapter->mbox = func; + adapter->pf = func; + adapter->regs = regs; + adapter->adap_idx = adap_idx; + adapter->mbox_log = kzalloc(sizeof(*adapter->mbox_log) + + (sizeof(struct mbox_cmd) * + T4_OS_LOG_MBOX_CMDS), + GFP_KERNEL); + if (!adapter->mbox_log) { + err = -ENOMEM; + goto free_adapter; + } + pci_set_drvdata(pdev, adapter); return 0; + free_adapter: + kfree(adapter); + free_pci_region: + iounmap(regs); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + return err; +#else + return 0; +#endif + out_free_dev: free_some_resources(adapter); + if (adapter->flags & USING_MSIX) + free_msix_info(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); out_unmap_bar: if (!is_t4(adapter->params.chip)) iounmap(adapter->bar2); @@ -5269,12 +5039,12 @@ static void remove_one(struct pci_dev *pdev) { struct adapter *adapter = pci_get_drvdata(pdev); -#ifdef CONFIG_PCI_IOV - pci_disable_sriov(pdev); + if (!adapter) { + pci_release_regions(pdev); + return; + } -#endif - - if (adapter) { + if (adapter->pf == 4) { int i; /* Tear down per-adapter Work Queue first since it can contain @@ -5282,7 +5052,7 @@ static void remove_one(struct pci_dev *pdev) */ destroy_workqueue(adapter->workq); - if (is_offload(adapter)) + if (is_uld(adapter)) detach_ulds(adapter); disable_interrupts(adapter); @@ -5296,17 +5066,15 @@ static void remove_one(struct pci_dev *pdev) /* If we allocated filters, free up state associated with any * valid filters ... */ - if (adapter->tids.ftid_tab) { - struct filter_entry *f = &adapter->tids.ftid_tab[0]; - for (i = 0; i < (adapter->tids.nftids + - adapter->tids.nsftids); i++, f++) - if (f->valid) - clear_filter(adapter, f); - } + clear_all_filters(adapter); if (adapter->flags & FULL_INIT_DONE) cxgb_down(adapter); + if (adapter->flags & USING_MSIX) + free_msix_info(adapter); + if (adapter->num_uld || adapter->num_ofld_uld) + t4_uld_mem_free(adapter); free_some_resources(adapter); #if IS_ENABLED(CONFIG_IPV6) t4_cleanup_clip_tbl(adapter); @@ -5323,8 +5091,64 @@ static void remove_one(struct pci_dev *pdev) kfree(adapter->mbox_log); synchronize_rcu(); kfree(adapter); - } else + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); pci_release_regions(pdev); + } +#endif +} + +/* "Shutdown" quiesces the device, stopping Ingress Packet and Interrupt + * delivery. This is essentially a stripped down version of the PCI remove() + * function where we do the minimal amount of work necessary to shutdown any + * further activity. + */ +static void shutdown_one(struct pci_dev *pdev) +{ + struct adapter *adapter = pci_get_drvdata(pdev); + + /* As with remove_one() above (see extended comment), we only want do + * do cleanup on PCI Devices which went all the way through init_one() + * ... + */ + if (!adapter) { + pci_release_regions(pdev); + return; + } + + if (adapter->pf == 4) { + int i; + + for_each_port(adapter, i) + if (adapter->port[i]->reg_state == NETREG_REGISTERED) + cxgb_close(adapter->port[i]); + + t4_uld_clean_up(adapter); + disable_interrupts(adapter); + disable_msi(adapter); + + t4_sge_stop(adapter); + if (adapter->flags & FW_OK) + t4_fw_bye(adapter, adapter->mbox); + } +#ifdef CONFIG_PCI_IOV + else { + if (adapter->port[0]) + unregister_netdev(adapter->port[0]); + iounmap(adapter->regs); + kfree(adapter->vfinfo); + kfree(adapter); + pci_disable_sriov(pdev); + pci_release_regions(pdev); + } +#endif } static struct pci_driver cxgb4_driver = { @@ -5332,7 +5156,7 @@ static struct pci_driver cxgb4_driver = { .id_table = cxgb4_pci_tbl, .probe = init_one, .remove = remove_one, - .shutdown = remove_one, + .shutdown = shutdown_one, #ifdef CONFIG_PCI_IOV .sriov_configure = cxgb4_iov_configure, #endif diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c new file mode 100644 index 000000000000..49d2debb334e --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.c @@ -0,0 +1,483 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cxgb4.h" +#include "cxgb4_tc_u32_parse.h" +#include "cxgb4_tc_u32.h" + +/* Fill ch_filter_specification with parsed match value/mask pair. */ +static int fill_match_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls, + const struct cxgb4_match_field *entry, + bool next_header) +{ + unsigned int i, j; + u32 val, mask; + int off, err; + bool found; + + for (i = 0; i < cls->knode.sel->nkeys; i++) { + off = cls->knode.sel->keys[i].off; + val = cls->knode.sel->keys[i].val; + mask = cls->knode.sel->keys[i].mask; + + if (next_header) { + /* For next headers, parse only keys with offmask */ + if (!cls->knode.sel->keys[i].offmask) + continue; + } else { + /* For the remaining, parse only keys without offmask */ + if (cls->knode.sel->keys[i].offmask) + continue; + } + + found = false; + + for (j = 0; entry[j].val; j++) { + if (off == entry[j].off) { + found = true; + err = entry[j].val(fs, val, mask); + if (err) + return err; + break; + } + } + + if (!found) + return -EINVAL; + } + + return 0; +} + +/* Fill ch_filter_specification with parsed action. */ +static int fill_action_fields(struct adapter *adap, + struct ch_filter_specification *fs, + struct tc_cls_u32_offload *cls) +{ + unsigned int num_actions = 0; + const struct tc_action *a; + struct tcf_exts *exts; + LIST_HEAD(actions); + + exts = cls->knode.exts; + if (tc_no_actions(exts)) + return -EINVAL; + + tcf_exts_to_list(exts, &actions); + list_for_each_entry(a, &actions, list) { + /* Don't allow more than one action per rule. */ + if (num_actions) + return -EINVAL; + + /* Drop in hardware. */ + if (is_tcf_gact_shot(a)) { + fs->action = FILTER_DROP; + num_actions++; + continue; + } + + /* Re-direct to specified port in hardware. */ + if (is_tcf_mirred_redirect(a)) { + struct net_device *n_dev; + unsigned int i, index; + bool found = false; + + index = tcf_mirred_ifindex(a); + for_each_port(adap, i) { + n_dev = adap->port[i]; + if (index == n_dev->ifindex) { + fs->action = FILTER_SWITCH; + fs->eport = i; + found = true; + break; + } + } + + /* Interface doesn't belong to any port of + * the underlying hardware. + */ + if (!found) + return -EINVAL; + + num_actions++; + continue; + } + + /* Un-supported action. */ + return -EINVAL; + } + + return 0; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + const struct cxgb4_match_field *start, *link_start = NULL; + struct adapter *adapter = netdev2adap(dev); + struct ch_filter_specification fs; + struct cxgb4_tc_u32_table *t; + struct cxgb4_link *link; + unsigned int filter_id; + u32 uhtid, link_uhtid; + bool is_ipv6 = false; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + if (protocol != htons(ETH_P_IP) && protocol != htons(ETH_P_IPV6)) + return -EOPNOTSUPP; + + /* Fetch the location to insert the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for insertion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + uhtid = TC_U32_USERHTID(cls->knode.handle); + link_uhtid = TC_U32_USERHTID(cls->knode.link_handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Ensure link handle uhtid is sane, if specified. */ + if (link_uhtid >= t->size) + return -EINVAL; + + memset(&fs, 0, sizeof(fs)); + + if (protocol == htons(ETH_P_IPV6)) { + start = cxgb4_ipv6_fields; + is_ipv6 = true; + } else { + start = cxgb4_ipv4_fields; + is_ipv6 = false; + } + + if (uhtid != 0x800) { + /* Link must exist from root node before insertion. */ + if (!t->table[uhtid - 1].link_handle) + return -EINVAL; + + /* Link must have a valid supported next header. */ + link_start = t->table[uhtid - 1].match_field; + if (!link_start) + return -EINVAL; + } + + /* Parse links and record them for subsequent jumps to valid + * next headers. + */ + if (link_uhtid) { + const struct cxgb4_next_header *next; + bool found = false; + unsigned int i, j; + u32 val, mask; + int off; + + if (t->table[link_uhtid - 1].link_handle) { + dev_err(adapter->pdev_dev, + "Link handle exists for: 0x%x\n", + link_uhtid); + return -EINVAL; + } + + next = is_ipv6 ? cxgb4_ipv6_jumps : cxgb4_ipv4_jumps; + + /* Try to find matches that allow jumps to next header. */ + for (i = 0; next[i].jump; i++) { + if (next[i].offoff != cls->knode.sel->offoff || + next[i].shift != cls->knode.sel->offshift || + next[i].mask != cls->knode.sel->offmask || + next[i].offset != cls->knode.sel->off) + continue; + + /* Found a possible candidate. Find a key that + * matches the corresponding offset, value, and + * mask to jump to next header. + */ + for (j = 0; j < cls->knode.sel->nkeys; j++) { + off = cls->knode.sel->keys[j].off; + val = cls->knode.sel->keys[j].val; + mask = cls->knode.sel->keys[j].mask; + + if (next[i].match_off == off && + next[i].match_val == val && + next[i].match_mask == mask) { + found = true; + break; + } + } + + if (!found) + continue; /* Try next candidate. */ + + /* Candidate to jump to next header found. + * Translate all keys to internal specification + * and store them in jump table. This spec is copied + * later to set the actual filters. + */ + ret = fill_match_fields(adapter, &fs, cls, + start, false); + if (ret) + goto out; + + link = &t->table[link_uhtid - 1]; + link->match_field = next[i].jump; + link->link_handle = cls->knode.handle; + memcpy(&link->fs, &fs, sizeof(fs)); + break; + } + + /* No candidate found to jump to next header. */ + if (!found) + return -EINVAL; + + return 0; + } + + /* Fill ch_filter_specification match fields to be shipped to hardware. + * Copy the linked spec (if any) first. And then update the spec as + * needed. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) { + /* Copy linked ch_filter_specification */ + memcpy(&fs, &t->table[uhtid - 1].fs, sizeof(fs)); + ret = fill_match_fields(adapter, &fs, cls, + link_start, true); + if (ret) + goto out; + } + + ret = fill_match_fields(adapter, &fs, cls, start, false); + if (ret) + goto out; + + /* Fill ch_filter_specification action fields to be shipped to + * hardware. + */ + ret = fill_action_fields(adapter, &fs, cls); + if (ret) + goto out; + + /* The filter spec has been completely built from the info + * provided from u32. We now set some default fields in the + * spec for sanity. + */ + + /* Match only packets coming from the ingress port where this + * filter will be created. + */ + fs.val.iport = netdev2pinfo(dev)->port_id; + fs.mask.iport = ~0; + + /* Enable filter hit counts. */ + fs.hitcnts = 1; + + /* Set type of filter - IPv6 or IPv4 */ + fs.type = is_ipv6 ? 1 : 0; + + /* Set the filter */ + ret = cxgb4_set_filter(dev, filter_id, &fs); + if (ret) + goto out; + + /* If this is a linked bucket, then set the corresponding + * entry in the bitmap to mark it as belonging to this linked + * bucket. + */ + if (uhtid != 0x800 && t->table[uhtid - 1].link_handle) + set_bit(filter_id, t->table[uhtid - 1].tid_map); + +out: + return ret; +} + +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls) +{ + struct adapter *adapter = netdev2adap(dev); + unsigned int filter_id, max_tids, i, j; + struct cxgb4_link *link = NULL; + struct cxgb4_tc_u32_table *t; + u32 handle, uhtid; + int ret; + + if (!can_tc_u32_offload(dev)) + return -EOPNOTSUPP; + + /* Fetch the location to delete the filter. */ + filter_id = cls->knode.handle & 0xFFFFF; + + if (filter_id > adapter->tids.nftids) { + dev_err(adapter->pdev_dev, + "Location %d out of range for deletion. Max: %d\n", + filter_id, adapter->tids.nftids); + return -ERANGE; + } + + t = adapter->tc_u32; + handle = cls->knode.handle; + uhtid = TC_U32_USERHTID(cls->knode.handle); + + /* Ensure that uhtid is either root u32 (i.e. 0x800) + * or a a valid linked bucket. + */ + if (uhtid != 0x800 && uhtid >= t->size) + return -EINVAL; + + /* Delete the specified filter */ + if (uhtid != 0x800) { + link = &t->table[uhtid - 1]; + if (!link->link_handle) + return -EINVAL; + + if (!test_bit(filter_id, link->tid_map)) + return -EINVAL; + } + + ret = cxgb4_del_filter(dev, filter_id); + if (ret) + goto out; + + if (link) + clear_bit(filter_id, link->tid_map); + + /* If a link is being deleted, then delete all filters + * associated with the link. + */ + max_tids = adapter->tids.nftids; + for (i = 0; i < t->size; i++) { + link = &t->table[i]; + + if (link->link_handle == handle) { + for (j = 0; j < max_tids; j++) { + if (!test_bit(j, link->tid_map)) + continue; + + ret = __cxgb4_del_filter(dev, j, NULL); + if (ret) + goto out; + + clear_bit(j, link->tid_map); + } + + /* Clear the link state */ + link->match_field = NULL; + link->link_handle = 0; + memset(&link->fs, 0, sizeof(link->fs)); + break; + } + } + +out: + return ret; +} + +void cxgb4_cleanup_tc_u32(struct adapter *adap) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!adap->tc_u32) + return; + + /* Free up all allocated memory. */ + t = adap->tc_u32; + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + t4_free_mem(link->tid_map); + } + t4_free_mem(adap->tc_u32); +} + +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size) +{ + struct cxgb4_tc_u32_table *t; + unsigned int i; + + if (!size) + return NULL; + + t = t4_alloc_mem(sizeof(*t) + + (size * sizeof(struct cxgb4_link))); + if (!t) + return NULL; + + t->size = size; + + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + unsigned int bmap_size; + unsigned int max_tids; + + max_tids = adap->tids.nftids; + bmap_size = BITS_TO_LONGS(max_tids); + link->tid_map = t4_alloc_mem(sizeof(unsigned long) * bmap_size); + if (!link->tid_map) + goto out_no_mem; + bitmap_zero(link->tid_map, max_tids); + } + + return t; + +out_no_mem: + for (i = 0; i < t->size; i++) { + struct cxgb4_link *link = &t->table[i]; + + if (link->tid_map) + t4_free_mem(link->tid_map); + } + + if (t) + t4_free_mem(t); + + return NULL; +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h new file mode 100644 index 000000000000..6bdc885eff22 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32.h @@ -0,0 +1,57 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_H +#define __CXGB4_TC_U32_H + +#include + +#define CXGB4_MAX_LINK_HANDLE 32 + +static inline bool can_tc_u32_offload(struct net_device *dev) +{ + struct adapter *adap = netdev2adap(dev); + + return (dev->features & NETIF_F_HW_TC) && adap->tc_u32 ? true : false; +} + +int cxgb4_config_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); +int cxgb4_delete_knode(struct net_device *dev, __be16 protocol, + struct tc_cls_u32_offload *cls); + +void cxgb4_cleanup_tc_u32(struct adapter *adapter); +struct cxgb4_tc_u32_table *cxgb4_init_tc_u32(struct adapter *adap, + unsigned int size); +#endif /* __CXGB4_TC_U32_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h new file mode 100644 index 000000000000..a4b99edcc339 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_u32_parse.h @@ -0,0 +1,294 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_TC_U32_PARSE_H +#define __CXGB4_TC_U32_PARSE_H + +struct cxgb4_match_field { + int off; /* Offset from the beginning of the header to match */ + /* Fill the value/mask pair in the spec if matched */ + int (*val)(struct ch_filter_specification *f, u32 val, u32 mask); +}; + +/* IPv4 match fields */ +static inline int cxgb4_fill_ipv4_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 16) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_frag(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + u32 mask_val; + u8 frag_val; + + frag_val = (ntohl(val) >> 13) & 0x00000007; + mask_val = ntohl(mask) & 0x0000FFFF; + + if (frag_val == 0x1 && mask_val != 0x3FFF) { /* MF set */ + f->val.frag = 1; + f->mask.frag = 1; + } else if (frag_val == 0x2 && mask_val != 0x3FFF) { /* DF set */ + f->val.frag = 0; + f->mask.frag = 1; + } else { + return -EINVAL; + } + + return 0; +} + +static inline int cxgb4_fill_ipv4_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 16) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 16) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv4_src_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv4_dst_ip(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv4_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv4_tos }, + { .off = 4, .val = cxgb4_fill_ipv4_frag }, + { .off = 8, .val = cxgb4_fill_ipv4_proto }, + { .off = 12, .val = cxgb4_fill_ipv4_src_ip }, + { .off = 16, .val = cxgb4_fill_ipv4_dst_ip }, + { .val = NULL } +}; + +/* IPv6 match fields */ +static inline int cxgb4_fill_ipv6_tos(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.tos = (ntohl(val) >> 20) & 0x000000FF; + f->mask.tos = (ntohl(mask) >> 20) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_proto(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.proto = (ntohl(val) >> 8) & 0x000000FF; + f->mask.proto = (ntohl(mask) >> 8) & 0x000000FF; + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[0], &val, sizeof(u32)); + memcpy(&f->mask.fip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[4], &val, sizeof(u32)); + memcpy(&f->mask.fip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[8], &val, sizeof(u32)); + memcpy(&f->mask.fip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_src_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.fip[12], &val, sizeof(u32)); + memcpy(&f->mask.fip[12], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip0(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[0], &val, sizeof(u32)); + memcpy(&f->mask.lip[0], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip1(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[4], &val, sizeof(u32)); + memcpy(&f->mask.lip[4], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip2(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[8], &val, sizeof(u32)); + memcpy(&f->mask.lip[8], &mask, sizeof(u32)); + + return 0; +} + +static inline int cxgb4_fill_ipv6_dst_ip3(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + memcpy(&f->val.lip[12], &val, sizeof(u32)); + memcpy(&f->mask.lip[12], &mask, sizeof(u32)); + + return 0; +} + +static const struct cxgb4_match_field cxgb4_ipv6_fields[] = { + { .off = 0, .val = cxgb4_fill_ipv6_tos }, + { .off = 4, .val = cxgb4_fill_ipv6_proto }, + { .off = 8, .val = cxgb4_fill_ipv6_src_ip0 }, + { .off = 12, .val = cxgb4_fill_ipv6_src_ip1 }, + { .off = 16, .val = cxgb4_fill_ipv6_src_ip2 }, + { .off = 20, .val = cxgb4_fill_ipv6_src_ip3 }, + { .off = 24, .val = cxgb4_fill_ipv6_dst_ip0 }, + { .off = 28, .val = cxgb4_fill_ipv6_dst_ip1 }, + { .off = 32, .val = cxgb4_fill_ipv6_dst_ip2 }, + { .off = 36, .val = cxgb4_fill_ipv6_dst_ip3 }, + { .val = NULL } +}; + +/* TCP/UDP match */ +static inline int cxgb4_fill_l4_ports(struct ch_filter_specification *f, + u32 val, u32 mask) +{ + f->val.fport = ntohl(val) >> 16; + f->mask.fport = ntohl(mask) >> 16; + f->val.lport = ntohl(val) & 0x0000FFFF; + f->mask.lport = ntohl(mask) & 0x0000FFFF; + + return 0; +}; + +static const struct cxgb4_match_field cxgb4_tcp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +static const struct cxgb4_match_field cxgb4_udp_fields[] = { + { .off = 0, .val = cxgb4_fill_l4_ports }, + { .val = NULL } +}; + +struct cxgb4_next_header { + unsigned int offset; /* Offset to next header */ + /* offset, shift, and mask added to offset above + * to get to next header. Useful when using a header + * field's value to jump to next header such as IHL field + * in IPv4 header. + */ + unsigned int offoff; + u32 shift; + u32 mask; + /* match criteria to make this jump */ + unsigned int match_off; + u32 match_val; + u32 match_mask; + /* location of jump to make */ + const struct cxgb4_match_field *jump; +}; + +/* Accept a rule with a jump to transport layer header based on IHL field in + * IPv4 header. + */ +static const struct cxgb4_next_header cxgb4_ipv4_jumps[] = { + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x600, .match_mask = 0xFF00, + .jump = cxgb4_tcp_fields }, + { .offset = 0, .offoff = 0, .shift = 6, .mask = 0xF, + .match_off = 8, .match_val = 0x1100, .match_mask = 0xFF00, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; + +/* Accept a rule with a jump directly past the 40 Bytes of IPv6 fixed header + * to get to transport layer header. + */ +static const struct cxgb4_next_header cxgb4_ipv6_jumps[] = { + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x60000, .match_mask = 0xFF0000, + .jump = cxgb4_tcp_fields }, + { .offset = 0x28, .offoff = 0, .shift = 0, .mask = 0, + .match_off = 4, .match_val = 0x110000, .match_mask = 0xFF0000, + .jump = cxgb4_udp_fields }, + { .jump = NULL } +}; + +struct cxgb4_link { + const struct cxgb4_match_field *match_field; /* Next header */ + struct ch_filter_specification fs; /* Match spec associated with link */ + u32 link_handle; /* Knode handle associated with the link */ + unsigned long *tid_map; /* Bitmap for filter tids */ +}; + +struct cxgb4_tc_u32_table { + unsigned int size; /* number of entries in table */ + struct cxgb4_link table[0]; /* Jump table */ +}; +#endif /* __CXGB4_TC_U32_PARSE_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c new file mode 100644 index 000000000000..b4b2d20aab3c --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c @@ -0,0 +1,696 @@ +/* + * cxgb4_uld.c:Chelsio Upper Layer Driver Interface for T4/T5/T6 SGE management + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Written by: Atul Gupta (atul.gupta@chelsio.com) + * Written by: Hariprasad Shenai (hariprasad@chelsio.com) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cxgb4.h" +#include "cxgb4_uld.h" +#include "t4_regs.h" +#include "t4fw_api.h" +#include "t4_msg.h" + +#define for_each_uldrxq(m, i) for (i = 0; i < ((m)->nrxq + (m)->nciq); i++) + +static int get_msix_idx_from_bmap(struct adapter *adap) +{ + struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds; + unsigned long flags; + unsigned int msix_idx; + + spin_lock_irqsave(&bmap->lock, flags); + msix_idx = find_first_zero_bit(bmap->msix_bmap, bmap->mapsize); + if (msix_idx < bmap->mapsize) { + __set_bit(msix_idx, bmap->msix_bmap); + } else { + spin_unlock_irqrestore(&bmap->lock, flags); + return -ENOSPC; + } + + spin_unlock_irqrestore(&bmap->lock, flags); + return msix_idx; +} + +static void free_msix_idx_in_bmap(struct adapter *adap, unsigned int msix_idx) +{ + struct uld_msix_bmap *bmap = &adap->msix_bmap_ulds; + unsigned long flags; + + spin_lock_irqsave(&bmap->lock, flags); + __clear_bit(msix_idx, bmap->msix_bmap); + spin_unlock_irqrestore(&bmap->lock, flags); +} + +/* Flush the aggregated lro sessions */ +static void uldrx_flush_handler(struct sge_rspq *q) +{ + struct adapter *adap = q->adap; + + if (adap->uld[q->uld].lro_flush) + adap->uld[q->uld].lro_flush(&q->lro_mgr); +} + +/** + * uldrx_handler - response queue handler for ULD queues + * @q: the response queue that received the packet + * @rsp: the response queue descriptor holding the offload message + * @gl: the gather list of packet fragments + * + * Deliver an ingress offload packet to a ULD. All processing is done by + * the ULD, we just maintain statistics. + */ +static int uldrx_handler(struct sge_rspq *q, const __be64 *rsp, + const struct pkt_gl *gl) +{ + struct adapter *adap = q->adap; + struct sge_ofld_rxq *rxq = container_of(q, struct sge_ofld_rxq, rspq); + int ret; + + /* FW can send CPLs encapsulated in a CPL_FW4_MSG */ + if (((const struct rss_header *)rsp)->opcode == CPL_FW4_MSG && + ((const struct cpl_fw4_msg *)(rsp + 1))->type == FW_TYPE_RSSCPL) + rsp += 2; + + if (q->flush_handler) + ret = adap->uld[q->uld].lro_rx_handler(adap->uld[q->uld].handle, + rsp, gl, &q->lro_mgr, + &q->napi); + else + ret = adap->uld[q->uld].rx_handler(adap->uld[q->uld].handle, + rsp, gl); + + if (ret) { + rxq->stats.nomem++; + return -1; + } + + if (!gl) + rxq->stats.imm++; + else if (gl == CXGB4_MSG_AN) + rxq->stats.an++; + else + rxq->stats.pkts++; + return 0; +} + +static int alloc_uld_rxqs(struct adapter *adap, + struct sge_uld_rxq_info *rxq_info, + unsigned int nq, unsigned int offset, bool lro) +{ + struct sge *s = &adap->sge; + struct sge_ofld_rxq *q = rxq_info->uldrxq + offset; + unsigned short *ids = rxq_info->rspq_id + offset; + unsigned int per_chan = nq / adap->params.nports; + unsigned int bmap_idx = 0; + int i, err, msi_idx; + + if (adap->flags & USING_MSIX) + msi_idx = 1; + else + msi_idx = -((int)s->intrq.abs_id + 1); + + for (i = 0; i < nq; i++, q++) { + if (msi_idx >= 0) { + bmap_idx = get_msix_idx_from_bmap(adap); + msi_idx = adap->msix_info_ulds[bmap_idx].idx; + } + err = t4_sge_alloc_rxq(adap, &q->rspq, false, + adap->port[i / per_chan], + msi_idx, + q->fl.size ? &q->fl : NULL, + uldrx_handler, + lro ? uldrx_flush_handler : NULL, + 0); + if (err) + goto freeout; + if (msi_idx >= 0) + rxq_info->msix_tbl[i + offset] = bmap_idx; + memset(&q->stats, 0, sizeof(q->stats)); + if (ids) + ids[i] = q->rspq.abs_id; + } + return 0; +freeout: + q = rxq_info->uldrxq + offset; + for ( ; i; i--, q++) { + if (q->rspq.desc) + free_rspq_fl(adap, &q->rspq, + q->fl.size ? &q->fl : NULL); + } + + /* We need to free rxq also in case of ciq allocation failure */ + if (offset) { + q = rxq_info->uldrxq + offset; + for ( ; i; i--, q++) { + if (q->rspq.desc) + free_rspq_fl(adap, &q->rspq, + q->fl.size ? &q->fl : NULL); + } + } + return err; +} + +static int +setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int i, ret = 0; + + if (adap->flags & USING_MSIX) { + rxq_info->msix_tbl = kcalloc((rxq_info->nrxq + rxq_info->nciq), + sizeof(unsigned short), + GFP_KERNEL); + if (!rxq_info->msix_tbl) + return -ENOMEM; + } + + ret = !(!alloc_uld_rxqs(adap, rxq_info, rxq_info->nrxq, 0, lro) && + !alloc_uld_rxqs(adap, rxq_info, rxq_info->nciq, + rxq_info->nrxq, lro)); + + /* Tell uP to route control queue completions to rdma rspq */ + if (adap->flags & FULL_INIT_DONE && + !ret && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + unsigned int cmplqid; + u32 param, cmdop; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + cmplqid = rxq_info->uldrxq[i].rspq.cntxt_id; + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + ret = t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, ¶m, &cmplqid); + } + } + return ret; +} + +static void t4_free_uld_rxqs(struct adapter *adap, int n, + struct sge_ofld_rxq *q) +{ + for ( ; n; n--, q++) { + if (q->rspq.desc) + free_rspq_fl(adap, &q->rspq, + q->fl.size ? &q->fl : NULL); + } +} + +static void free_sge_queues_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + + if (adap->flags & FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) { + struct sge *s = &adap->sge; + u32 param, cmdop, cmplqid = 0; + int i; + + cmdop = FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL; + for_each_port(adap, i) { + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(cmdop) | + FW_PARAMS_PARAM_YZ_V(s->ctrlq[i].q.cntxt_id)); + t4_set_params(adap, adap->mbox, adap->pf, + 0, 1, ¶m, &cmplqid); + } + } + + if (rxq_info->nciq) + t4_free_uld_rxqs(adap, rxq_info->nciq, + rxq_info->uldrxq + rxq_info->nrxq); + t4_free_uld_rxqs(adap, rxq_info->nrxq, rxq_info->uldrxq); + if (adap->flags & USING_MSIX) + kfree(rxq_info->msix_tbl); +} + +static int cfg_queues_uld(struct adapter *adap, unsigned int uld_type, + const struct cxgb4_uld_info *uld_info) +{ + struct sge *s = &adap->sge; + struct sge_uld_rxq_info *rxq_info; + int i, nrxq, ciq_size; + + rxq_info = kzalloc(sizeof(*rxq_info), GFP_KERNEL); + if (!rxq_info) + return -ENOMEM; + + if (adap->flags & USING_MSIX && uld_info->nrxq > s->nqs_per_uld) { + i = s->nqs_per_uld; + rxq_info->nrxq = roundup(i, adap->params.nports); + } else { + i = min_t(int, uld_info->nrxq, + num_online_cpus()); + rxq_info->nrxq = roundup(i, adap->params.nports); + } + if (!uld_info->ciq) { + rxq_info->nciq = 0; + } else { + if (adap->flags & USING_MSIX) + rxq_info->nciq = min_t(int, s->nqs_per_uld, + num_online_cpus()); + else + rxq_info->nciq = min_t(int, MAX_OFLD_QSETS, + num_online_cpus()); + rxq_info->nciq = ((rxq_info->nciq / adap->params.nports) * + adap->params.nports); + rxq_info->nciq = max_t(int, rxq_info->nciq, + adap->params.nports); + } + + nrxq = rxq_info->nrxq + rxq_info->nciq; /* total rxq's */ + rxq_info->uldrxq = kcalloc(nrxq, sizeof(struct sge_ofld_rxq), + GFP_KERNEL); + if (!rxq_info->uldrxq) { + kfree(rxq_info); + return -ENOMEM; + } + + rxq_info->rspq_id = kcalloc(nrxq, sizeof(unsigned short), GFP_KERNEL); + if (!rxq_info->rspq_id) { + kfree(rxq_info->uldrxq); + kfree(rxq_info); + return -ENOMEM; + } + + for (i = 0; i < rxq_info->nrxq; i++) { + struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; + + init_rspq(adap, &r->rspq, 5, 1, uld_info->rxq_size, 64); + r->rspq.uld = uld_type; + r->fl.size = 72; + } + + ciq_size = 64 + adap->vres.cq.size + adap->tids.nftids; + if (ciq_size > SGE_MAX_IQ_SIZE) { + dev_warn(adap->pdev_dev, "CIQ size too small for available IQs\n"); + ciq_size = SGE_MAX_IQ_SIZE; + } + + for (i = rxq_info->nrxq; i < nrxq; i++) { + struct sge_ofld_rxq *r = &rxq_info->uldrxq[i]; + + init_rspq(adap, &r->rspq, 5, 1, ciq_size, 64); + r->rspq.uld = uld_type; + } + + memcpy(rxq_info->name, uld_info->name, IFNAMSIZ); + adap->sge.uld_rxq_info[uld_type] = rxq_info; + + return 0; +} + +static void free_queues_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + + kfree(rxq_info->rspq_id); + kfree(rxq_info->uldrxq); + kfree(rxq_info); +} + +static int +request_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int err = 0; + unsigned int idx, bmap_idx; + + for_each_uldrxq(rxq_info, idx) { + bmap_idx = rxq_info->msix_tbl[idx]; + err = request_irq(adap->msix_info_ulds[bmap_idx].vec, + t4_sge_intr_msix, 0, + adap->msix_info_ulds[bmap_idx].desc, + &rxq_info->uldrxq[idx].rspq); + if (err) + goto unwind; + } + return 0; +unwind: + while (idx-- > 0) { + bmap_idx = rxq_info->msix_tbl[idx]; + free_msix_idx_in_bmap(adap, bmap_idx); + free_irq(adap->msix_info_ulds[bmap_idx].vec, + &rxq_info->uldrxq[idx].rspq); + } + return err; +} + +static void +free_msix_queue_irqs_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + unsigned int idx, bmap_idx; + + for_each_uldrxq(rxq_info, idx) { + bmap_idx = rxq_info->msix_tbl[idx]; + + free_msix_idx_in_bmap(adap, bmap_idx); + free_irq(adap->msix_info_ulds[bmap_idx].vec, + &rxq_info->uldrxq[idx].rspq); + } +} + +static void name_msix_vecs_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int n = sizeof(adap->msix_info_ulds[0].desc); + unsigned int idx, bmap_idx; + + for_each_uldrxq(rxq_info, idx) { + bmap_idx = rxq_info->msix_tbl[idx]; + + snprintf(adap->msix_info_ulds[bmap_idx].desc, n, "%s-%s%d", + adap->port[0]->name, rxq_info->name, idx); + } +} + +static void enable_rx(struct adapter *adap, struct sge_rspq *q) +{ + if (!q) + return; + + if (q->handler) { + cxgb_busy_poll_init_lock(q); + napi_enable(&q->napi); + } + /* 0-increment GTS to start the timer and enable interrupts */ + t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A), + SEINTARM_V(q->intr_params) | + INGRESSQID_V(q->cntxt_id)); +} + +static void quiesce_rx(struct adapter *adap, struct sge_rspq *q) +{ + if (q && q->handler) { + napi_disable(&q->napi); + local_bh_disable(); + while (!cxgb_poll_lock_napi(q)) + mdelay(1); + local_bh_enable(); + } +} + +static void enable_rx_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int idx; + + for_each_uldrxq(rxq_info, idx) + enable_rx(adap, &rxq_info->uldrxq[idx].rspq); +} + +static void quiesce_rx_uld(struct adapter *adap, unsigned int uld_type) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + int idx; + + for_each_uldrxq(rxq_info, idx) + quiesce_rx(adap, &rxq_info->uldrxq[idx].rspq); +} + +static void uld_queue_init(struct adapter *adap, unsigned int uld_type, + struct cxgb4_lld_info *lli) +{ + struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type]; + + lli->rxq_ids = rxq_info->rspq_id; + lli->nrxq = rxq_info->nrxq; + lli->ciq_ids = rxq_info->rspq_id + rxq_info->nrxq; + lli->nciq = rxq_info->nciq; +} + +int t4_uld_mem_alloc(struct adapter *adap) +{ + struct sge *s = &adap->sge; + + adap->uld = kcalloc(CXGB4_ULD_MAX, sizeof(*adap->uld), GFP_KERNEL); + if (!adap->uld) + return -ENOMEM; + + s->uld_rxq_info = kzalloc(CXGB4_ULD_MAX * + sizeof(struct sge_uld_rxq_info *), + GFP_KERNEL); + if (!s->uld_rxq_info) + goto err_uld; + + return 0; +err_uld: + kfree(adap->uld); + return -ENOMEM; +} + +void t4_uld_mem_free(struct adapter *adap) +{ + struct sge *s = &adap->sge; + + kfree(s->uld_rxq_info); + kfree(adap->uld); +} + +void t4_uld_clean_up(struct adapter *adap) +{ + struct sge_uld_rxq_info *rxq_info; + unsigned int i; + + if (!adap->uld) + return; + for (i = 0; i < CXGB4_ULD_MAX; i++) { + if (!adap->uld[i].handle) + continue; + rxq_info = adap->sge.uld_rxq_info[i]; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, i); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, i); + free_sge_queues_uld(adap, i); + free_queues_uld(adap, i); + } +} + +static void uld_init(struct adapter *adap, struct cxgb4_lld_info *lld) +{ + int i; + + lld->pdev = adap->pdev; + lld->pf = adap->pf; + lld->l2t = adap->l2t; + lld->tids = &adap->tids; + lld->ports = adap->port; + lld->vr = &adap->vres; + lld->mtus = adap->params.mtus; + lld->ntxq = adap->sge.ofldqsets; + lld->nchan = adap->params.nports; + lld->nports = adap->params.nports; + lld->wr_cred = adap->params.ofldq_wr_cred; + lld->iscsi_iolen = MAXRXDATA_G(t4_read_reg(adap, TP_PARA_REG2_A)); + lld->iscsi_tagmask = t4_read_reg(adap, ULP_RX_ISCSI_TAGMASK_A); + lld->iscsi_pgsz_order = t4_read_reg(adap, ULP_RX_ISCSI_PSZ_A); + lld->iscsi_llimit = t4_read_reg(adap, ULP_RX_ISCSI_LLIMIT_A); + lld->iscsi_ppm = &adap->iscsi_ppm; + lld->adapter_type = adap->params.chip; + lld->cclk_ps = 1000000000 / adap->params.vpd.cclk; + lld->udb_density = 1 << adap->params.sge.eq_qpp; + lld->ucq_density = 1 << adap->params.sge.iq_qpp; + lld->filt_mode = adap->params.tp.vlan_pri_map; + /* MODQ_REQ_MAP sets queues 0-3 to chan 0-3 */ + for (i = 0; i < NCHAN; i++) + lld->tx_modq[i] = i; + lld->gts_reg = adap->regs + MYPF_REG(SGE_PF_GTS_A); + lld->db_reg = adap->regs + MYPF_REG(SGE_PF_KDOORBELL_A); + lld->fw_vers = adap->params.fw_vers; + lld->dbfifo_int_thresh = dbfifo_int_thresh; + lld->sge_ingpadboundary = adap->sge.fl_align; + lld->sge_egrstatuspagesize = adap->sge.stat_len; + lld->sge_pktshift = adap->sge.pktshift; + lld->enable_fw_ofld_conn = adap->flags & FW_OFLD_CONN; + lld->max_ordird_qp = adap->params.max_ordird_qp; + lld->max_ird_adapter = adap->params.max_ird_adapter; + lld->ulptx_memwrite_dsgl = adap->params.ulptx_memwrite_dsgl; + lld->nodeid = dev_to_node(adap->pdev_dev); +} + +static void uld_attach(struct adapter *adap, unsigned int uld) +{ + void *handle; + struct cxgb4_lld_info lli; + + uld_init(adap, &lli); + uld_queue_init(adap, uld, &lli); + + handle = adap->uld[uld].add(&lli); + if (IS_ERR(handle)) { + dev_warn(adap->pdev_dev, + "could not attach to the %s driver, error %ld\n", + adap->uld[uld].name, PTR_ERR(handle)); + return; + } + + adap->uld[uld].handle = handle; + t4_register_netevent_notifier(); + + if (adap->flags & FULL_INIT_DONE) + adap->uld[uld].state_change(handle, CXGB4_STATE_UP); +} + +/** + * cxgb4_register_uld - register an upper-layer driver + * @type: the ULD type + * @p: the ULD methods + * + * Registers an upper-layer driver with this driver and notifies the ULD + * about any presently available devices that support its type. Returns + * %-EBUSY if a ULD of the same type is already registered. + */ +int cxgb4_register_uld(enum cxgb4_uld type, + const struct cxgb4_uld_info *p) +{ + int ret = 0; + unsigned int adap_idx = 0; + struct adapter *adap; + + if (type >= CXGB4_ULD_MAX) + return -EINVAL; + + mutex_lock(&uld_mutex); + list_for_each_entry(adap, &adapter_list, list_node) { + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + continue; + ret = cfg_queues_uld(adap, type, p); + if (ret) + goto out; + ret = setup_sge_queues_uld(adap, type, p->lro); + if (ret) + goto free_queues; + if (adap->flags & USING_MSIX) { + name_msix_vecs_uld(adap, type); + ret = request_msix_queue_irqs_uld(adap, type); + if (ret) + goto free_rxq; + } + if (adap->flags & FULL_INIT_DONE) + enable_rx_uld(adap, type); + if (adap->uld[type].add) { + ret = -EBUSY; + goto free_irq; + } + adap->uld[type] = *p; + uld_attach(adap, type); + adap_idx++; + } + mutex_unlock(&uld_mutex); + return 0; + +free_irq: + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); +free_rxq: + free_sge_queues_uld(adap, type); +free_queues: + free_queues_uld(adap, type); +out: + + list_for_each_entry(adap, &adapter_list, list_node) { + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + continue; + if (!adap_idx) + break; + adap->uld[type].handle = NULL; + adap->uld[type].add = NULL; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); + free_sge_queues_uld(adap, type); + free_queues_uld(adap, type); + adap_idx--; + } + mutex_unlock(&uld_mutex); + return ret; +} +EXPORT_SYMBOL(cxgb4_register_uld); + +/** + * cxgb4_unregister_uld - unregister an upper-layer driver + * @type: the ULD type + * + * Unregisters an existing upper-layer driver. + */ +int cxgb4_unregister_uld(enum cxgb4_uld type) +{ + struct adapter *adap; + + if (type >= CXGB4_ULD_MAX) + return -EINVAL; + + mutex_lock(&uld_mutex); + list_for_each_entry(adap, &adapter_list, list_node) { + if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) || + (type != CXGB4_ULD_CRYPTO && !is_offload(adap))) + continue; + if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip)) + continue; + adap->uld[type].handle = NULL; + adap->uld[type].add = NULL; + if (adap->flags & FULL_INIT_DONE) + quiesce_rx_uld(adap, type); + if (adap->flags & USING_MSIX) + free_msix_queue_irqs_uld(adap, type); + free_sge_queues_uld(adap, type); + free_queues_uld(adap, type); + } + mutex_unlock(&uld_mutex); + + return 0; +} +EXPORT_SYMBOL(cxgb4_unregister_uld); diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h index f3c58aaa932d..47bd14f602db 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -32,8 +32,8 @@ * SOFTWARE. */ -#ifndef __CXGB4_OFLD_H -#define __CXGB4_OFLD_H +#ifndef __CXGB4_ULD_H +#define __CXGB4_ULD_H #include #include @@ -42,6 +42,8 @@ #include #include "cxgb4.h" +#define MAX_ULD_QSETS 16 + /* CPL message priority levels */ enum { CPL_PRIORITY_DATA = 0, /* data messages */ @@ -104,6 +106,7 @@ struct tid_info { unsigned int atid_base; struct filter_entry *ftid_tab; + unsigned long *ftid_bmap; unsigned int nftids; unsigned int ftid_base; unsigned int aftid_base; @@ -124,6 +127,8 @@ struct tid_info { atomic_t tids_in_use; /* TIDs in the HASH */ atomic_t hash_tids_in_use; + /* lock for setting/clearing filter bitmap */ + spinlock_t ftid_lock; }; static inline void *lookup_tid(const struct tid_info *t, unsigned int tid) @@ -183,15 +188,38 @@ int cxgb4_create_server_filter(const struct net_device *dev, unsigned int stid, int cxgb4_remove_server_filter(const struct net_device *dev, unsigned int stid, unsigned int queue, bool ipv6); +/* Filter operation context to allow callers of cxgb4_set_filter() and + * cxgb4_del_filter() to wait for an asynchronous completion. + */ +struct filter_ctx { + struct completion completion; /* completion rendezvous */ + void *closure; /* caller's opaque information */ + int result; /* result of operation */ + u32 tid; /* to store tid */ +}; + +struct ch_filter_specification; + +int __cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs, + struct filter_ctx *ctx); +int __cxgb4_del_filter(struct net_device *dev, int filter_id, + struct filter_ctx *ctx); +int cxgb4_set_filter(struct net_device *dev, int filter_id, + struct ch_filter_specification *fs); +int cxgb4_del_filter(struct net_device *dev, int filter_id); + static inline void set_wr_txq(struct sk_buff *skb, int prio, int queue) { skb_set_queue_mapping(skb, (queue << 1) | prio); } enum cxgb4_uld { + CXGB4_ULD_INIT, CXGB4_ULD_RDMA, CXGB4_ULD_ISCSI, CXGB4_ULD_ISCSIT, + CXGB4_ULD_CRYPTO, CXGB4_ULD_MAX }; @@ -284,6 +312,11 @@ struct cxgb4_lld_info { struct cxgb4_uld_info { const char *name; + void *handle; + unsigned int nrxq; + unsigned int rxq_size; + bool ciq; + bool lro; void *(*add)(const struct cxgb4_lld_info *p); int (*rx_handler)(void *handle, const __be64 *rsp, const struct pkt_gl *gl); @@ -330,4 +363,4 @@ int cxgb4_bar2_sge_qregs(struct net_device *dev, u64 *pbar2_qoffset, unsigned int *pbar2_qid); -#endif /* !__CXGB4_OFLD_H */ +#endif /* !__CXGB4_ULD_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c new file mode 100644 index 000000000000..539de764bbd3 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c @@ -0,0 +1,556 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include + +#include "cxgb4.h" +#include "sched.h" + +/* Spinlock must be held by caller */ +static int t4_sched_class_fw_cmd(struct port_info *pi, + struct ch_sched_params *p, + enum sched_fw_ops op) +{ + struct adapter *adap = pi->adapter; + struct sched_table *s = pi->sched_tbl; + struct sched_class *e; + int err = 0; + + e = &s->tab[p->u.params.class]; + switch (op) { + case SCHED_FW_OP_ADD: + err = t4_sched_params(adap, p->type, + p->u.params.level, p->u.params.mode, + p->u.params.rateunit, + p->u.params.ratemode, + p->u.params.channel, e->idx, + p->u.params.minrate, p->u.params.maxrate, + p->u.params.weight, p->u.params.pktsize); + break; + default: + err = -ENOTSUPP; + break; + } + + return err; +} + +/* Spinlock must be held by caller */ +static int t4_sched_bind_unbind_op(struct port_info *pi, void *arg, + enum sched_bind_type type, bool bind) +{ + struct adapter *adap = pi->adapter; + u32 fw_mnem, fw_class, fw_param; + unsigned int pf = adap->pf; + unsigned int vf = 0; + int err = 0; + + switch (type) { + case SCHED_QUEUE: { + struct sched_queue_entry *qe; + + qe = (struct sched_queue_entry *)arg; + + /* Create a template for the FW_PARAMS_CMD mnemonic and + * value (TX Scheduling Class in this case). + */ + fw_mnem = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V( + FW_PARAMS_PARAM_DMAQ_EQ_SCHEDCLASS_ETH)); + fw_class = bind ? qe->param.class : FW_SCHED_CLS_NONE; + fw_param = (fw_mnem | FW_PARAMS_PARAM_YZ_V(qe->cntxt_id)); + + pf = adap->pf; + vf = 0; + break; + } + default: + err = -ENOTSUPP; + goto out; + } + + err = t4_set_params(adap, adap->mbox, pf, vf, 1, &fw_param, &fw_class); + +out: + return err; +} + +static struct sched_class *t4_sched_queue_lookup(struct port_info *pi, + const unsigned int qid, + int *index) +{ + struct sched_table *s = pi->sched_tbl; + struct sched_class *e, *end; + struct sched_class *found = NULL; + int i; + + /* Look for a class with matching bound queue parameters */ + end = &s->tab[s->sched_size]; + for (e = &s->tab[0]; e != end; ++e) { + struct sched_queue_entry *qe; + + i = 0; + if (e->state == SCHED_STATE_UNUSED) + continue; + + list_for_each_entry(qe, &e->queue_list, list) { + if (qe->cntxt_id == qid) { + found = e; + if (index) + *index = i; + break; + } + i++; + } + + if (found) + break; + } + + return found; +} + +static int t4_sched_queue_unbind(struct port_info *pi, struct ch_sched_queue *p) +{ + struct adapter *adap = pi->adapter; + struct sched_class *e; + struct sched_queue_entry *qe = NULL; + struct sge_eth_txq *txq; + unsigned int qid; + int index = -1; + int err = 0; + + if (p->queue < 0 || p->queue >= pi->nqsets) + return -ERANGE; + + txq = &adap->sge.ethtxq[pi->first_qset + p->queue]; + qid = txq->q.cntxt_id; + + /* Find the existing class that the queue is bound to */ + e = t4_sched_queue_lookup(pi, qid, &index); + if (e && index >= 0) { + int i = 0; + + spin_lock(&e->lock); + list_for_each_entry(qe, &e->queue_list, list) { + if (i == index) + break; + i++; + } + err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, + false); + if (err) { + spin_unlock(&e->lock); + goto out; + } + + list_del(&qe->list); + t4_free_mem(qe); + if (atomic_dec_and_test(&e->refcnt)) { + e->state = SCHED_STATE_UNUSED; + memset(&e->info, 0, sizeof(e->info)); + } + spin_unlock(&e->lock); + } +out: + return err; +} + +static int t4_sched_queue_bind(struct port_info *pi, struct ch_sched_queue *p) +{ + struct adapter *adap = pi->adapter; + struct sched_table *s = pi->sched_tbl; + struct sched_class *e; + struct sched_queue_entry *qe = NULL; + struct sge_eth_txq *txq; + unsigned int qid; + int err = 0; + + if (p->queue < 0 || p->queue >= pi->nqsets) + return -ERANGE; + + qe = t4_alloc_mem(sizeof(struct sched_queue_entry)); + if (!qe) + return -ENOMEM; + + txq = &adap->sge.ethtxq[pi->first_qset + p->queue]; + qid = txq->q.cntxt_id; + + /* Unbind queue from any existing class */ + err = t4_sched_queue_unbind(pi, p); + if (err) + goto out; + + /* Bind queue to specified class */ + memset(qe, 0, sizeof(*qe)); + qe->cntxt_id = qid; + memcpy(&qe->param, p, sizeof(qe->param)); + + e = &s->tab[qe->param.class]; + spin_lock(&e->lock); + err = t4_sched_bind_unbind_op(pi, (void *)qe, SCHED_QUEUE, true); + if (err) { + t4_free_mem(qe); + spin_unlock(&e->lock); + goto out; + } + + list_add_tail(&qe->list, &e->queue_list); + atomic_inc(&e->refcnt); + spin_unlock(&e->lock); +out: + return err; +} + +static void t4_sched_class_unbind_all(struct port_info *pi, + struct sched_class *e, + enum sched_bind_type type) +{ + if (!e) + return; + + switch (type) { + case SCHED_QUEUE: { + struct sched_queue_entry *qe; + + list_for_each_entry(qe, &e->queue_list, list) + t4_sched_queue_unbind(pi, &qe->param); + break; + } + default: + break; + } +} + +static int t4_sched_class_bind_unbind_op(struct port_info *pi, void *arg, + enum sched_bind_type type, bool bind) +{ + int err = 0; + + if (!arg) + return -EINVAL; + + switch (type) { + case SCHED_QUEUE: { + struct ch_sched_queue *qe = (struct ch_sched_queue *)arg; + + if (bind) + err = t4_sched_queue_bind(pi, qe); + else + err = t4_sched_queue_unbind(pi, qe); + break; + } + default: + err = -ENOTSUPP; + break; + } + + return err; +} + +/** + * cxgb4_sched_class_bind - Bind an entity to a scheduling class + * @dev: net_device pointer + * @arg: Entity opaque data + * @type: Entity type (Queue) + * + * Binds an entity (queue) to a scheduling class. If the entity + * is bound to another class, it will be unbound from the other class + * and bound to the class specified in @arg. + */ +int cxgb4_sched_class_bind(struct net_device *dev, void *arg, + enum sched_bind_type type) +{ + struct port_info *pi = netdev2pinfo(dev); + struct sched_table *s; + int err = 0; + u8 class_id; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (!arg) + return -EINVAL; + + switch (type) { + case SCHED_QUEUE: { + struct ch_sched_queue *qe = (struct ch_sched_queue *)arg; + + class_id = qe->class; + break; + } + default: + return -ENOTSUPP; + } + + if (!valid_class_id(dev, class_id)) + return -EINVAL; + + if (class_id == SCHED_CLS_NONE) + return -ENOTSUPP; + + s = pi->sched_tbl; + write_lock(&s->rw_lock); + err = t4_sched_class_bind_unbind_op(pi, arg, type, true); + write_unlock(&s->rw_lock); + + return err; +} + +/** + * cxgb4_sched_class_unbind - Unbind an entity from a scheduling class + * @dev: net_device pointer + * @arg: Entity opaque data + * @type: Entity type (Queue) + * + * Unbinds an entity (queue) from a scheduling class. + */ +int cxgb4_sched_class_unbind(struct net_device *dev, void *arg, + enum sched_bind_type type) +{ + struct port_info *pi = netdev2pinfo(dev); + struct sched_table *s; + int err = 0; + u8 class_id; + + if (!can_sched(dev)) + return -ENOTSUPP; + + if (!arg) + return -EINVAL; + + switch (type) { + case SCHED_QUEUE: { + struct ch_sched_queue *qe = (struct ch_sched_queue *)arg; + + class_id = qe->class; + break; + } + default: + return -ENOTSUPP; + } + + if (!valid_class_id(dev, class_id)) + return -EINVAL; + + s = pi->sched_tbl; + write_lock(&s->rw_lock); + err = t4_sched_class_bind_unbind_op(pi, arg, type, false); + write_unlock(&s->rw_lock); + + return err; +} + +/* If @p is NULL, fetch any available unused class */ +static struct sched_class *t4_sched_class_lookup(struct port_info *pi, + const struct ch_sched_params *p) +{ + struct sched_table *s = pi->sched_tbl; + struct sched_class *e, *end; + struct sched_class *found = NULL; + + if (!p) { + /* Get any available unused class */ + end = &s->tab[s->sched_size]; + for (e = &s->tab[0]; e != end; ++e) { + if (e->state == SCHED_STATE_UNUSED) { + found = e; + break; + } + } + } else { + /* Look for a class with matching scheduling parameters */ + struct ch_sched_params info; + struct ch_sched_params tp; + + memset(&info, 0, sizeof(info)); + memset(&tp, 0, sizeof(tp)); + + memcpy(&tp, p, sizeof(tp)); + /* Don't try to match class parameter */ + tp.u.params.class = SCHED_CLS_NONE; + + end = &s->tab[s->sched_size]; + for (e = &s->tab[0]; e != end; ++e) { + if (e->state == SCHED_STATE_UNUSED) + continue; + + memset(&info, 0, sizeof(info)); + memcpy(&info, &e->info, sizeof(info)); + /* Don't try to match class parameter */ + info.u.params.class = SCHED_CLS_NONE; + + if ((info.type == tp.type) && + (!memcmp(&info.u.params, &tp.u.params, + sizeof(info.u.params)))) { + found = e; + break; + } + } + } + + return found; +} + +static struct sched_class *t4_sched_class_alloc(struct port_info *pi, + struct ch_sched_params *p) +{ + struct sched_table *s = pi->sched_tbl; + struct sched_class *e; + u8 class_id; + int err; + + if (!p) + return NULL; + + class_id = p->u.params.class; + + /* Only accept search for existing class with matching params + * or allocation of new class with specified params + */ + if (class_id != SCHED_CLS_NONE) + return NULL; + + write_lock(&s->rw_lock); + /* See if there's an exisiting class with same + * requested sched params + */ + e = t4_sched_class_lookup(pi, p); + if (!e) { + struct ch_sched_params np; + + /* Fetch any available unused class */ + e = t4_sched_class_lookup(pi, NULL); + if (!e) + goto out; + + memset(&np, 0, sizeof(np)); + memcpy(&np, p, sizeof(np)); + np.u.params.class = e->idx; + + spin_lock(&e->lock); + /* New class */ + err = t4_sched_class_fw_cmd(pi, &np, SCHED_FW_OP_ADD); + if (err) { + spin_unlock(&e->lock); + e = NULL; + goto out; + } + memcpy(&e->info, &np, sizeof(e->info)); + atomic_set(&e->refcnt, 0); + e->state = SCHED_STATE_ACTIVE; + spin_unlock(&e->lock); + } + +out: + write_unlock(&s->rw_lock); + return e; +} + +/** + * cxgb4_sched_class_alloc - allocate a scheduling class + * @dev: net_device pointer + * @p: new scheduling class to create. + * + * Returns pointer to the scheduling class created. If @p is NULL, then + * it allocates and returns any available unused scheduling class. If a + * scheduling class with matching @p is found, then the matching class is + * returned. + */ +struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, + struct ch_sched_params *p) +{ + struct port_info *pi = netdev2pinfo(dev); + u8 class_id; + + if (!can_sched(dev)) + return NULL; + + class_id = p->u.params.class; + if (!valid_class_id(dev, class_id)) + return NULL; + + return t4_sched_class_alloc(pi, p); +} + +static void t4_sched_class_free(struct port_info *pi, struct sched_class *e) +{ + t4_sched_class_unbind_all(pi, e, SCHED_QUEUE); +} + +struct sched_table *t4_init_sched(unsigned int sched_size) +{ + struct sched_table *s; + unsigned int i; + + s = t4_alloc_mem(sizeof(*s) + sched_size * sizeof(struct sched_class)); + if (!s) + return NULL; + + s->sched_size = sched_size; + rwlock_init(&s->rw_lock); + + for (i = 0; i < s->sched_size; i++) { + memset(&s->tab[i], 0, sizeof(struct sched_class)); + s->tab[i].idx = i; + s->tab[i].state = SCHED_STATE_UNUSED; + INIT_LIST_HEAD(&s->tab[i].queue_list); + spin_lock_init(&s->tab[i].lock); + atomic_set(&s->tab[i].refcnt, 0); + } + return s; +} + +void t4_cleanup_sched(struct adapter *adap) +{ + struct sched_table *s; + unsigned int i; + + for_each_port(adap, i) { + struct port_info *pi = netdev2pinfo(adap->port[i]); + + s = pi->sched_tbl; + for (i = 0; i < s->sched_size; i++) { + struct sched_class *e; + + write_lock(&s->rw_lock); + e = &s->tab[i]; + if (e->state == SCHED_STATE_ACTIVE) + t4_sched_class_free(pi, e); + write_unlock(&s->rw_lock); + } + t4_free_mem(s); + } +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.h b/drivers/net/ethernet/chelsio/cxgb4/sched.h new file mode 100644 index 000000000000..77b2b3fd9021 --- /dev/null +++ b/drivers/net/ethernet/chelsio/cxgb4/sched.h @@ -0,0 +1,110 @@ +/* + * This file is part of the Chelsio T4 Ethernet driver for Linux. + * + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __CXGB4_SCHED_H +#define __CXGB4_SCHED_H + +#include +#include + +#define SCHED_CLS_NONE 0xff + +#define FW_SCHED_CLS_NONE 0xffffffff + +/* Max rate that can be set to a scheduling class is 10 Gbps */ +#define SCHED_MAX_RATE_KBPS 10000000U + +enum { + SCHED_STATE_ACTIVE, + SCHED_STATE_UNUSED, +}; + +enum sched_fw_ops { + SCHED_FW_OP_ADD, +}; + +enum sched_bind_type { + SCHED_QUEUE, +}; + +struct sched_queue_entry { + struct list_head list; + unsigned int cntxt_id; + struct ch_sched_queue param; +}; + +struct sched_class { + u8 state; + u8 idx; + struct ch_sched_params info; + struct list_head queue_list; + spinlock_t lock; /* Per class lock */ + atomic_t refcnt; +}; + +struct sched_table { /* per port scheduling table */ + u8 sched_size; + rwlock_t rw_lock; /* Table lock */ + struct sched_class tab[0]; +}; + +static inline bool can_sched(struct net_device *dev) +{ + struct port_info *pi = netdev2pinfo(dev); + + return !pi->sched_tbl ? false : true; +} + +static inline bool valid_class_id(struct net_device *dev, u8 class_id) +{ + struct port_info *pi = netdev2pinfo(dev); + + if ((class_id > pi->sched_tbl->sched_size - 1) && + (class_id != SCHED_CLS_NONE)) + return false; + + return true; +} + +int cxgb4_sched_class_bind(struct net_device *dev, void *arg, + enum sched_bind_type type); +int cxgb4_sched_class_unbind(struct net_device *dev, void *arg, + enum sched_bind_type type); + +struct sched_class *cxgb4_sched_class_alloc(struct net_device *dev, + struct ch_sched_params *p); + +struct sched_table *t4_init_sched(unsigned int size); +void t4_cleanup_sched(struct adapter *adap); +#endif /* __CXGB4_SCHED_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ad3552df0545..1e74fd6085df 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -2860,6 +2860,18 @@ int t4_sge_alloc_ctrl_txq(struct adapter *adap, struct sge_ctrl_txq *txq, return 0; } +int t4_sge_mod_ctrl_txq(struct adapter *adap, unsigned int eqid, + unsigned int cmplqid) +{ + u32 param, val; + + param = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DMAQ) | + FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DMAQ_EQ_CMPLIQID_CTRL) | + FW_PARAMS_PARAM_YZ_V(eqid)); + val = cmplqid; + return t4_set_params(adap, adap->mbox, adap->pf, 0, 1, ¶m, &val); +} + int t4_sge_alloc_ofld_txq(struct adapter *adap, struct sge_ofld_txq *txq, struct net_device *dev, unsigned int iqid) { @@ -2928,8 +2940,8 @@ static void free_txq(struct adapter *adap, struct sge_txq *q) q->desc = NULL; } -static void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, - struct sge_fl *fl) +void free_rspq_fl(struct adapter *adap, struct sge_rspq *rq, + struct sge_fl *fl) { struct sge *s = &adap->sge; unsigned int fl_id = fl ? fl->cntxt_id : 0xffff; @@ -3014,12 +3026,6 @@ void t4_free_sge_resources(struct adapter *adap) } } - /* clean up RDMA and iSCSI Rx queues */ - t4_free_ofld_rxqs(adap, adap->sge.iscsiqsets, adap->sge.iscsirxq); - t4_free_ofld_rxqs(adap, adap->sge.niscsitq, adap->sge.iscsitrxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaqs, adap->sge.rdmarxq); - t4_free_ofld_rxqs(adap, adap->sge.rdmaciqs, adap->sge.rdmaciq); - /* clean up offload Tx queues */ for (i = 0; i < ARRAY_SIZE(adap->sge.ofldtxq); i++) { struct sge_ofld_txq *q = &adap->sge.ofldtxq[i]; diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 660204bff726..20dec85da63d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2003-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2003-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -2729,7 +2729,7 @@ int t4_get_raw_vpd_params(struct adapter *adapter, struct vpd_params *p) out: vfree(vpd); - return ret; + return ret < 0 ? ret : 0; } /** @@ -8269,3 +8269,73 @@ void t4_idma_monitor(struct adapter *adapter, t4_sge_decode_idma_state(adapter, idma->idma_state[i]); } } + +/** + * t4_set_vf_mac - Set MAC address for the specified VF + * @adapter: The adapter + * @vf: one of the VFs instantiated by the specified PF + * @naddr: the number of MAC addresses + * @addr: the MAC address(es) to be set to the specified VF + */ +int t4_set_vf_mac_acl(struct adapter *adapter, unsigned int vf, + unsigned int naddr, u8 *addr) +{ + struct fw_acl_mac_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F | + FW_ACL_MAC_CMD_PFN_V(adapter->pf) | + FW_ACL_MAC_CMD_VFN_V(vf)); + + /* Note: Do not enable the ACL */ + cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd)); + cmd.nmac = naddr; + + switch (adapter->pf) { + case 3: + memcpy(cmd.macaddr3, addr, sizeof(cmd.macaddr3)); + break; + case 2: + memcpy(cmd.macaddr2, addr, sizeof(cmd.macaddr2)); + break; + case 1: + memcpy(cmd.macaddr1, addr, sizeof(cmd.macaddr1)); + break; + case 0: + memcpy(cmd.macaddr0, addr, sizeof(cmd.macaddr0)); + break; + } + + return t4_wr_mbox(adapter, adapter->mbox, &cmd, sizeof(cmd), &cmd); +} + +int t4_sched_params(struct adapter *adapter, int type, int level, int mode, + int rateunit, int ratemode, int channel, int class, + int minrate, int maxrate, int weight, int pktsize) +{ + struct fw_sched_cmd cmd; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_write = cpu_to_be32(FW_CMD_OP_V(FW_SCHED_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_WRITE_F); + cmd.retval_len16 = cpu_to_be32(FW_LEN16(cmd)); + + cmd.u.params.sc = FW_SCHED_SC_PARAMS; + cmd.u.params.type = type; + cmd.u.params.level = level; + cmd.u.params.mode = mode; + cmd.u.params.ch = channel; + cmd.u.params.cl = class; + cmd.u.params.unit = rateunit; + cmd.u.params.rate = ratemode; + cmd.u.params.min = cpu_to_be32(minrate); + cmd.u.params.max = cpu_to_be32(maxrate); + cmd.u.params.weight = cpu_to_be16(weight); + cmd.u.params.pktsize = cpu_to_be16(pktsize); + + return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd), + NULL, 1); +} diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h index e0ebe1378cb2..fba3b2ad382d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_msg.h @@ -61,6 +61,7 @@ enum { CPL_ABORT_REQ_RSS = 0x2B, CPL_ABORT_RPL_RSS = 0x2D, + CPL_RX_PHYS_ADDR = 0x30, CPL_CLOSE_CON_RPL = 0x32, CPL_ISCSI_HDR = 0x33, CPL_RDMA_CQE = 0x35, @@ -83,6 +84,10 @@ enum { CPL_PASS_OPEN_REQ6 = 0x81, CPL_ACT_OPEN_REQ6 = 0x83, + CPL_TX_TLS_PDU = 0x88, + CPL_TX_SEC_PDU = 0x8A, + CPL_TX_TLS_ACK = 0x8B, + CPL_RDMA_TERMINATE = 0xA2, CPL_RDMA_WRITE = 0xA4, CPL_SGE_EGR_UPDATE = 0xA5, @@ -94,6 +99,8 @@ enum { CPL_FW4_PLD = 0xC1, CPL_FW4_ACK = 0xC3, + CPL_RX_PHYS_DSGL = 0xD0, + CPL_FW6_MSG = 0xE0, CPL_FW6_PLD = 0xE1, CPL_TX_PKT_LSO = 0xED, @@ -1362,6 +1369,15 @@ struct ulptx_idata { __be32 len; }; +struct ulp_txpkt { + __be32 cmd_dest; + __be32 len; +}; + +#define ULPTX_CMD_S 24 +#define ULPTX_CMD_M 0xFF +#define ULPTX_CMD_V(x) ((x) << ULPTX_CMD_S) + #define ULPTX_NSGE_S 0 #define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S) @@ -1369,6 +1385,22 @@ struct ulptx_idata { #define ULPTX_MORE_V(x) ((x) << ULPTX_MORE_S) #define ULPTX_MORE_F ULPTX_MORE_V(1U) +#define ULP_TXPKT_DEST_S 16 +#define ULP_TXPKT_DEST_M 0x3 +#define ULP_TXPKT_DEST_V(x) ((x) << ULP_TXPKT_DEST_S) + +#define ULP_TXPKT_FID_S 4 +#define ULP_TXPKT_FID_M 0x7ff +#define ULP_TXPKT_FID_V(x) ((x) << ULP_TXPKT_FID_S) + +#define ULP_TXPKT_RO_S 3 +#define ULP_TXPKT_RO_V(x) ((x) << ULP_TXPKT_RO_S) +#define ULP_TXPKT_RO_F ULP_TXPKT_RO_V(1U) + +#define ULP_TX_SC_MORE_S 23 +#define ULP_TX_SC_MORE_V(x) ((x) << ULP_TX_SC_MORE_S) +#define ULP_TX_SC_MORE_F ULP_TX_SC_MORE_V(1U) + struct ulp_mem_io { WR_HDR; __be32 cmd; @@ -1406,4 +1438,409 @@ struct ulp_mem_io { #define ULP_MEMIO_DATA_LEN_S 0 #define ULP_MEMIO_DATA_LEN_V(x) ((x) << ULP_MEMIO_DATA_LEN_S) +#define ULPTX_NSGE_S 0 +#define ULPTX_NSGE_M 0xFFFF +#define ULPTX_NSGE_V(x) ((x) << ULPTX_NSGE_S) +#define ULPTX_NSGE_G(x) (((x) >> ULPTX_NSGE_S) & ULPTX_NSGE_M) + +struct ulptx_sc_memrd { + __be32 cmd_to_len; + __be32 addr; +}; + +#define ULP_TXPKT_DATAMODIFY_S 23 +#define ULP_TXPKT_DATAMODIFY_M 0x1 +#define ULP_TXPKT_DATAMODIFY_V(x) ((x) << ULP_TXPKT_DATAMODIFY_S) +#define ULP_TXPKT_DATAMODIFY_G(x) \ + (((x) >> ULP_TXPKT_DATAMODIFY_S) & ULP_TXPKT_DATAMODIFY__M) +#define ULP_TXPKT_DATAMODIFY_F ULP_TXPKT_DATAMODIFY_V(1U) + +#define ULP_TXPKT_CHANNELID_S 22 +#define ULP_TXPKT_CHANNELID_M 0x1 +#define ULP_TXPKT_CHANNELID_V(x) ((x) << ULP_TXPKT_CHANNELID_S) +#define ULP_TXPKT_CHANNELID_G(x) \ + (((x) >> ULP_TXPKT_CHANNELID_S) & ULP_TXPKT_CHANNELID_M) +#define ULP_TXPKT_CHANNELID_F ULP_TXPKT_CHANNELID_V(1U) + +#define SCMD_SEQ_NO_CTRL_S 29 +#define SCMD_SEQ_NO_CTRL_M 0x3 +#define SCMD_SEQ_NO_CTRL_V(x) ((x) << SCMD_SEQ_NO_CTRL_S) +#define SCMD_SEQ_NO_CTRL_G(x) \ + (((x) >> SCMD_SEQ_NO_CTRL_S) & SCMD_SEQ_NO_CTRL_M) + +/* StsFieldPrsnt- Status field at the end of the TLS PDU */ +#define SCMD_STATUS_PRESENT_S 28 +#define SCMD_STATUS_PRESENT_M 0x1 +#define SCMD_STATUS_PRESENT_V(x) ((x) << SCMD_STATUS_PRESENT_S) +#define SCMD_STATUS_PRESENT_G(x) \ + (((x) >> SCMD_STATUS_PRESENT_S) & SCMD_STATUS_PRESENT_M) +#define SCMD_STATUS_PRESENT_F SCMD_STATUS_PRESENT_V(1U) + +/* ProtoVersion - Protocol Version 0: 1.2, 1:1.1, 2:DTLS, 3:Generic, + * 3-15: Reserved. + */ +#define SCMD_PROTO_VERSION_S 24 +#define SCMD_PROTO_VERSION_M 0xf +#define SCMD_PROTO_VERSION_V(x) ((x) << SCMD_PROTO_VERSION_S) +#define SCMD_PROTO_VERSION_G(x) \ + (((x) >> SCMD_PROTO_VERSION_S) & SCMD_PROTO_VERSION_M) + +/* EncDecCtrl - Encryption/Decryption Control. 0: Encrypt, 1: Decrypt */ +#define SCMD_ENC_DEC_CTRL_S 23 +#define SCMD_ENC_DEC_CTRL_M 0x1 +#define SCMD_ENC_DEC_CTRL_V(x) ((x) << SCMD_ENC_DEC_CTRL_S) +#define SCMD_ENC_DEC_CTRL_G(x) \ + (((x) >> SCMD_ENC_DEC_CTRL_S) & SCMD_ENC_DEC_CTRL_M) +#define SCMD_ENC_DEC_CTRL_F SCMD_ENC_DEC_CTRL_V(1U) + +/* CipherAuthSeqCtrl - Cipher Authentication Sequence Control. */ +#define SCMD_CIPH_AUTH_SEQ_CTRL_S 22 +#define SCMD_CIPH_AUTH_SEQ_CTRL_M 0x1 +#define SCMD_CIPH_AUTH_SEQ_CTRL_V(x) \ + ((x) << SCMD_CIPH_AUTH_SEQ_CTRL_S) +#define SCMD_CIPH_AUTH_SEQ_CTRL_G(x) \ + (((x) >> SCMD_CIPH_AUTH_SEQ_CTRL_S) & SCMD_CIPH_AUTH_SEQ_CTRL_M) +#define SCMD_CIPH_AUTH_SEQ_CTRL_F SCMD_CIPH_AUTH_SEQ_CTRL_V(1U) + +/* CiphMode - Cipher Mode. 0: NOP, 1:AES-CBC, 2:AES-GCM, 3:AES-CTR, + * 4:Generic-AES, 5-15: Reserved. + */ +#define SCMD_CIPH_MODE_S 18 +#define SCMD_CIPH_MODE_M 0xf +#define SCMD_CIPH_MODE_V(x) ((x) << SCMD_CIPH_MODE_S) +#define SCMD_CIPH_MODE_G(x) \ + (((x) >> SCMD_CIPH_MODE_S) & SCMD_CIPH_MODE_M) + +/* AuthMode - Auth Mode. 0: NOP, 1:SHA1, 2:SHA2-224, 3:SHA2-256 + * 4-15: Reserved + */ +#define SCMD_AUTH_MODE_S 14 +#define SCMD_AUTH_MODE_M 0xf +#define SCMD_AUTH_MODE_V(x) ((x) << SCMD_AUTH_MODE_S) +#define SCMD_AUTH_MODE_G(x) \ + (((x) >> SCMD_AUTH_MODE_S) & SCMD_AUTH_MODE_M) + +/* HmacCtrl - HMAC Control. 0:NOP, 1:No truncation, 2:Support HMAC Truncation + * per RFC 4366, 3:IPSec 96 bits, 4-7:Reserved + */ +#define SCMD_HMAC_CTRL_S 11 +#define SCMD_HMAC_CTRL_M 0x7 +#define SCMD_HMAC_CTRL_V(x) ((x) << SCMD_HMAC_CTRL_S) +#define SCMD_HMAC_CTRL_G(x) \ + (((x) >> SCMD_HMAC_CTRL_S) & SCMD_HMAC_CTRL_M) + +/* IvSize - IV size in units of 2 bytes */ +#define SCMD_IV_SIZE_S 7 +#define SCMD_IV_SIZE_M 0xf +#define SCMD_IV_SIZE_V(x) ((x) << SCMD_IV_SIZE_S) +#define SCMD_IV_SIZE_G(x) \ + (((x) >> SCMD_IV_SIZE_S) & SCMD_IV_SIZE_M) + +/* NumIVs - Number of IVs */ +#define SCMD_NUM_IVS_S 0 +#define SCMD_NUM_IVS_M 0x7f +#define SCMD_NUM_IVS_V(x) ((x) << SCMD_NUM_IVS_S) +#define SCMD_NUM_IVS_G(x) \ + (((x) >> SCMD_NUM_IVS_S) & SCMD_NUM_IVS_M) + +/* EnbDbgId - If this is enabled upper 20 (63:44) bits if SeqNumber + * (below) are used as Cid (connection id for debug status), these + * bits are padded to zero for forming the 64 bit + * sequence number for TLS + */ +#define SCMD_ENB_DBGID_S 31 +#define SCMD_ENB_DBGID_M 0x1 +#define SCMD_ENB_DBGID_V(x) ((x) << SCMD_ENB_DBGID_S) +#define SCMD_ENB_DBGID_G(x) \ + (((x) >> SCMD_ENB_DBGID_S) & SCMD_ENB_DBGID_M) + +/* IV generation in SW. */ +#define SCMD_IV_GEN_CTRL_S 30 +#define SCMD_IV_GEN_CTRL_M 0x1 +#define SCMD_IV_GEN_CTRL_V(x) ((x) << SCMD_IV_GEN_CTRL_S) +#define SCMD_IV_GEN_CTRL_G(x) \ + (((x) >> SCMD_IV_GEN_CTRL_S) & SCMD_IV_GEN_CTRL_M) +#define SCMD_IV_GEN_CTRL_F SCMD_IV_GEN_CTRL_V(1U) + +/* More frags */ +#define SCMD_MORE_FRAGS_S 20 +#define SCMD_MORE_FRAGS_M 0x1 +#define SCMD_MORE_FRAGS_V(x) ((x) << SCMD_MORE_FRAGS_S) +#define SCMD_MORE_FRAGS_G(x) (((x) >> SCMD_MORE_FRAGS_S) & SCMD_MORE_FRAGS_M) + +/*last frag */ +#define SCMD_LAST_FRAG_S 19 +#define SCMD_LAST_FRAG_M 0x1 +#define SCMD_LAST_FRAG_V(x) ((x) << SCMD_LAST_FRAG_S) +#define SCMD_LAST_FRAG_G(x) (((x) >> SCMD_LAST_FRAG_S) & SCMD_LAST_FRAG_M) + +/* TlsCompPdu */ +#define SCMD_TLS_COMPPDU_S 18 +#define SCMD_TLS_COMPPDU_M 0x1 +#define SCMD_TLS_COMPPDU_V(x) ((x) << SCMD_TLS_COMPPDU_S) +#define SCMD_TLS_COMPPDU_G(x) (((x) >> SCMD_TLS_COMPPDU_S) & SCMD_TLS_COMPPDU_M) + +/* KeyCntxtInline - Key context inline after the scmd OR PayloadOnly*/ +#define SCMD_KEY_CTX_INLINE_S 17 +#define SCMD_KEY_CTX_INLINE_M 0x1 +#define SCMD_KEY_CTX_INLINE_V(x) ((x) << SCMD_KEY_CTX_INLINE_S) +#define SCMD_KEY_CTX_INLINE_G(x) \ + (((x) >> SCMD_KEY_CTX_INLINE_S) & SCMD_KEY_CTX_INLINE_M) +#define SCMD_KEY_CTX_INLINE_F SCMD_KEY_CTX_INLINE_V(1U) + +/* TLSFragEnable - 0: Host created TLS PDUs, 1: TLS Framgmentation in ASIC */ +#define SCMD_TLS_FRAG_ENABLE_S 16 +#define SCMD_TLS_FRAG_ENABLE_M 0x1 +#define SCMD_TLS_FRAG_ENABLE_V(x) ((x) << SCMD_TLS_FRAG_ENABLE_S) +#define SCMD_TLS_FRAG_ENABLE_G(x) \ + (((x) >> SCMD_TLS_FRAG_ENABLE_S) & SCMD_TLS_FRAG_ENABLE_M) +#define SCMD_TLS_FRAG_ENABLE_F SCMD_TLS_FRAG_ENABLE_V(1U) + +/* MacOnly - Only send the MAC and discard PDU. This is valid for hash only + * modes, in this case TLS_TX will drop the PDU and only + * send back the MAC bytes. + */ +#define SCMD_MAC_ONLY_S 15 +#define SCMD_MAC_ONLY_M 0x1 +#define SCMD_MAC_ONLY_V(x) ((x) << SCMD_MAC_ONLY_S) +#define SCMD_MAC_ONLY_G(x) \ + (((x) >> SCMD_MAC_ONLY_S) & SCMD_MAC_ONLY_M) +#define SCMD_MAC_ONLY_F SCMD_MAC_ONLY_V(1U) + +/* AadIVDrop - Drop the AAD and IV fields. Useful in protocols + * which have complex AAD and IV formations Eg:AES-CCM + */ +#define SCMD_AADIVDROP_S 14 +#define SCMD_AADIVDROP_M 0x1 +#define SCMD_AADIVDROP_V(x) ((x) << SCMD_AADIVDROP_S) +#define SCMD_AADIVDROP_G(x) \ + (((x) >> SCMD_AADIVDROP_S) & SCMD_AADIVDROP_M) +#define SCMD_AADIVDROP_F SCMD_AADIVDROP_V(1U) + +/* HdrLength - Length of all headers excluding TLS header + * present before start of crypto PDU/payload. + */ +#define SCMD_HDR_LEN_S 0 +#define SCMD_HDR_LEN_M 0x3fff +#define SCMD_HDR_LEN_V(x) ((x) << SCMD_HDR_LEN_S) +#define SCMD_HDR_LEN_G(x) \ + (((x) >> SCMD_HDR_LEN_S) & SCMD_HDR_LEN_M) + +struct cpl_tx_sec_pdu { + __be32 op_ivinsrtofst; + __be32 pldlen; + __be32 aadstart_cipherstop_hi; + __be32 cipherstop_lo_authinsert; + __be32 seqno_numivs; + __be32 ivgen_hdrlen; + __be64 scmd1; +}; + +#define CPL_TX_SEC_PDU_OPCODE_S 24 +#define CPL_TX_SEC_PDU_OPCODE_M 0xff +#define CPL_TX_SEC_PDU_OPCODE_V(x) ((x) << CPL_TX_SEC_PDU_OPCODE_S) +#define CPL_TX_SEC_PDU_OPCODE_G(x) \ + (((x) >> CPL_TX_SEC_PDU_OPCODE_S) & CPL_TX_SEC_PDU_OPCODE_M) + +/* RX Channel Id */ +#define CPL_TX_SEC_PDU_RXCHID_S 22 +#define CPL_TX_SEC_PDU_RXCHID_M 0x1 +#define CPL_TX_SEC_PDU_RXCHID_V(x) ((x) << CPL_TX_SEC_PDU_RXCHID_S) +#define CPL_TX_SEC_PDU_RXCHID_G(x) \ + (((x) >> CPL_TX_SEC_PDU_RXCHID_S) & CPL_TX_SEC_PDU_RXCHID_M) +#define CPL_TX_SEC_PDU_RXCHID_F CPL_TX_SEC_PDU_RXCHID_V(1U) + +/* Ack Follows */ +#define CPL_TX_SEC_PDU_ACKFOLLOWS_S 21 +#define CPL_TX_SEC_PDU_ACKFOLLOWS_M 0x1 +#define CPL_TX_SEC_PDU_ACKFOLLOWS_V(x) ((x) << CPL_TX_SEC_PDU_ACKFOLLOWS_S) +#define CPL_TX_SEC_PDU_ACKFOLLOWS_G(x) \ + (((x) >> CPL_TX_SEC_PDU_ACKFOLLOWS_S) & CPL_TX_SEC_PDU_ACKFOLLOWS_M) +#define CPL_TX_SEC_PDU_ACKFOLLOWS_F CPL_TX_SEC_PDU_ACKFOLLOWS_V(1U) + +/* Loopback bit in cpl_tx_sec_pdu */ +#define CPL_TX_SEC_PDU_ULPTXLPBK_S 20 +#define CPL_TX_SEC_PDU_ULPTXLPBK_M 0x1 +#define CPL_TX_SEC_PDU_ULPTXLPBK_V(x) ((x) << CPL_TX_SEC_PDU_ULPTXLPBK_S) +#define CPL_TX_SEC_PDU_ULPTXLPBK_G(x) \ + (((x) >> CPL_TX_SEC_PDU_ULPTXLPBK_S) & CPL_TX_SEC_PDU_ULPTXLPBK_M) +#define CPL_TX_SEC_PDU_ULPTXLPBK_F CPL_TX_SEC_PDU_ULPTXLPBK_V(1U) + +/* Length of cpl header encapsulated */ +#define CPL_TX_SEC_PDU_CPLLEN_S 16 +#define CPL_TX_SEC_PDU_CPLLEN_M 0xf +#define CPL_TX_SEC_PDU_CPLLEN_V(x) ((x) << CPL_TX_SEC_PDU_CPLLEN_S) +#define CPL_TX_SEC_PDU_CPLLEN_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CPLLEN_S) & CPL_TX_SEC_PDU_CPLLEN_M) + +/* PlaceHolder */ +#define CPL_TX_SEC_PDU_PLACEHOLDER_S 10 +#define CPL_TX_SEC_PDU_PLACEHOLDER_M 0x1 +#define CPL_TX_SEC_PDU_PLACEHOLDER_V(x) ((x) << CPL_TX_SEC_PDU_PLACEHOLDER_S) +#define CPL_TX_SEC_PDU_PLACEHOLDER_G(x) \ + (((x) >> CPL_TX_SEC_PDU_PLACEHOLDER_S) & \ + CPL_TX_SEC_PDU_PLACEHOLDER_M) + +/* IvInsrtOffset: Insertion location for IV */ +#define CPL_TX_SEC_PDU_IVINSRTOFST_S 0 +#define CPL_TX_SEC_PDU_IVINSRTOFST_M 0x3ff +#define CPL_TX_SEC_PDU_IVINSRTOFST_V(x) ((x) << CPL_TX_SEC_PDU_IVINSRTOFST_S) +#define CPL_TX_SEC_PDU_IVINSRTOFST_G(x) \ + (((x) >> CPL_TX_SEC_PDU_IVINSRTOFST_S) & \ + CPL_TX_SEC_PDU_IVINSRTOFST_M) + +/* AadStartOffset: Offset in bytes for AAD start from + * the first byte following the pkt headers (0-255 bytes) + */ +#define CPL_TX_SEC_PDU_AADSTART_S 24 +#define CPL_TX_SEC_PDU_AADSTART_M 0xff +#define CPL_TX_SEC_PDU_AADSTART_V(x) ((x) << CPL_TX_SEC_PDU_AADSTART_S) +#define CPL_TX_SEC_PDU_AADSTART_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AADSTART_S) & \ + CPL_TX_SEC_PDU_AADSTART_M) + +/* AadStopOffset: offset in bytes for AAD stop/end from the first byte following + * the pkt headers (0-511 bytes) + */ +#define CPL_TX_SEC_PDU_AADSTOP_S 15 +#define CPL_TX_SEC_PDU_AADSTOP_M 0x1ff +#define CPL_TX_SEC_PDU_AADSTOP_V(x) ((x) << CPL_TX_SEC_PDU_AADSTOP_S) +#define CPL_TX_SEC_PDU_AADSTOP_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AADSTOP_S) & CPL_TX_SEC_PDU_AADSTOP_M) + +/* CipherStartOffset: offset in bytes for encryption/decryption start from the + * first byte following the pkt headers (0-1023 bytes) + */ +#define CPL_TX_SEC_PDU_CIPHERSTART_S 5 +#define CPL_TX_SEC_PDU_CIPHERSTART_M 0x3ff +#define CPL_TX_SEC_PDU_CIPHERSTART_V(x) ((x) << CPL_TX_SEC_PDU_CIPHERSTART_S) +#define CPL_TX_SEC_PDU_CIPHERSTART_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CIPHERSTART_S) & \ + CPL_TX_SEC_PDU_CIPHERSTART_M) + +/* CipherStopOffset: offset in bytes for encryption/decryption end + * from end of the payload of this command (0-511 bytes) + */ +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_S 0 +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_M 0x1f +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_V(x) \ + ((x) << CPL_TX_SEC_PDU_CIPHERSTOP_HI_S) +#define CPL_TX_SEC_PDU_CIPHERSTOP_HI_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_HI_S) & \ + CPL_TX_SEC_PDU_CIPHERSTOP_HI_M) + +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_S 28 +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_M 0xf +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_V(x) \ + ((x) << CPL_TX_SEC_PDU_CIPHERSTOP_LO_S) +#define CPL_TX_SEC_PDU_CIPHERSTOP_LO_G(x) \ + (((x) >> CPL_TX_SEC_PDU_CIPHERSTOP_LO_S) & \ + CPL_TX_SEC_PDU_CIPHERSTOP_LO_M) + +/* AuthStartOffset: offset in bytes for authentication start from + * the first byte following the pkt headers (0-1023) + */ +#define CPL_TX_SEC_PDU_AUTHSTART_S 18 +#define CPL_TX_SEC_PDU_AUTHSTART_M 0x3ff +#define CPL_TX_SEC_PDU_AUTHSTART_V(x) ((x) << CPL_TX_SEC_PDU_AUTHSTART_S) +#define CPL_TX_SEC_PDU_AUTHSTART_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AUTHSTART_S) & \ + CPL_TX_SEC_PDU_AUTHSTART_M) + +/* AuthStopOffset: offset in bytes for authentication + * end from end of the payload of this command (0-511 Bytes) + */ +#define CPL_TX_SEC_PDU_AUTHSTOP_S 9 +#define CPL_TX_SEC_PDU_AUTHSTOP_M 0x1ff +#define CPL_TX_SEC_PDU_AUTHSTOP_V(x) ((x) << CPL_TX_SEC_PDU_AUTHSTOP_S) +#define CPL_TX_SEC_PDU_AUTHSTOP_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AUTHSTOP_S) & \ + CPL_TX_SEC_PDU_AUTHSTOP_M) + +/* AuthInsrtOffset: offset in bytes for authentication insertion + * from end of the payload of this command (0-511 bytes) + */ +#define CPL_TX_SEC_PDU_AUTHINSERT_S 0 +#define CPL_TX_SEC_PDU_AUTHINSERT_M 0x1ff +#define CPL_TX_SEC_PDU_AUTHINSERT_V(x) ((x) << CPL_TX_SEC_PDU_AUTHINSERT_S) +#define CPL_TX_SEC_PDU_AUTHINSERT_G(x) \ + (((x) >> CPL_TX_SEC_PDU_AUTHINSERT_S) & \ + CPL_TX_SEC_PDU_AUTHINSERT_M) + +struct cpl_rx_phys_dsgl { + __be32 op_to_tid; + __be32 pcirlxorder_to_noofsgentr; + struct rss_header rss_hdr_int; +}; + +#define CPL_RX_PHYS_DSGL_OPCODE_S 24 +#define CPL_RX_PHYS_DSGL_OPCODE_M 0xff +#define CPL_RX_PHYS_DSGL_OPCODE_V(x) ((x) << CPL_RX_PHYS_DSGL_OPCODE_S) +#define CPL_RX_PHYS_DSGL_OPCODE_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_OPCODE_S) & CPL_RX_PHYS_DSGL_OPCODE_M) + +#define CPL_RX_PHYS_DSGL_ISRDMA_S 23 +#define CPL_RX_PHYS_DSGL_ISRDMA_M 0x1 +#define CPL_RX_PHYS_DSGL_ISRDMA_V(x) ((x) << CPL_RX_PHYS_DSGL_ISRDMA_S) +#define CPL_RX_PHYS_DSGL_ISRDMA_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_ISRDMA_S) & CPL_RX_PHYS_DSGL_ISRDMA_M) +#define CPL_RX_PHYS_DSGL_ISRDMA_F CPL_RX_PHYS_DSGL_ISRDMA_V(1U) + +#define CPL_RX_PHYS_DSGL_RSVD1_S 20 +#define CPL_RX_PHYS_DSGL_RSVD1_M 0x7 +#define CPL_RX_PHYS_DSGL_RSVD1_V(x) ((x) << CPL_RX_PHYS_DSGL_RSVD1_S) +#define CPL_RX_PHYS_DSGL_RSVD1_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_RSVD1_S) & \ + CPL_RX_PHYS_DSGL_RSVD1_M) + +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_S 31 +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_M 0x1 +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_PCIRLXORDER_S) +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCIRLXORDER_S) & \ + CPL_RX_PHYS_DSGL_PCIRLXORDER_M) +#define CPL_RX_PHYS_DSGL_PCIRLXORDER_F CPL_RX_PHYS_DSGL_PCIRLXORDER_V(1U) + +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_S 30 +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_M 0x1 +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_PCINOSNOOP_S) +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCINOSNOOP_S) & \ + CPL_RX_PHYS_DSGL_PCINOSNOOP_M) + +#define CPL_RX_PHYS_DSGL_PCINOSNOOP_F CPL_RX_PHYS_DSGL_PCINOSNOOP_V(1U) + +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_S 29 +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_M 0x1 +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_PCITPHNTENB_S) +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCITPHNTENB_S) & \ + CPL_RX_PHYS_DSGL_PCITPHNTENB_M) +#define CPL_RX_PHYS_DSGL_PCITPHNTENB_F CPL_RX_PHYS_DSGL_PCITPHNTENB_V(1U) + +#define CPL_RX_PHYS_DSGL_PCITPHNT_S 27 +#define CPL_RX_PHYS_DSGL_PCITPHNT_M 0x3 +#define CPL_RX_PHYS_DSGL_PCITPHNT_V(x) ((x) << CPL_RX_PHYS_DSGL_PCITPHNT_S) +#define CPL_RX_PHYS_DSGL_PCITPHNT_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_PCITPHNT_S) & \ + CPL_RX_PHYS_DSGL_PCITPHNT_M) + +#define CPL_RX_PHYS_DSGL_DCAID_S 16 +#define CPL_RX_PHYS_DSGL_DCAID_M 0x7ff +#define CPL_RX_PHYS_DSGL_DCAID_V(x) ((x) << CPL_RX_PHYS_DSGL_DCAID_S) +#define CPL_RX_PHYS_DSGL_DCAID_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_DCAID_S) & \ + CPL_RX_PHYS_DSGL_DCAID_M) + +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_S 0 +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_M 0xffff +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_V(x) \ + ((x) << CPL_RX_PHYS_DSGL_NOOFSGENTR_S) +#define CPL_RX_PHYS_DSGL_NOOFSGENTR_G(x) \ + (((x) >> CPL_RX_PHYS_DSGL_NOOFSGENTR_S) & \ + CPL_RX_PHYS_DSGL_NOOFSGENTR_M) + #endif /* __T4_MSG_H */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h index 30507d44422c..4b58b32105f7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h +++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h @@ -1,7 +1,7 @@ /* * This file is part of the Chelsio T4 Ethernet driver for Linux. * - * Copyright (c) 2009-2014 Chelsio Communications, Inc. All rights reserved. + * Copyright (c) 2009-2016 Chelsio Communications, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU @@ -102,6 +102,7 @@ enum fw_wr_opcodes { FW_RI_FR_NSMR_WR = 0x19, FW_RI_INV_LSTAG_WR = 0x1a, FW_ISCSI_TX_DATA_WR = 0x45, + FW_CRYPTO_LOOKASIDE_WR = 0X6d, FW_LASTC2E_WR = 0x70 }; @@ -680,6 +681,7 @@ enum fw_cmd_opcodes { FW_RSS_IND_TBL_CMD = 0x20, FW_RSS_GLB_CONFIG_CMD = 0x22, FW_RSS_VI_CONFIG_CMD = 0x23, + FW_SCHED_CMD = 0x24, FW_DEVLOG_CMD = 0x25, FW_CLIP_CMD = 0x28, FW_LASTC2E_CMD = 0x40, @@ -1060,7 +1062,7 @@ struct fw_caps_config_cmd { __be16 niccaps; __be16 ofldcaps; __be16 rdmacaps; - __be16 r4; + __be16 cryptocaps; __be16 iscsicaps; __be16 fcoecaps; __be32 cfcsum; @@ -2967,6 +2969,41 @@ struct fw_rss_vi_config_cmd { #define FW_RSS_VI_CONFIG_CMD_UDPEN_V(x) ((x) << FW_RSS_VI_CONFIG_CMD_UDPEN_S) #define FW_RSS_VI_CONFIG_CMD_UDPEN_F FW_RSS_VI_CONFIG_CMD_UDPEN_V(1U) +enum fw_sched_sc { + FW_SCHED_SC_PARAMS = 1, +}; + +struct fw_sched_cmd { + __be32 op_to_write; + __be32 retval_len16; + union fw_sched { + struct fw_sched_config { + __u8 sc; + __u8 type; + __u8 minmaxen; + __u8 r3[5]; + __u8 nclasses[4]; + __be32 r4; + } config; + struct fw_sched_params { + __u8 sc; + __u8 type; + __u8 level; + __u8 mode; + __u8 unit; + __u8 rate; + __u8 ch; + __u8 cl; + __be32 min; + __be32 max; + __be16 weight; + __be16 pktsize; + __be16 burstsize; + __be16 r4; + } params; + } u; +}; + struct fw_clip_cmd { __be32 op_to_write; __be32 alloc_to_len16; @@ -3255,4 +3292,127 @@ struct fw_devlog_cmd { #define PCIE_FW_PF_DEVLOG_MEMTYPE_G(x) \ (((x) >> PCIE_FW_PF_DEVLOG_MEMTYPE_S) & PCIE_FW_PF_DEVLOG_MEMTYPE_M) +#define MAX_IMM_OFLD_TX_DATA_WR_LEN (0xff + sizeof(struct fw_ofld_tx_data_wr)) + +struct fw_crypto_lookaside_wr { + __be32 op_to_cctx_size; + __be32 len16_pkd; + __be32 session_id; + __be32 rx_chid_to_rx_q_id; + __be32 key_addr; + __be32 pld_size_hash_size; + __be64 cookie; +}; + +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_S 24 +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) +#define FW_CRYPTO_LOOKASIDE_WR_OPCODE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_OPCODE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_OPCODE_M) + +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_S 23 +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_M 0x1 +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_COMPL_S) +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_COMPL_S) & \ + FW_CRYPTO_LOOKASIDE_WR_COMPL_M) +#define FW_CRYPTO_LOOKASIDE_WR_COMPL_F FW_CRYPTO_LOOKASIDE_WR_COMPL_V(1U) + +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S 15 +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) +#define FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_S) & \ + FW_CRYPTO_LOOKASIDE_WR_IMM_LEN_M) + +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S 5 +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_S) & \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_LOC_M) + +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S 0 +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M 0x1f +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) +#define FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_CCTX_SIZE_M) + +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_S 0 +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_LEN16_S) +#define FW_CRYPTO_LOOKASIDE_WR_LEN16_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_LEN16_S) & \ + FW_CRYPTO_LOOKASIDE_WR_LEN16_M) + +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S 29 +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) +#define FW_CRYPTO_LOOKASIDE_WR_RX_CHID_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_CHID_S) & \ + FW_CRYPTO_LOOKASIDE_WR_RX_CHID_M) + +#define FW_CRYPTO_LOOKASIDE_WR_LCB_S 27 +#define FW_CRYPTO_LOOKASIDE_WR_LCB_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_LCB_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_LCB_S) +#define FW_CRYPTO_LOOKASIDE_WR_LCB_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_LCB_S) & FW_CRYPTO_LOOKASIDE_WR_LCB_M) + +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_S 25 +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_PHASH_S) +#define FW_CRYPTO_LOOKASIDE_WR_PHASH_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_PHASH_S) & \ + FW_CRYPTO_LOOKASIDE_WR_PHASH_M) + +#define FW_CRYPTO_LOOKASIDE_WR_IV_S 23 +#define FW_CRYPTO_LOOKASIDE_WR_IV_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_IV_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_IV_S) +#define FW_CRYPTO_LOOKASIDE_WR_IV_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_IV_S) & FW_CRYPTO_LOOKASIDE_WR_IV_M) + +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_S 10 +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_M 0x3 +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) +#define FW_CRYPTO_LOOKASIDE_WR_TX_CH_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_TX_CH_S) & \ + FW_CRYPTO_LOOKASIDE_WR_TX_CH_M) + +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S 0 +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M 0x3ff +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) +#define FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_S) & \ + FW_CRYPTO_LOOKASIDE_WR_RX_Q_ID_M) + +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S 24 +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M 0xff +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) +#define FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_PLD_SIZE_M) + +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S 17 +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M 0x7f +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_V(x) \ + ((x) << FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) +#define FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_G(x) \ + (((x) >> FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_S) & \ + FW_CRYPTO_LOOKASIDE_WR_HASH_SIZE_M) + #endif /* _T4FW_INTERFACE_H_ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index e116bb8d1729..100b2cc064a3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -2378,7 +2378,7 @@ static void size_nports_qsets(struct adapter *adapter) */ pmask_nports = hweight32(adapter->params.vfres.pmask); if (pmask_nports < adapter->params.nports) { - dev_warn(adapter->pdev_dev, "only using %d of %d provissioned" + dev_warn(adapter->pdev_dev, "only using %d of %d provisioned" " virtual interfaces; limited by Port Access Rights" " mask %#x\n", pmask_nports, adapter->params.nports, adapter->params.vfres.pmask); @@ -2777,6 +2777,7 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, struct adapter *adapter; struct port_info *pi; struct net_device *netdev; + unsigned int pf; /* * Print our driver banner the first time we're called to initialize a @@ -2903,8 +2904,11 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, * Allocate our "adapter ports" and stitch everything together. */ pmask = adapter->params.vfres.pmask; + pf = t4vf_get_pf_from_vf(adapter); for_each_port(adapter, pidx) { int port_id, viid; + u8 mac[ETH_ALEN]; + unsigned int naddr = 1; /* * We simplistically allocate our virtual interfaces @@ -2975,6 +2979,26 @@ static int cxgb4vf_pci_probe(struct pci_dev *pdev, pidx); goto err_free_dev; } + + err = t4vf_get_vf_mac_acl(adapter, pf, &naddr, mac); + if (err) { + dev_err(&pdev->dev, + "unable to determine MAC ACL address, " + "continuing anyway.. (status %d)\n", err); + } else if (naddr && adapter->params.vfres.nvi == 1) { + struct sockaddr addr; + + ether_addr_copy(addr.sa_data, mac); + err = cxgb4vf_set_mac_addr(netdev, &addr); + if (err) { + dev_err(&pdev->dev, + "unable to set MAC address %pM\n", + mac); + goto err_free_dev; + } + dev_info(&pdev->dev, + "Using assigned MAC ACL: %pM\n", mac); + } } /* See what interrupts we'll be using. If we've been configured to diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index c8fd4f8fe1fa..f3ed9ce99e5e 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1648,14 +1648,15 @@ int t4vf_ethrx_handler(struct sge_rspq *rspq, const __be64 *rsp, if (csum_ok && !pkt->err_vec && (be32_to_cpu(pkt->l2info) & (RXF_UDP_F | RXF_TCP_F))) { - if (!pkt->ip_frag) + if (!pkt->ip_frag) { skb->ip_summed = CHECKSUM_UNNECESSARY; - else { + rxq->stats.rx_cso++; + } else if (pkt->l2info & htonl(RXF_IP_F)) { __sum16 c = (__force __sum16)pkt->csum; skb->csum = csum_unfold(c); skb->ip_summed = CHECKSUM_COMPLETE; + rxq->stats.rx_cso++; } - rxq->stats.rx_cso++; } else skb_checksum_none_assert(skb); diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index 17a2bbcf93f0..b3903fe411aa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -354,6 +354,7 @@ int t4vf_bar2_sge_qregs(struct adapter *adapter, u64 *pbar2_qoffset, unsigned int *pbar2_qid); +unsigned int t4vf_get_pf_from_vf(struct adapter *); int t4vf_get_sge_params(struct adapter *); int t4vf_get_vpd_params(struct adapter *); int t4vf_get_dev_params(struct adapter *); @@ -388,5 +389,7 @@ int t4vf_eth_eq_free(struct adapter *, unsigned int); int t4vf_handle_fw_rpl(struct adapter *, const __be64 *); int t4vf_prep_adapter(struct adapter *); +int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, + unsigned int *naddr, u8 *addr); #endif /* __T4VF_COMMON_H__ */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index b5622b1689e9..e98248f00fef 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -640,6 +640,15 @@ int t4vf_bar2_sge_qregs(struct adapter *adapter, return 0; } +unsigned int t4vf_get_pf_from_vf(struct adapter *adapter) +{ + u32 whoami; + + whoami = t4_read_reg(adapter, T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A); + return (CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ? + SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami)); +} + /** * t4vf_get_sge_params - retrieve adapter Scatter gather Engine parameters * @adapter: the adapter @@ -717,7 +726,6 @@ int t4vf_get_sge_params(struct adapter *adapter) * read. */ if (!is_t4(adapter->params.chip)) { - u32 whoami; unsigned int pf, s_hps, s_qpp; params[0] = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_REG) | @@ -741,11 +749,7 @@ int t4vf_get_sge_params(struct adapter *adapter) * register we just read. Do it once here so other code in * the driver can just use it. */ - whoami = t4_read_reg(adapter, - T4VF_PL_BASE_ADDR + PL_VF_WHOAMI_A); - pf = CHELSIO_CHIP_VERSION(adapter->params.chip) <= CHELSIO_T5 ? - SOURCEPF_G(whoami) : T6_SOURCEPF_G(whoami); - + pf = t4vf_get_pf_from_vf(adapter); s_hps = (HOSTPAGESIZEPF0_S + (HOSTPAGESIZEPF1_S - HOSTPAGESIZEPF0_S) * pf); sge_params->sge_vf_hps = @@ -1812,3 +1816,50 @@ int t4vf_prep_adapter(struct adapter *adapter) return 0; } + +/** + * t4vf_get_vf_mac_acl - Get the MAC address to be set to + * the VI of this VF. + * @adapter: The adapter + * @pf: The pf associated with vf + * @naddr: the number of ACL MAC addresses returned in addr + * @addr: Placeholder for MAC addresses + * + * Find the MAC address to be set to the VF's VI. The requested MAC address + * is from the host OS via callback in the PF driver. + */ +int t4vf_get_vf_mac_acl(struct adapter *adapter, unsigned int pf, + unsigned int *naddr, u8 *addr) +{ + struct fw_acl_mac_cmd cmd; + int ret; + + memset(&cmd, 0, sizeof(cmd)); + cmd.op_to_vfn = cpu_to_be32(FW_CMD_OP_V(FW_ACL_MAC_CMD) | + FW_CMD_REQUEST_F | + FW_CMD_READ_F); + cmd.en_to_len16 = cpu_to_be32((unsigned int)FW_LEN16(cmd)); + ret = t4vf_wr_mbox(adapter, &cmd, sizeof(cmd), &cmd); + if (ret) + return ret; + + if (cmd.nmac < *naddr) + *naddr = cmd.nmac; + + switch (pf) { + case 3: + memcpy(addr, cmd.macaddr3, sizeof(cmd.macaddr3)); + break; + case 2: + memcpy(addr, cmd.macaddr2, sizeof(cmd.macaddr2)); + break; + case 1: + memcpy(addr, cmd.macaddr1, sizeof(cmd.macaddr1)); + break; + case 0: + memcpy(addr, cmd.macaddr0, sizeof(cmd.macaddr0)); + break; + } + + return ret; +} diff --git a/drivers/net/ethernet/chelsio/libcxgb/Makefile b/drivers/net/ethernet/chelsio/libcxgb/Makefile index 2362230ef4fe..2534e30a1560 100644 --- a/drivers/net/ethernet/chelsio/libcxgb/Makefile +++ b/drivers/net/ethernet/chelsio/libcxgb/Makefile @@ -1,3 +1,5 @@ +ccflags-y := -Idrivers/net/ethernet/chelsio/cxgb4 + obj-$(CONFIG_CHELSIO_LIB) += libcxgb.o -libcxgb-y := libcxgb_ppm.o +libcxgb-y := libcxgb_ppm.o libcxgb_cm.o diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c new file mode 100644 index 000000000000..0f0de5b63622 --- /dev/null +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.c @@ -0,0 +1,149 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include + +#include "libcxgb_cm.h" + +void +cxgb_get_4tuple(struct cpl_pass_accept_req *req, enum chip_type type, + int *iptype, __u8 *local_ip, __u8 *peer_ip, + __be16 *local_port, __be16 *peer_port) +{ + int eth_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + int ip_len = (CHELSIO_CHIP_VERSION(type) <= CHELSIO_T5) ? + IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)) : + T6_IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); + struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); + struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); + struct tcphdr *tcp = (struct tcphdr *) + ((u8 *)(req + 1) + eth_len + ip_len); + + if (ip->version == 4) { + pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", + __func__, ntohl(ip->saddr), ntohl(ip->daddr), + ntohs(tcp->source), ntohs(tcp->dest)); + *iptype = 4; + memcpy(peer_ip, &ip->saddr, 4); + memcpy(local_ip, &ip->daddr, 4); + } else { + pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", + __func__, ip6->saddr.s6_addr, ip6->daddr.s6_addr, + ntohs(tcp->source), ntohs(tcp->dest)); + *iptype = 6; + memcpy(peer_ip, ip6->saddr.s6_addr, 16); + memcpy(local_ip, ip6->daddr.s6_addr, 16); + } + *peer_port = tcp->source; + *local_port = tcp->dest; +} +EXPORT_SYMBOL(cxgb_get_4tuple); + +static bool +cxgb_our_interface(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + struct net_device *egress_dev) +{ + int i; + + egress_dev = get_real_dev(egress_dev); + for (i = 0; i < lldi->nports; i++) + if (lldi->ports[i] == egress_dev) + return true; + return false; +} + +struct dst_entry * +cxgb_find_route(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + __be32 local_ip, __be32 peer_ip, __be16 local_port, + __be16 peer_port, u8 tos) +{ + struct rtable *rt; + struct flowi4 fl4; + struct neighbour *n; + + rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, local_ip, + peer_port, local_port, IPPROTO_TCP, + tos, 0); + if (IS_ERR(rt)) + return NULL; + n = dst_neigh_lookup(&rt->dst, &peer_ip); + if (!n) + return NULL; + if (!cxgb_our_interface(lldi, get_real_dev, n->dev) && + !(n->dev->flags & IFF_LOOPBACK)) { + neigh_release(n); + dst_release(&rt->dst); + return NULL; + } + neigh_release(n); + return &rt->dst; +} +EXPORT_SYMBOL(cxgb_find_route); + +struct dst_entry * +cxgb_find_route6(struct cxgb4_lld_info *lldi, + struct net_device *(*get_real_dev)(struct net_device *), + __u8 *local_ip, __u8 *peer_ip, __be16 local_port, + __be16 peer_port, u8 tos, __u32 sin6_scope_id) +{ + struct dst_entry *dst = NULL; + + if (IS_ENABLED(CONFIG_IPV6)) { + struct flowi6 fl6; + + memset(&fl6, 0, sizeof(fl6)); + memcpy(&fl6.daddr, peer_ip, 16); + memcpy(&fl6.saddr, local_ip, 16); + if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + fl6.flowi6_oif = sin6_scope_id; + dst = ip6_route_output(&init_net, NULL, &fl6); + if (!dst) + goto out; + if (!cxgb_our_interface(lldi, get_real_dev, + ip6_dst_idev(dst)->dev) && + !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { + dst_release(dst); + dst = NULL; + } + } + +out: + return dst; +} +EXPORT_SYMBOL(cxgb_find_route6); diff --git a/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h new file mode 100644 index 000000000000..515b94ff9080 --- /dev/null +++ b/drivers/net/ethernet/chelsio/libcxgb/libcxgb_cm.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __LIBCXGB_CM_H__ +#define __LIBCXGB_CM_H__ + + +#include + +#include +#include +#include + +void +cxgb_get_4tuple(struct cpl_pass_accept_req *, enum chip_type, + int *, __u8 *, __u8 *, __be16 *, __be16 *); +struct dst_entry * +cxgb_find_route(struct cxgb4_lld_info *, + struct net_device *(*)(struct net_device *), + __be32, __be32, __be16, __be16, u8); +struct dst_entry * +cxgb_find_route6(struct cxgb4_lld_info *, + struct net_device *(*)(struct net_device *), + __u8 *, __u8 *, __be16, __be16, u8, __u32); + +/* Returns whether a CPL status conveys negative advice. + */ +static inline bool cxgb_is_neg_adv(unsigned int status) +{ + return status == CPL_ERR_RTX_NEG_ADVICE || + status == CPL_ERR_PERSIST_NEG_ADVICE || + status == CPL_ERR_KEEPALV_NEG_ADVICE; +} + +static inline void +cxgb_best_mtu(const unsigned short *mtus, unsigned short mtu, + unsigned int *idx, int use_ts, int ipv6) +{ + unsigned short hdr_size = (ipv6 ? + sizeof(struct ipv6hdr) : + sizeof(struct iphdr)) + + sizeof(struct tcphdr) + + (use_ts ? + round_up(TCPOLEN_TIMESTAMP, 4) : 0); + unsigned short data_size = mtu - hdr_size; + + cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); +} + +static inline u32 cxgb_compute_wscale(u32 win) +{ + u32 wscale = 0; + + while (wscale < 14 && (65535 << wscale) < win) + wscale++; + return wscale; +} + +static inline void +cxgb_mk_tid_release(struct sk_buff *skb, u32 len, u32 tid, u16 chan) +{ + struct cpl_tid_release *req; + + req = (struct cpl_tid_release *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_TID_RELEASE, tid)); + set_wr_txq(skb, CPL_PRIORITY_SETUP, chan); +} + +static inline void +cxgb_mk_close_con_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + void *handle, arp_err_handler_t handler) +{ + struct cpl_close_con_req *req; + + req = (struct cpl_close_con_req *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid)); + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); + t4_set_arp_err_handler(skb, handle, handler); +} + +static inline void +cxgb_mk_abort_req(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + void *handle, arp_err_handler_t handler) +{ + struct cpl_abort_req *req; + + req = (struct cpl_abort_req *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); + req->cmd = CPL_ABORT_SEND_RST; + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); + t4_set_arp_err_handler(skb, handle, handler); +} + +static inline void +cxgb_mk_abort_rpl(struct sk_buff *skb, u32 len, u32 tid, u16 chan) +{ + struct cpl_abort_rpl *rpl; + + rpl = (struct cpl_abort_rpl *)__skb_put(skb, len); + memset(rpl, 0, len); + + INIT_TP_WR(rpl, tid); + OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); + rpl->cmd = CPL_ABORT_NO_RST; + set_wr_txq(skb, CPL_PRIORITY_DATA, chan); +} + +static inline void +cxgb_mk_rx_data_ack(struct sk_buff *skb, u32 len, u32 tid, u16 chan, + u32 credit_dack) +{ + struct cpl_rx_data_ack *req; + + req = (struct cpl_rx_data_ack *)__skb_put(skb, len); + memset(req, 0, len); + + INIT_TP_WR(req, tid); + OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid)); + req->credit_dack = cpu_to_be32(credit_dack); + set_wr_txq(skb, CPL_PRIORITY_ACK, chan); +} +#endif diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c index f0e9e2ef62a0..6620fc861c47 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.c +++ b/drivers/net/ethernet/dec/tulip/de4x5.c @@ -1966,7 +1966,7 @@ SetMulticastFilter(struct net_device *dev) } else if (lp->setup_f == HASH_PERF) { /* Hash Filtering */ netdev_for_each_mc_addr(ha, dev) { crc = ether_crc_le(ETH_ALEN, ha->addr); - hashcode = crc & HASH_BITS; /* hashcode is 9 LSb of CRC */ + hashcode = crc & DE4X5_HASH_BITS; /* hashcode is 9 LSb of CRC */ byte = hashcode >> 3; /* bit[3-8] -> byte in filter */ bit = 1 << (hashcode & 0x07);/* bit[0-2] -> bit in byte */ @@ -5043,7 +5043,7 @@ build_setup_frame(struct net_device *dev, int mode) *(pa + i) = dev->dev_addr[i]; /* Host address */ if (i & 0x01) pa += 2; } - *(lp->setup_frame + (HASH_TABLE_LEN >> 3) - 3) = 0x80; + *(lp->setup_frame + (DE4X5_HASH_TABLE_LEN >> 3) - 3) = 0x80; } else { for (i=0; idev_addr[i]; diff --git a/drivers/net/ethernet/dec/tulip/de4x5.h b/drivers/net/ethernet/dec/tulip/de4x5.h index ec756eba397b..1bfdc9b117f6 100644 --- a/drivers/net/ethernet/dec/tulip/de4x5.h +++ b/drivers/net/ethernet/dec/tulip/de4x5.h @@ -860,8 +860,8 @@ #define PCI 0 #define EISA 1 -#define HASH_TABLE_LEN 512 /* Bits */ -#define HASH_BITS 0x01ff /* 9 LS bits */ +#define DE4X5_HASH_TABLE_LEN 512 /* Bits */ +#define DE4X5_HASH_BITS 0x01ff /* 9 LS bits */ #define SETUP_FRAME_LEN 192 /* Bytes */ #define IMPERF_PA_OFFSET 156 /* Bytes */ diff --git a/drivers/net/ethernet/dlink/sundance.c b/drivers/net/ethernet/dlink/sundance.c index 58c6338a839e..79d80090eac8 100644 --- a/drivers/net/ethernet/dlink/sundance.c +++ b/drivers/net/ethernet/dlink/sundance.c @@ -867,7 +867,7 @@ static int netdev_open(struct net_device *dev) /* Initialize other registers. */ __set_mac_addr(dev); -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) iowrite16(dev->mtu + 18, ioaddr + MaxFrameSize); #else iowrite16(dev->mtu + 14, ioaddr + MaxFrameSize); diff --git a/drivers/net/ethernet/emulex/benet/be.h b/drivers/net/ethernet/emulex/benet/be.h index 4555e041ef69..6cfa63a5e9b4 100644 --- a/drivers/net/ethernet/emulex/benet/be.h +++ b/drivers/net/ethernet/emulex/benet/be.h @@ -37,7 +37,7 @@ #include "be_hw.h" #include "be_roce.h" -#define DRV_VER "11.0.0.0" +#define DRV_VER "11.1.0.0" #define DRV_NAME "be2net" #define BE_NAME "Emulex BladeEngine2" #define BE3_NAME "Emulex BladeEngine3" @@ -399,13 +399,13 @@ enum vf_state { #define BE_FLAGS_PHY_MISCONFIGURED BIT(10) #define BE_FLAGS_ERR_DETECTION_SCHEDULED BIT(11) #define BE_FLAGS_OS2BMC BIT(12) +#define BE_FLAGS_TRY_RECOVERY BIT(13) #define BE_UC_PMAC_COUNT 30 #define BE_VF_UC_PMAC_COUNT 2 #define MAX_ERR_RECOVERY_RETRY_COUNT 3 #define ERR_DETECTION_DELAY 1000 -#define ERR_RECOVERY_RETRY_DELAY 30000 /* Ethtool set_dump flags */ #define LANCER_INITIATE_FW_DUMP 0x1 @@ -508,6 +508,70 @@ struct be_wrb_params { u16 lso_mss; /* MSS for LSO */ }; +struct be_eth_addr { + unsigned char mac[ETH_ALEN]; +}; + +#define BE_SEC 1000 /* in msec */ +#define BE_MIN (60 * BE_SEC) /* in msec */ +#define BE_HOUR (60 * BE_MIN) /* in msec */ + +#define ERR_RECOVERY_MAX_RETRY_COUNT 3 +#define ERR_RECOVERY_DETECTION_DELAY BE_SEC +#define ERR_RECOVERY_RETRY_DELAY (30 * BE_SEC) + +/* UE-detection-duration in BEx/Skyhawk: + * All PFs must wait for this duration after they detect UE before reading + * SLIPORT_SEMAPHORE register. At the end of this duration, the Firmware + * guarantees that the SLIPORT_SEMAPHORE register is updated to indicate + * if the UE is recoverable. + */ +#define ERR_RECOVERY_UE_DETECT_DURATION BE_SEC + +/* Initial idle time (in msec) to elapse after driver load, + * before UE recovery is allowed. + */ +#define ERR_IDLE_HR 24 +#define ERR_RECOVERY_IDLE_TIME (ERR_IDLE_HR * BE_HOUR) + +/* Time interval (in msec) after which UE recovery can be repeated */ +#define ERR_INTERVAL_HR 72 +#define ERR_RECOVERY_INTERVAL (ERR_INTERVAL_HR * BE_HOUR) + +/* BEx/SH UE recovery state machine */ +enum { + ERR_RECOVERY_ST_NONE = 0, /* No Recovery */ + ERR_RECOVERY_ST_DETECT = 1, /* UE detection duration */ + ERR_RECOVERY_ST_RESET = 2, /* Reset Phase (PF0 only) */ + ERR_RECOVERY_ST_PRE_POLL = 3, /* Pre-Poll Phase (all PFs) */ + ERR_RECOVERY_ST_REINIT = 4 /* Re-initialize Phase */ +}; + +struct be_error_recovery { + /* Lancer error recovery variables */ + u8 recovery_retries; + + /* BEx/Skyhawk error recovery variables */ + u8 recovery_state; + u16 ue_to_reset_time; /* Time after UE, to soft reset + * the chip - PF0 only + */ + u16 ue_to_poll_time; /* Time after UE, to Restart Polling + * of SLIPORT_SEMAPHORE reg + */ + u16 last_err_code; + bool recovery_supported; + unsigned long probe_time; + unsigned long last_recovery_time; + + /* Common to both Lancer & BEx/SH error recovery */ + u32 resched_delay; + struct delayed_work err_detection_work; +}; + +/* Ethtool priv_flags */ +#define BE_DISABLE_TPE_RECOVERY 0x1 + struct be_adapter { struct pci_dev *pdev; struct net_device *netdev; @@ -523,7 +587,7 @@ struct be_adapter { struct be_dma_mem mbox_mem_alloced; struct be_mcc_obj mcc_obj; - spinlock_t mcc_lock; /* For serializing mcc cmds to BE card */ + struct mutex mcc_lock; /* For serializing mcc cmds to BE card */ spinlock_t mcc_cq_lock; u16 cfg_num_rx_irqs; /* configured via set-channels */ @@ -556,7 +620,6 @@ struct be_adapter { struct delayed_work work; u16 work_counter; - struct delayed_work be_err_detection_work; u8 recovery_retries; u8 err_flags; bool pcicfg_mapped; /* pcicfg obtained via pci_iomap() */ @@ -570,9 +633,15 @@ struct be_adapter { int if_handle; /* Used to configure filtering */ u32 if_flags; /* Interface filtering flags */ u32 *pmac_id; /* MAC addr handle used by BE card */ + struct be_eth_addr *uc_list;/* list of uc-addrs programmed (not perm) */ u32 uc_macs; /* Count of secondary UC MAC programmed */ + struct be_eth_addr *mc_list;/* list of mcast addrs programmed */ + u32 mc_count; unsigned long vids[BITS_TO_LONGS(VLAN_N_VID)]; u16 vlans_added; + bool update_uc_list; + bool update_mc_list; + struct mutex rx_filter_lock;/* For protecting vids[] & mc/uc_list[] */ u32 beacon_state; /* for set_phys_id */ @@ -624,6 +693,18 @@ struct be_adapter { u32 fat_dump_len; u16 serial_num[CNTL_SERIAL_NUM_WORDS]; u8 phy_state; /* state of sfp optics (functional, faulted, etc.,) */ + u8 dev_mac[ETH_ALEN]; + u32 priv_flags; /* ethtool get/set_priv_flags() */ + struct be_error_recovery error_recovery; +}; + +/* Used for defered FW config cmds. Add fields to this struct as reqd */ +struct be_cmd_work { + struct work_struct work; + struct be_adapter *adapter; + union { + __be16 vxlan_port; + } info; }; #define be_physfn(adapter) (!adapter->virtfn) @@ -848,6 +929,9 @@ static inline bool is_ipv4_pkt(struct sk_buff *skb) return skb->protocol == htons(ETH_P_IP) && ip_hdr(skb)->version == 4; } +#define be_error_recovering(adapter) \ + (adapter->flags & BE_FLAGS_TRY_RECOVERY) + #define BE_ERROR_EEH 1 #define BE_ERROR_UE BIT(1) #define BE_ERROR_FW BIT(2) diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.c b/drivers/net/ethernet/emulex/benet/be_cmds.c index 2cc11756859f..9cffe48be156 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.c +++ b/drivers/net/ethernet/emulex/benet/be_cmds.c @@ -92,6 +92,11 @@ static struct be_cmd_priv_map cmd_priv_map[] = { CMD_SUBSYSTEM_COMMON, BE_PRIV_DEVCFG | BE_PRIV_VHADM }, + { + OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, + CMD_SUBSYSTEM_COMMON, + BE_PRIV_DEVCFG + } }; static bool be_cmd_allowed(struct be_adapter *adapter, u8 opcode, u8 subsystem) @@ -571,7 +576,7 @@ int be_process_mcc(struct be_adapter *adapter) /* Wait till no more pending mcc requests are present */ static int be_mcc_wait_compl(struct be_adapter *adapter) { -#define mcc_timeout 120000 /* 12s timeout */ +#define mcc_timeout 12000 /* 12s timeout */ int i, status = 0; struct be_mcc_obj *mcc_obj = &adapter->mcc_obj; @@ -585,7 +590,7 @@ static int be_mcc_wait_compl(struct be_adapter *adapter) if (atomic_read(&mcc_obj->q.used) == 0) break; - udelay(100); + usleep_range(500, 1000); } if (i == mcc_timeout) { dev_err(&adapter->pdev->dev, "FW not responding\n"); @@ -705,7 +710,7 @@ static int be_mbox_notify_wait(struct be_adapter *adapter) return 0; } -static u16 be_POST_stage_get(struct be_adapter *adapter) +u16 be_POST_stage_get(struct be_adapter *adapter) { u32 sem; @@ -863,7 +868,7 @@ static bool use_mcc(struct be_adapter *adapter) static int be_cmd_lock(struct be_adapter *adapter) { if (use_mcc(adapter)) { - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); return 0; } else { return mutex_lock_interruptible(&adapter->mbox_lock); @@ -874,7 +879,7 @@ static int be_cmd_lock(struct be_adapter *adapter) static void be_cmd_unlock(struct be_adapter *adapter) { if (use_mcc(adapter)) - spin_unlock_bh(&adapter->mcc_lock); + return mutex_unlock(&adapter->mcc_lock); else return mutex_unlock(&adapter->mbox_lock); } @@ -1044,7 +1049,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, struct be_cmd_req_mac_query *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1073,7 +1078,7 @@ int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1085,7 +1090,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, struct be_cmd_req_pmac_add *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1110,7 +1115,7 @@ int be_cmd_pmac_add(struct be_adapter *adapter, u8 *mac_addr, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (status == MCC_STATUS_UNAUTHORIZED_REQUEST) status = -EPERM; @@ -1128,7 +1133,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) if (pmac_id == -1) return 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1148,7 +1153,7 @@ int be_cmd_pmac_del(struct be_adapter *adapter, u32 if_id, int pmac_id, u32 dom) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1411,7 +1416,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, struct be_dma_mem *q_mem = &rxq->dma_mem; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1441,7 +1446,7 @@ int be_cmd_rxq_create(struct be_adapter *adapter, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1505,7 +1510,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) struct be_cmd_req_q_destroy *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1522,7 +1527,7 @@ int be_cmd_rxq_destroy(struct be_adapter *adapter, struct be_queue_info *q) q->created = false; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1590,7 +1595,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) struct be_cmd_req_hdr *hdr; int status = 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1618,7 +1623,7 @@ int be_cmd_get_stats(struct be_adapter *adapter, struct be_dma_mem *nonemb_cmd) adapter->stats_cmd_sent = true; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1634,7 +1639,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, CMD_SUBSYSTEM_ETH)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1657,7 +1662,7 @@ int lancer_cmd_get_pport_stats(struct be_adapter *adapter, adapter->stats_cmd_sent = true; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1694,7 +1699,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, struct be_cmd_req_link_status *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); if (link_status) *link_status = LINK_DOWN; @@ -1733,7 +1738,7 @@ int be_cmd_link_status_query(struct be_adapter *adapter, u16 *link_speed, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1744,7 +1749,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) struct be_cmd_req_get_cntl_addnl_attribs *req; int status = 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1759,7 +1764,7 @@ int be_cmd_get_die_temperature(struct be_adapter *adapter) status = be_mcc_notify(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1808,7 +1813,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) if (!get_fat_cmd.va) return -ENOMEM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); while (total_size) { buf_size = min(total_size, (u32)60*1024); @@ -1848,7 +1853,7 @@ int be_cmd_get_fat_dump(struct be_adapter *adapter, u32 buf_len, void *buf) err: dma_free_coherent(&adapter->pdev->dev, get_fat_cmd.size, get_fat_cmd.va, get_fat_cmd.dma); - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1859,7 +1864,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) struct be_cmd_req_get_fw_version *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1882,7 +1887,7 @@ int be_cmd_get_fw_ver(struct be_adapter *adapter) sizeof(adapter->fw_on_flash)); } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1896,7 +1901,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, struct be_cmd_req_modify_eq_delay *req; int status = 0, i; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1919,7 +1924,7 @@ static int __be_cmd_modify_eqd(struct be_adapter *adapter, status = be_mcc_notify(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1946,7 +1951,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, struct be_cmd_req_vlan_config *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1968,7 +1973,7 @@ int be_cmd_vlan_config(struct be_adapter *adapter, u32 if_id, u16 *vtag_array, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -1979,7 +1984,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) struct be_cmd_req_rx_filter *req = mem->va; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -1996,8 +2001,7 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) req->if_flags = (value == ON) ? req->if_flags_mask : 0; if (flags & BE_IF_FLAGS_MULTICAST) { - struct netdev_hw_addr *ha; - int i = 0; + int i; /* Reset mcast promisc mode if already set by setting mask * and not setting flags field @@ -2005,14 +2009,15 @@ static int __be_cmd_rx_filter(struct be_adapter *adapter, u32 flags, u32 value) req->if_flags_mask |= cpu_to_le32(BE_IF_FLAGS_MCAST_PROMISCUOUS & be_if_cap_flags(adapter)); - req->mcast_num = cpu_to_le32(netdev_mc_count(adapter->netdev)); - netdev_for_each_mc_addr(ha, adapter->netdev) - memcpy(req->mcast_mac[i++].byte, ha->addr, ETH_ALEN); + req->mcast_num = cpu_to_le32(adapter->mc_count); + for (i = 0; i < adapter->mc_count; i++) + ether_addr_copy(req->mcast_mac[i].byte, + adapter->mc_list[i].mac); } status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2043,7 +2048,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2063,7 +2068,7 @@ int be_cmd_set_flow_control(struct be_adapter *adapter, u32 tx_fc, u32 rx_fc) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (base_status(status) == MCC_STATUS_FEATURE_NOT_SUPPORTED) return -EOPNOTSUPP; @@ -2082,7 +2087,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2105,7 +2110,7 @@ int be_cmd_get_flow_control(struct be_adapter *adapter, u32 *tx_fc, u32 *rx_fc) } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2186,7 +2191,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, if (!(be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS)) return 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2211,7 +2216,7 @@ int be_cmd_rss_config(struct be_adapter *adapter, u8 *rsstable, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2223,7 +2228,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, struct be_cmd_req_enable_disable_beacon *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2244,7 +2249,7 @@ int be_cmd_set_beacon_state(struct be_adapter *adapter, u8 port_num, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2255,7 +2260,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) struct be_cmd_req_get_beacon_state *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2279,7 +2284,7 @@ int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num, u32 *state) } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2303,7 +2308,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, return -ENOMEM; } - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2325,7 +2330,7 @@ int be_cmd_read_port_transceiver_data(struct be_adapter *adapter, memcpy(data, resp->page_data, PAGE_DATA_LEN); } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); return status; } @@ -2342,7 +2347,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, void *ctxt = NULL; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2384,7 +2389,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, if (status) goto err_unlock; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(60000))) @@ -2403,7 +2408,7 @@ static int lancer_cmd_write_object(struct be_adapter *adapter, return status; err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2457,7 +2462,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, struct be_mcc_wrb *wrb; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2475,7 +2480,7 @@ static int lancer_cmd_delete_object(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2488,7 +2493,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, struct lancer_cmd_resp_read_object *resp; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2522,7 +2527,7 @@ int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd, } err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2534,7 +2539,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, struct be_cmd_write_flashrom *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); adapter->flash_status = 0; wrb = wrb_from_mccq(adapter); @@ -2559,7 +2564,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, if (status) goto err_unlock; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(40000))) @@ -2570,7 +2575,7 @@ static int be_cmd_write_flashrom(struct be_adapter *adapter, return status; err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -2581,7 +2586,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, struct be_mcc_wrb *wrb; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -2608,7 +2613,7 @@ static int be_cmd_get_flash_crc(struct be_adapter *adapter, u8 *flashed_crc, memcpy(flashed_crc, req->crc, 4); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3192,7 +3197,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, struct be_cmd_req_acpi_wol_magic_config *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3209,7 +3214,7 @@ int be_cmd_enable_magic_wol(struct be_adapter *adapter, u8 *mac, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3224,7 +3229,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3247,7 +3252,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, if (status) goto err_unlock; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); if (!wait_for_completion_timeout(&adapter->et_cmd_compl, msecs_to_jiffies(SET_LB_MODE_TIMEOUT))) @@ -3256,7 +3261,7 @@ int be_cmd_set_loopback(struct be_adapter *adapter, u8 port_num, return status; err_unlock: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3273,7 +3278,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3299,7 +3304,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, if (status) goto err; - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); wait_for_completion(&adapter->et_cmd_compl); resp = embedded_payload(wrb); @@ -3307,7 +3312,7 @@ int be_cmd_loopback_test(struct be_adapter *adapter, u32 port_num, return status; err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3323,7 +3328,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, CMD_SUBSYSTEM_LOWLEVEL)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3357,7 +3362,7 @@ int be_cmd_ddr_dma_test(struct be_adapter *adapter, u64 pattern, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3368,7 +3373,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, struct be_cmd_req_seeprom_read *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3384,7 +3389,7 @@ int be_cmd_get_seeprom_data(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3399,7 +3404,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3444,7 +3449,7 @@ int be_cmd_get_phy_info(struct be_adapter *adapter) } dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3454,7 +3459,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) struct be_cmd_req_set_qos *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3474,7 +3479,7 @@ static int be_cmd_set_qos(struct be_adapter *adapter, u32 bps, u32 domain) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3581,7 +3586,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, struct be_cmd_req_get_fn_privileges *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3613,7 +3618,7 @@ int be_cmd_get_fn_privileges(struct be_adapter *adapter, u32 *privilege, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3625,7 +3630,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, struct be_cmd_req_set_fn_privileges *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3645,7 +3650,7 @@ int be_cmd_set_fn_privileges(struct be_adapter *adapter, u32 privileges, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3677,7 +3682,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, return -ENOMEM; } - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3741,7 +3746,7 @@ int be_cmd_get_mac_from_list(struct be_adapter *adapter, u8 *mac, } out: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); dma_free_coherent(&adapter->pdev->dev, get_mac_list_cmd.size, get_mac_list_cmd.va, get_mac_list_cmd.dma); return status; @@ -3801,7 +3806,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, if (!cmd.va) return -ENOMEM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3823,7 +3828,7 @@ int be_cmd_set_mac_list(struct be_adapter *adapter, u8 *mac_array, err: dma_free_coherent(&adapter->pdev->dev, cmd.size, cmd.va, cmd.dma); - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3859,7 +3864,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, CMD_SUBSYSTEM_COMMON)) return -EPERM; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3900,7 +3905,7 @@ int be_cmd_set_hsw_config(struct be_adapter *adapter, u16 pvid, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -3914,7 +3919,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, int status; u16 vid; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -3961,7 +3966,7 @@ int be_cmd_get_hsw_config(struct be_adapter *adapter, u16 *pvid, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4127,6 +4132,10 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_get_ext_fat_caps *req; int status; + if (!be_cmd_allowed(adapter, OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, + CMD_SUBSYSTEM_COMMON)) + return -EPERM; + if (mutex_lock_interruptible(&adapter->mbox_lock)) return -1; @@ -4138,7 +4147,7 @@ int be_cmd_get_ext_fat_capabilites(struct be_adapter *adapter, req = cmd->va; be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, - OPCODE_COMMON_GET_EXT_FAT_CAPABILITES, + OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES, cmd->size, wrb, cmd); req->parameter_type = cpu_to_le32(1); @@ -4156,7 +4165,7 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, struct be_cmd_req_set_ext_fat_caps *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4167,12 +4176,12 @@ int be_cmd_set_ext_fat_capabilites(struct be_adapter *adapter, req = cmd->va; memcpy(&req->set_params, configs, sizeof(struct be_fat_conf_params)); be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, - OPCODE_COMMON_SET_EXT_FAT_CAPABILITES, + OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES, cmd->size, wrb, cmd); status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4373,7 +4382,7 @@ err: } /* This routine returns a list of all the NIC PF_nums in the adapter */ -u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) +static u16 be_get_nic_pf_num_list(u8 *buf, u32 desc_count, u16 *nic_pf_nums) { struct be_res_desc_hdr *hdr = (struct be_res_desc_hdr *)buf; struct be_pcie_res_desc *pcie = NULL; @@ -4525,7 +4534,7 @@ static int be_cmd_set_profile_config(struct be_adapter *adapter, void *desc, } /* Mark all fields invalid */ -void be_reset_nic_desc(struct be_nic_res_desc *nic) +static void be_reset_nic_desc(struct be_nic_res_desc *nic) { memset(nic, 0, sizeof(*nic)); nic->unicast_mac_count = 0xFFFF; @@ -4650,7 +4659,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) if (iface == 0xFFFFFFFF) return -1; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4667,7 +4676,7 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op) status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4701,7 +4710,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, struct be_cmd_resp_get_iface_list *resp; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4722,7 +4731,7 @@ int be_cmd_get_if_id(struct be_adapter *adapter, struct be_vf_cfg *vf_cfg, } err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4816,7 +4825,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) if (BEx_chip(adapter)) return 0; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4834,7 +4843,7 @@ int be_cmd_enable_vf(struct be_adapter *adapter, u8 domain) req->enable = 1; status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4898,14 +4907,15 @@ err: return status; } -int __be_cmd_set_logical_link_config(struct be_adapter *adapter, - int link_state, int version, u8 domain) +static int +__be_cmd_set_logical_link_config(struct be_adapter *adapter, + int link_state, int version, u8 domain) { struct be_mcc_wrb *wrb; struct be_cmd_req_set_ll_link *req; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4931,7 +4941,7 @@ int __be_cmd_set_logical_link_config(struct be_adapter *adapter, status = be_mcc_notify_wait(adapter); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } @@ -4954,6 +4964,57 @@ int be_cmd_set_logical_link_config(struct be_adapter *adapter, 1, domain); return status; } + +int be_cmd_set_features(struct be_adapter *adapter) +{ + struct be_cmd_resp_set_features *resp; + struct be_cmd_req_set_features *req; + struct be_mcc_wrb *wrb; + int status; + + if (mutex_lock_interruptible(&adapter->mcc_lock)) + return -1; + + wrb = wrb_from_mccq(adapter); + if (!wrb) { + status = -EBUSY; + goto err; + } + + req = embedded_payload(wrb); + + be_wrb_cmd_hdr_prepare(&req->hdr, CMD_SUBSYSTEM_COMMON, + OPCODE_COMMON_SET_FEATURES, + sizeof(*req), wrb, NULL); + + req->features = cpu_to_le32(BE_FEATURE_UE_RECOVERY); + req->parameter_len = cpu_to_le32(sizeof(struct be_req_ue_recovery)); + req->parameter.req.uer = cpu_to_le32(BE_UE_RECOVERY_UER_MASK); + + status = be_mcc_notify_wait(adapter); + if (status) + goto err; + + resp = embedded_payload(wrb); + + adapter->error_recovery.ue_to_poll_time = + le16_to_cpu(resp->parameter.resp.ue2rp); + adapter->error_recovery.ue_to_reset_time = + le16_to_cpu(resp->parameter.resp.ue2sr); + adapter->error_recovery.recovery_supported = true; +err: + /* Checking "MCC_STATUS_INVALID_LENGTH" for SKH as FW + * returns this error in older firmware versions + */ + if (base_status(status) == MCC_STATUS_ILLEGAL_REQUEST || + base_status(status) == MCC_STATUS_INVALID_LENGTH) + dev_info(&adapter->pdev->dev, + "Adapter does not support HW error recovery\n"); + + mutex_unlock(&adapter->mcc_lock); + return status; +} + int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, int wrb_payload_size, u16 *cmd_status, u16 *ext_status) { @@ -4964,7 +5025,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, struct be_cmd_resp_hdr *resp; int status; - spin_lock_bh(&adapter->mcc_lock); + mutex_lock(&adapter->mcc_lock); wrb = wrb_from_mccq(adapter); if (!wrb) { @@ -4987,7 +5048,7 @@ int be_roce_mcc_cmd(void *netdev_handle, void *wrb_payload, memcpy(wrb_payload, resp, sizeof(*resp) + resp->response_length); be_dws_le_to_cpu(wrb_payload, sizeof(*resp) + resp->response_length); err: - spin_unlock_bh(&adapter->mcc_lock); + mutex_unlock(&adapter->mcc_lock); return status; } EXPORT_SYMBOL(be_roce_mcc_cmd); diff --git a/drivers/net/ethernet/emulex/benet/be_cmds.h b/drivers/net/ethernet/emulex/benet/be_cmds.h index 0d6be224a787..1bd82bcb3be5 100644 --- a/drivers/net/ethernet/emulex/benet/be_cmds.h +++ b/drivers/net/ethernet/emulex/benet/be_cmds.h @@ -58,7 +58,8 @@ enum mcc_base_status { MCC_STATUS_INSUFFICIENT_BUFFER = 4, MCC_STATUS_UNAUTHORIZED_REQUEST = 5, MCC_STATUS_NOT_SUPPORTED = 66, - MCC_STATUS_FEATURE_NOT_SUPPORTED = 68 + MCC_STATUS_FEATURE_NOT_SUPPORTED = 68, + MCC_STATUS_INVALID_LENGTH = 116 }; /* Additional status */ @@ -294,8 +295,8 @@ struct be_mcc_mailbox { #define OPCODE_COMMON_GET_PHY_DETAILS 102 #define OPCODE_COMMON_SET_DRIVER_FUNCTION_CAP 103 #define OPCODE_COMMON_GET_CNTL_ADDITIONAL_ATTRIBUTES 121 -#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITES 125 -#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITES 126 +#define OPCODE_COMMON_GET_EXT_FAT_CAPABILITIES 125 +#define OPCODE_COMMON_SET_EXT_FAT_CAPABILITIES 126 #define OPCODE_COMMON_GET_MAC_LIST 147 #define OPCODE_COMMON_SET_MAC_LIST 148 #define OPCODE_COMMON_GET_HSW_CONFIG 152 @@ -308,6 +309,7 @@ struct be_mcc_mailbox { #define OPCODE_COMMON_READ_OBJECT 171 #define OPCODE_COMMON_WRITE_OBJECT 172 #define OPCODE_COMMON_DELETE_OBJECT 174 +#define OPCODE_COMMON_SET_FEATURES 191 #define OPCODE_COMMON_MANAGE_IFACE_FILTERS 193 #define OPCODE_COMMON_GET_IFACE_LIST 194 #define OPCODE_COMMON_ENABLE_DISABLE_VF 196 @@ -2315,6 +2317,41 @@ struct be_cmd_resp_get_iface_list { struct be_if_desc if_desc; }; +/************** Set Features *******************/ +#define BE_FEATURE_UE_RECOVERY 0x10 +#define BE_UE_RECOVERY_UER_MASK 0x1 + +struct be_req_ue_recovery { + u32 uer; + u32 rsvd; +}; + +struct be_cmd_req_set_features { + struct be_cmd_req_hdr hdr; + u32 features; + u32 parameter_len; + union { + struct be_req_ue_recovery req; + u32 rsvd[2]; + } parameter; +}; + +struct be_resp_ue_recovery { + u32 uer; + u16 ue2rp; + u16 ue2sr; +}; + +struct be_cmd_resp_set_features { + struct be_cmd_resp_hdr hdr; + u32 features; + u32 parameter_len; + union { + struct be_resp_ue_recovery resp; + u32 rsvd[2]; + } parameter; +}; + /*************** Set logical link ********************/ #define PLINK_ENABLE BIT(0) #define PLINK_TRACK BIT(8) @@ -2343,6 +2380,7 @@ struct be_cmd_req_manage_iface_filters { u32 cap_control_flags; } __packed; +u16 be_POST_stage_get(struct be_adapter *adapter); int be_pci_fnum_get(struct be_adapter *adapter); int be_fw_wait_ready(struct be_adapter *adapter); int be_cmd_mac_addr_query(struct be_adapter *adapter, u8 *mac_addr, @@ -2470,3 +2508,4 @@ int be_cmd_manage_iface(struct be_adapter *adapter, u32 iface, u8 op); int be_cmd_set_sriov_config(struct be_adapter *adapter, struct be_resources res, u16 num_vfs, struct be_resources *vft_res); +int be_cmd_set_features(struct be_adapter *adapter); diff --git a/drivers/net/ethernet/emulex/benet/be_ethtool.c b/drivers/net/ethernet/emulex/benet/be_ethtool.c index 50e7be5da50c..0a48a31225e6 100644 --- a/drivers/net/ethernet/emulex/benet/be_ethtool.c +++ b/drivers/net/ethernet/emulex/benet/be_ethtool.c @@ -421,6 +421,10 @@ static void be_get_ethtool_stats(struct net_device *netdev, } } +static const char be_priv_flags[][ETH_GSTRING_LEN] = { + "disable-tpe-recovery" +}; + static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset, uint8_t *data) { @@ -454,6 +458,10 @@ static void be_get_stat_strings(struct net_device *netdev, uint32_t stringset, data += ETH_GSTRING_LEN; } break; + case ETH_SS_PRIV_FLAGS: + for (i = 0; i < ARRAY_SIZE(be_priv_flags); i++) + strcpy(data + i * ETH_GSTRING_LEN, be_priv_flags[i]); + break; } } @@ -468,6 +476,8 @@ static int be_get_sset_count(struct net_device *netdev, int stringset) return ETHTOOL_STATS_NUM + adapter->num_rx_qs * ETHTOOL_RXSTATS_NUM + adapter->num_tx_qs * ETHTOOL_TXSTATS_NUM; + case ETH_SS_PRIV_FLAGS: + return ARRAY_SIZE(be_priv_flags); default: return -EINVAL; } @@ -1360,6 +1370,34 @@ err: return be_cmd_status(status); } +static u32 be_get_priv_flags(struct net_device *netdev) +{ + struct be_adapter *adapter = netdev_priv(netdev); + + return adapter->priv_flags; +} + +static int be_set_priv_flags(struct net_device *netdev, u32 flags) +{ + struct be_adapter *adapter = netdev_priv(netdev); + bool tpe_old = !!(adapter->priv_flags & BE_DISABLE_TPE_RECOVERY); + bool tpe_new = !!(flags & BE_DISABLE_TPE_RECOVERY); + + if (tpe_old != tpe_new) { + if (tpe_new) { + adapter->priv_flags |= BE_DISABLE_TPE_RECOVERY; + dev_info(&adapter->pdev->dev, + "HW error recovery is disabled\n"); + } else { + adapter->priv_flags &= ~BE_DISABLE_TPE_RECOVERY; + dev_info(&adapter->pdev->dev, + "HW error recovery is enabled\n"); + } + } + + return 0; +} + const struct ethtool_ops be_ethtool_ops = { .get_settings = be_get_settings, .get_drvinfo = be_get_drvinfo, @@ -1373,6 +1411,8 @@ const struct ethtool_ops be_ethtool_ops = { .get_ringparam = be_get_ringparam, .get_pauseparam = be_get_pauseparam, .set_pauseparam = be_set_pauseparam, + .set_priv_flags = be_set_priv_flags, + .get_priv_flags = be_get_priv_flags, .get_strings = be_get_stat_strings, .set_phys_id = be_set_phys_id, .set_dump = be_set_dump, diff --git a/drivers/net/ethernet/emulex/benet/be_hw.h b/drivers/net/ethernet/emulex/benet/be_hw.h index c684bb32b487..92942c84d329 100644 --- a/drivers/net/ethernet/emulex/benet/be_hw.h +++ b/drivers/net/ethernet/emulex/benet/be_hw.h @@ -32,18 +32,23 @@ #define MPU_EP_CONTROL 0 /********** MPU semphore: used for SH & BE *************/ +#define SLIPORT_SOFTRESET_OFFSET 0x5c /* CSR BAR offset */ #define SLIPORT_SEMAPHORE_OFFSET_BEx 0xac /* CSR BAR offset */ #define SLIPORT_SEMAPHORE_OFFSET_SH 0x94 /* PCI-CFG offset */ #define POST_STAGE_MASK 0x0000FFFF #define POST_ERR_MASK 0x1 #define POST_ERR_SHIFT 31 +#define POST_ERR_RECOVERY_CODE_MASK 0xFFF + +/* Soft Reset register masks */ +#define SLIPORT_SOFTRESET_SR_MASK 0x00000080 /* SR bit */ /* MPU semphore POST stage values */ #define POST_STAGE_AWAITING_HOST_RDY 0x1 /* FW awaiting goahead from host */ #define POST_STAGE_HOST_RDY 0x2 /* Host has given go-ahed to FW */ #define POST_STAGE_BE_RESET 0x3 /* Host wants to reset chip */ #define POST_STAGE_ARMFW_RDY 0xc000 /* FW is done with POST */ - +#define POST_STAGE_RECOVERABLE_ERR 0xE000 /* Recoverable err detected */ /* Lancer SLIPORT registers */ #define SLIPORT_STATUS_OFFSET 0x404 diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 874c7539a79d..dcb930a52613 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -41,6 +41,11 @@ static ushort rx_frag_size = 2048; module_param(rx_frag_size, ushort, S_IRUGO); MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data."); +/* Per-module error detection/recovery workq shared across all functions. + * Each function schedules its own work request on this shared workq. + */ +static struct workqueue_struct *be_err_recovery_workq; + static const struct pci_device_id be_dev_ids[] = { { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) }, { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) }, @@ -53,6 +58,10 @@ static const struct pci_device_id be_dev_ids[] = { { 0 } }; MODULE_DEVICE_TABLE(pci, be_dev_ids); + +/* Workqueue used by all functions for defering cmd calls to the adapter */ +static struct workqueue_struct *be_wq; + /* UE Status Low CSR */ static const char * const ue_status_low_desc[] = { "CEV", @@ -260,6 +269,38 @@ void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped) iowrite32(val, adapter->db + DB_CQ_OFFSET); } +static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac) +{ + int i; + + /* Check if mac has already been added as part of uc-list */ + for (i = 0; i < adapter->uc_macs; i++) { + if (ether_addr_equal((u8 *)&adapter->uc_list[i * ETH_ALEN], + mac)) { + /* mac already added, skip addition */ + adapter->pmac_id[0] = adapter->pmac_id[i + 1]; + return 0; + } + } + + return be_cmd_pmac_add(adapter, mac, adapter->if_handle, + &adapter->pmac_id[0], 0); +} + +static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id) +{ + int i; + + /* Skip deletion if the programmed mac is + * being used in uc-list + */ + for (i = 0; i < adapter->uc_macs; i++) { + if (adapter->pmac_id[i + 1] == pmac_id) + return; + } + be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0); +} + static int be_mac_addr_set(struct net_device *netdev, void *p) { struct be_adapter *adapter = netdev_priv(netdev); @@ -267,7 +308,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) struct sockaddr *addr = p; int status; u8 mac[ETH_ALEN]; - u32 old_pmac_id = adapter->pmac_id[0], curr_pmac_id = 0; + u32 old_pmac_id = adapter->pmac_id[0]; if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; @@ -275,7 +316,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) /* Proceed further only if, User provided MAC is different * from active MAC */ - if (ether_addr_equal(addr->sa_data, netdev->dev_addr)) + if (ether_addr_equal(addr->sa_data, adapter->dev_mac)) return 0; /* if device is not running, copy MAC to netdev->dev_addr */ @@ -288,23 +329,22 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) * FILTMGMT privilege. This failure is OK, only if the PF programmed * the MAC for the VF. */ - status = be_cmd_pmac_add(adapter, (u8 *)addr->sa_data, - adapter->if_handle, &adapter->pmac_id[0], 0); + mutex_lock(&adapter->rx_filter_lock); + status = be_dev_mac_add(adapter, (u8 *)addr->sa_data); if (!status) { - curr_pmac_id = adapter->pmac_id[0]; /* Delete the old programmed MAC. This call may fail if the * old MAC was already deleted by the PF driver. */ if (adapter->pmac_id[0] != old_pmac_id) - be_cmd_pmac_del(adapter, adapter->if_handle, - old_pmac_id, 0); + be_dev_mac_del(adapter, old_pmac_id); } + mutex_unlock(&adapter->rx_filter_lock); /* Decide if the new MAC is successfully activated only after * querying the FW */ - status = be_cmd_get_active_mac(adapter, curr_pmac_id, mac, + status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac, adapter->if_handle, true, 0); if (status) goto err; @@ -317,6 +357,7 @@ static int be_mac_addr_set(struct net_device *netdev, void *p) goto err; } done: + ether_addr_copy(adapter->dev_mac, addr->sa_data); ether_addr_copy(netdev->dev_addr, addr->sa_data); dev_info(dev, "MAC address changed to %pM\n", addr->sa_data); return 0; @@ -1420,13 +1461,18 @@ static int be_vid_config(struct be_adapter *adapter) u16 num = 0, i = 0; int status = 0; - /* No need to further configure vids if in promiscuous mode */ - if (be_in_all_promisc(adapter)) + /* No need to change the VLAN state if the I/F is in promiscuous */ + if (adapter->netdev->flags & IFF_PROMISC) return 0; if (adapter->vlans_added > be_max_vlans(adapter)) return be_set_vlan_promisc(adapter); + if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) { + status = be_clear_vlan_promisc(adapter); + if (status) + return status; + } /* Construct VLAN Table to give to HW */ for_each_set_bit(i, adapter->vids, VLAN_N_VID) vids[num++] = cpu_to_le16(i); @@ -1439,8 +1485,6 @@ static int be_vid_config(struct be_adapter *adapter) addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES) return be_set_vlan_promisc(adapter); - } else if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) { - status = be_clear_vlan_promisc(adapter); } return status; } @@ -1450,46 +1494,45 @@ static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid) struct be_adapter *adapter = netdev_priv(netdev); int status = 0; + mutex_lock(&adapter->rx_filter_lock); + /* Packets with VID 0 are always received by Lancer by default */ if (lancer_chip(adapter) && vid == 0) - return status; + goto done; if (test_bit(vid, adapter->vids)) - return status; + goto done; set_bit(vid, adapter->vids); adapter->vlans_added++; status = be_vid_config(adapter); - if (status) { - adapter->vlans_added--; - clear_bit(vid, adapter->vids); - } - +done: + mutex_unlock(&adapter->rx_filter_lock); return status; } static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid) { struct be_adapter *adapter = netdev_priv(netdev); + int status = 0; + + mutex_lock(&adapter->rx_filter_lock); /* Packets with VID 0 are always received by Lancer by default */ if (lancer_chip(adapter) && vid == 0) - return 0; + goto done; if (!test_bit(vid, adapter->vids)) - return 0; + goto done; clear_bit(vid, adapter->vids); adapter->vlans_added--; - return be_vid_config(adapter); -} - -static void be_clear_all_promisc(struct be_adapter *adapter) -{ - be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, OFF); - adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS; + status = be_vid_config(adapter); +done: + mutex_unlock(&adapter->rx_filter_lock); + return status; } static void be_set_all_promisc(struct be_adapter *adapter) @@ -1510,75 +1553,226 @@ static void be_set_mc_promisc(struct be_adapter *adapter) adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS; } -static void be_set_mc_list(struct be_adapter *adapter) +static void be_set_uc_promisc(struct be_adapter *adapter) { int status; - status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON); + if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) + return; + + status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON); if (!status) - adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS; - else + adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS; +} + +static void be_clear_uc_promisc(struct be_adapter *adapter) +{ + int status; + + if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)) + return; + + status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF); + if (!status) + adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS; +} + +/* The below 2 functions are the callback args for __dev_mc_sync/dev_uc_sync(). + * We use a single callback function for both sync and unsync. We really don't + * add/remove addresses through this callback. But, we use it to detect changes + * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode(). + */ +static int be_uc_list_update(struct net_device *netdev, + const unsigned char *addr) +{ + struct be_adapter *adapter = netdev_priv(netdev); + + adapter->update_uc_list = true; + return 0; +} + +static int be_mc_list_update(struct net_device *netdev, + const unsigned char *addr) +{ + struct be_adapter *adapter = netdev_priv(netdev); + + adapter->update_mc_list = true; + return 0; +} + +static void be_set_mc_list(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct netdev_hw_addr *ha; + bool mc_promisc = false; + int status; + + netif_addr_lock_bh(netdev); + __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update); + + if (netdev->flags & IFF_PROMISC) { + adapter->update_mc_list = false; + } else if (netdev->flags & IFF_ALLMULTI || + netdev_mc_count(netdev) > be_max_mc(adapter)) { + /* Enable multicast promisc if num configured exceeds + * what we support + */ + mc_promisc = true; + adapter->update_mc_list = false; + } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) { + /* Update mc-list unconditionally if the iface was previously + * in mc-promisc mode and now is out of that mode. + */ + adapter->update_mc_list = true; + } + + if (adapter->update_mc_list) { + int i = 0; + + /* cache the mc-list in adapter */ + netdev_for_each_mc_addr(ha, netdev) { + ether_addr_copy(adapter->mc_list[i].mac, ha->addr); + i++; + } + adapter->mc_count = netdev_mc_count(netdev); + } + netif_addr_unlock_bh(netdev); + + if (mc_promisc) { be_set_mc_promisc(adapter); + } else if (adapter->update_mc_list) { + status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON); + if (!status) + adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS; + else + be_set_mc_promisc(adapter); + + adapter->update_mc_list = false; + } +} + +static void be_clear_mc_list(struct be_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + __dev_mc_unsync(netdev, NULL); + be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF); + adapter->mc_count = 0; +} + +static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx) +{ + if (ether_addr_equal((u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + adapter->dev_mac)) { + adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0]; + return 0; + } + + return be_cmd_pmac_add(adapter, + (u8 *)&adapter->uc_list[uc_idx * ETH_ALEN], + adapter->if_handle, + &adapter->pmac_id[uc_idx + 1], 0); +} + +static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id) +{ + if (pmac_id == adapter->pmac_id[0]) + return; + + be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0); } static void be_set_uc_list(struct be_adapter *adapter) { + struct net_device *netdev = adapter->netdev; struct netdev_hw_addr *ha; - int i = 1; /* First slot is claimed by the Primary MAC */ + bool uc_promisc = false; + int curr_uc_macs = 0, i; - for (; adapter->uc_macs > 0; adapter->uc_macs--, i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i], 0); + netif_addr_lock_bh(netdev); + __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update); - if (netdev_uc_count(adapter->netdev) > be_max_uc(adapter)) { - be_set_all_promisc(adapter); - return; + if (netdev->flags & IFF_PROMISC) { + adapter->update_uc_list = false; + } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) { + uc_promisc = true; + adapter->update_uc_list = false; + } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) { + /* Update uc-list unconditionally if the iface was previously + * in uc-promisc mode and now is out of that mode. + */ + adapter->update_uc_list = true; } - netdev_for_each_uc_addr(ha, adapter->netdev) { - adapter->uc_macs++; /* First slot is for Primary MAC */ - be_cmd_pmac_add(adapter, (u8 *)ha->addr, adapter->if_handle, - &adapter->pmac_id[adapter->uc_macs], 0); + if (adapter->update_uc_list) { + i = 1; /* First slot is claimed by the Primary MAC */ + + /* cache the uc-list in adapter array */ + netdev_for_each_uc_addr(ha, netdev) { + ether_addr_copy(adapter->uc_list[i].mac, ha->addr); + i++; + } + curr_uc_macs = netdev_uc_count(netdev); + } + netif_addr_unlock_bh(netdev); + + if (uc_promisc) { + be_set_uc_promisc(adapter); + } else if (adapter->update_uc_list) { + be_clear_uc_promisc(adapter); + + for (i = 0; i < adapter->uc_macs; i++) + be_uc_mac_del(adapter, adapter->pmac_id[i + 1]); + + for (i = 0; i < curr_uc_macs; i++) + be_uc_mac_add(adapter, i); + adapter->uc_macs = curr_uc_macs; + adapter->update_uc_list = false; } } static void be_clear_uc_list(struct be_adapter *adapter) { + struct net_device *netdev = adapter->netdev; int i; - for (i = 1; i < (adapter->uc_macs + 1); i++) - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[i], 0); + __dev_uc_unsync(netdev, NULL); + for (i = 0; i < adapter->uc_macs; i++) + be_uc_mac_del(adapter, adapter->pmac_id[i + 1]); + adapter->uc_macs = 0; } -static void be_set_rx_mode(struct net_device *netdev) +static void __be_set_rx_mode(struct be_adapter *adapter) { - struct be_adapter *adapter = netdev_priv(netdev); + struct net_device *netdev = adapter->netdev; + + mutex_lock(&adapter->rx_filter_lock); if (netdev->flags & IFF_PROMISC) { - be_set_all_promisc(adapter); - return; + if (!be_in_all_promisc(adapter)) + be_set_all_promisc(adapter); + } else if (be_in_all_promisc(adapter)) { + /* We need to re-program the vlan-list or clear + * vlan-promisc mode (if needed) when the interface + * comes out of promisc mode. + */ + be_vid_config(adapter); } - /* Interface was previously in promiscuous mode; disable it */ - if (be_in_all_promisc(adapter)) { - be_clear_all_promisc(adapter); - if (adapter->vlans_added) - be_vid_config(adapter); - } - - /* Enable multicast promisc if num configured exceeds what we support */ - if (netdev->flags & IFF_ALLMULTI || - netdev_mc_count(netdev) > be_max_mc(adapter)) { - be_set_mc_promisc(adapter); - return; - } - - if (netdev_uc_count(netdev) != adapter->uc_macs) - be_set_uc_list(adapter); - + be_set_uc_list(adapter); be_set_mc_list(adapter); + + mutex_unlock(&adapter->rx_filter_lock); +} + +static void be_work_set_rx_mode(struct work_struct *work) +{ + struct be_cmd_work *cmd_work = + container_of(work, struct be_cmd_work, work); + + __be_set_rx_mode(cmd_work->adapter); + kfree(cmd_work); } static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) @@ -1701,7 +1895,8 @@ static int be_clear_vf_tvt(struct be_adapter *adapter, int vf) return 0; } -static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct be_adapter *adapter = netdev_priv(netdev); struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf]; @@ -1713,6 +1908,9 @@ static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan || qos) { vlan |= qos << VLAN_PRIO_SHIFT; status = be_set_vf_tvt(adapter, vf, vlan); @@ -3220,9 +3418,7 @@ void be_detect_error(struct be_adapter *adapter) */ if (ue_lo || ue_hi) { - dev_err(dev, - "Unrecoverable Error detected in the adapter"); - dev_err(dev, "Please reboot server to recover"); + dev_err(dev, "Error detected in the adapter"); if (skyhawk_chip(adapter)) be_set_error(adapter, BE_ERROR_UE); @@ -3425,10 +3621,9 @@ static void be_rx_qs_destroy(struct be_adapter *adapter) static void be_disable_if_filters(struct be_adapter *adapter) { - be_cmd_pmac_del(adapter, adapter->if_handle, - adapter->pmac_id[0], 0); - + be_dev_mac_del(adapter, adapter->pmac_id[0]); be_clear_uc_list(adapter); + be_clear_mc_list(adapter); /* The IFACE flags are enabled in the open path and cleared * in the close path. When a VF gets detached from the host and @@ -3462,6 +3657,11 @@ static int be_close(struct net_device *netdev) if (!(adapter->flags & BE_FLAGS_SETUP_DONE)) return 0; + /* Before attempting cleanup ensure all the pending cmds in the + * config_wq have finished execution + */ + flush_workqueue(be_wq); + be_disable_if_filters(adapter); if (adapter->flags & BE_FLAGS_NAPI_ENABLED) { @@ -3576,17 +3776,16 @@ static int be_enable_if_filters(struct be_adapter *adapter) /* For BE3 VFs, the PF programs the initial MAC address */ if (!(BEx_chip(adapter) && be_virtfn(adapter))) { - status = be_cmd_pmac_add(adapter, adapter->netdev->dev_addr, - adapter->if_handle, - &adapter->pmac_id[0], 0); + status = be_dev_mac_add(adapter, adapter->netdev->dev_addr); if (status) return status; + ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr); } if (adapter->vlans_added) be_vid_config(adapter); - be_set_rx_mode(adapter->netdev); + __be_set_rx_mode(adapter); return 0; } @@ -3759,8 +3958,13 @@ static void be_cancel_worker(struct be_adapter *adapter) static void be_cancel_err_detection(struct be_adapter *adapter) { + struct be_error_recovery *err_rec = &adapter->error_recovery; + + if (!be_err_recovery_workq) + return; + if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) { - cancel_delayed_work_sync(&adapter->be_err_detection_work); + cancel_delayed_work_sync(&err_rec->err_detection_work); adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED; } } @@ -3860,6 +4064,20 @@ static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs, vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1); } +static void be_if_destroy(struct be_adapter *adapter) +{ + be_cmd_if_destroy(adapter, adapter->if_handle, 0); + + kfree(adapter->pmac_id); + adapter->pmac_id = NULL; + + kfree(adapter->mc_list); + adapter->mc_list = NULL; + + kfree(adapter->uc_list); + adapter->uc_list = NULL; +} + static int be_clear(struct be_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; @@ -3867,6 +4085,8 @@ static int be_clear(struct be_adapter *adapter) be_cancel_worker(adapter); + flush_workqueue(be_wq); + if (sriov_enabled(adapter)) be_vf_clear(adapter); @@ -3884,10 +4104,8 @@ static int be_clear(struct be_adapter *adapter) } be_disable_vxlan_offloads(adapter); - kfree(adapter->pmac_id); - adapter->pmac_id = NULL; - be_cmd_if_destroy(adapter, adapter->if_handle, 0); + be_if_destroy(adapter); be_clear_queues(adapter); @@ -4151,7 +4369,7 @@ static void be_setup_init(struct be_adapter *adapter) * for distribution between the VFs. This self-imposed limit will determine the * no: of VFs for which RSS can be enabled. */ -void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) +static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter) { struct be_port_resources port_res = {0}; u8 rss_tables_on_port; @@ -4341,14 +4559,29 @@ static int be_mac_setup(struct be_adapter *adapter) static void be_schedule_worker(struct be_adapter *adapter) { - schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); + queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000)); adapter->flags |= BE_FLAGS_WORKER_SCHEDULED; } +static void be_destroy_err_recovery_workq(void) +{ + if (!be_err_recovery_workq) + return; + + flush_workqueue(be_err_recovery_workq); + destroy_workqueue(be_err_recovery_workq); + be_err_recovery_workq = NULL; +} + static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay) { - schedule_delayed_work(&adapter->be_err_detection_work, - msecs_to_jiffies(delay)); + struct be_error_recovery *err_rec = &adapter->error_recovery; + + if (!be_err_recovery_workq) + return; + + queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work, + msecs_to_jiffies(delay)); adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED; } @@ -4393,6 +4626,22 @@ static int be_if_create(struct be_adapter *adapter) u32 cap_flags = be_if_cap_flags(adapter); int status; + /* alloc required memory for other filtering fields */ + adapter->pmac_id = kcalloc(be_max_uc(adapter), + sizeof(*adapter->pmac_id), GFP_KERNEL); + if (!adapter->pmac_id) + return -ENOMEM; + + adapter->mc_list = kcalloc(be_max_mc(adapter), + sizeof(*adapter->mc_list), GFP_KERNEL); + if (!adapter->mc_list) + return -ENOMEM; + + adapter->uc_list = kcalloc(be_max_uc(adapter), + sizeof(*adapter->uc_list), GFP_KERNEL); + if (!adapter->uc_list) + return -ENOMEM; + if (adapter->cfg_num_rx_irqs == 1) cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS); @@ -4401,7 +4650,10 @@ static int be_if_create(struct be_adapter *adapter) status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags, &adapter->if_handle, 0); - return status; + if (status) + return status; + + return 0; } int be_update_queues(struct be_adapter *adapter) @@ -4458,10 +4710,15 @@ static inline int fw_major_num(const char *fw_ver) return fw_major; } -/* If any VFs are already enabled don't FLR the PF */ +/* If it is error recovery, FLR the PF + * Else if any VFs are already enabled don't FLR the PF + */ static bool be_reset_required(struct be_adapter *adapter) { - return pci_num_vf(adapter->pdev) ? false : true; + if (be_error_recovering(adapter)) + return true; + else + return pci_num_vf(adapter->pdev) == 0; } /* Wait for the FW to be ready and perform the required initialization */ @@ -4473,6 +4730,9 @@ static int be_func_init(struct be_adapter *adapter) if (status) return status; + /* FW is now ready; clear errors to allow cmds/doorbell */ + be_clear_error(adapter, BE_CLEAR_ALL); + if (be_reset_required(adapter)) { status = be_cmd_reset_function(adapter); if (status) @@ -4480,9 +4740,6 @@ static int be_func_init(struct be_adapter *adapter) /* Wait for interrupts to quiesce after an FLR */ msleep(100); - - /* We can clear all errors when function reset succeeds */ - be_clear_error(adapter, BE_CLEAR_ALL); } /* Tell FW we're ready to fire cmds */ @@ -4530,11 +4787,6 @@ static int be_setup(struct be_adapter *adapter) if (status) goto err; - adapter->pmac_id = kcalloc(be_max_uc(adapter), - sizeof(*adapter->pmac_id), GFP_KERNEL); - if (!adapter->pmac_id) - return -ENOMEM; - status = be_msix_enable(adapter); if (status) goto err; @@ -4595,6 +4847,9 @@ static int be_setup(struct be_adapter *adapter) if (!status && be_pause_supported(adapter)) adapter->phy.fc_autoneg = 1; + if (be_physfn(adapter) && !lancer_chip(adapter)) + be_cmd_set_features(adapter); + be_schedule_worker(adapter); adapter->flags |= BE_FLAGS_SETUP_DONE; return 0; @@ -4728,6 +4983,23 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, 0, 0, nlflags, filter_mask, NULL); } +static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter, + void (*func)(struct work_struct *)) +{ + struct be_cmd_work *work; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) { + dev_err(&adapter->pdev->dev, + "be_work memory allocation failed\n"); + return NULL; + } + + INIT_WORK(&work->work, func); + work->adapter = adapter; + return work; +} + /* VxLAN offload Notes: * * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't @@ -4742,23 +5014,19 @@ static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, * adds more than one port, disable offloads and don't re-enable them again * until after all the tunnels are removed. */ -static void be_add_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void be_work_add_vxlan_port(struct work_struct *work) { - struct be_adapter *adapter = netdev_priv(netdev); + struct be_cmd_work *cmd_work = + container_of(work, struct be_cmd_work, work); + struct be_adapter *adapter = cmd_work->adapter; + struct net_device *netdev = adapter->netdev; struct device *dev = &adapter->pdev->dev; - __be16 port = ti->port; + __be16 port = cmd_work->info.vxlan_port; int status; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; - - if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) - return; - if (adapter->vxlan_port == port && adapter->vxlan_port_count) { adapter->vxlan_port_aliases++; - return; + goto done; } if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) { @@ -4770,7 +5038,7 @@ static void be_add_vxlan_port(struct net_device *netdev, } if (adapter->vxlan_port_count++ >= 1) - return; + goto done; status = be_cmd_manage_iface(adapter, adapter->if_handle, OP_CONVERT_NORMAL_TO_TUNNEL); @@ -4795,29 +5063,26 @@ static void be_add_vxlan_port(struct net_device *netdev, dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n", be16_to_cpu(port)); - return; + goto done; err: be_disable_vxlan_offloads(adapter); +done: + kfree(cmd_work); } -static void be_del_vxlan_port(struct net_device *netdev, - struct udp_tunnel_info *ti) +static void be_work_del_vxlan_port(struct work_struct *work) { - struct be_adapter *adapter = netdev_priv(netdev); - __be16 port = ti->port; - - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; - - if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) - return; + struct be_cmd_work *cmd_work = + container_of(work, struct be_cmd_work, work); + struct be_adapter *adapter = cmd_work->adapter; + __be16 port = cmd_work->info.vxlan_port; if (adapter->vxlan_port != port) goto done; if (adapter->vxlan_port_aliases) { adapter->vxlan_port_aliases--; - return; + goto out; } be_disable_vxlan_offloads(adapter); @@ -4827,6 +5092,40 @@ static void be_del_vxlan_port(struct net_device *netdev, be16_to_cpu(port)); done: adapter->vxlan_port_count--; +out: + kfree(cmd_work); +} + +static void be_cfg_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti, + void (*func)(struct work_struct *)) +{ + struct be_adapter *adapter = netdev_priv(netdev); + struct be_cmd_work *cmd_work; + + if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + return; + + if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter)) + return; + + cmd_work = be_alloc_work(adapter, func); + if (cmd_work) { + cmd_work->info.vxlan_port = ti->port; + queue_work(be_wq, &cmd_work->work); + } +} + +static void be_del_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port); +} + +static void be_add_vxlan_port(struct net_device *netdev, + struct udp_tunnel_info *ti) +{ + be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port); } static netdev_features_t be_features_check(struct sk_buff *skb, @@ -4891,6 +5190,16 @@ static int be_get_phys_port_id(struct net_device *dev, return 0; } +static void be_set_rx_mode(struct net_device *dev) +{ + struct be_adapter *adapter = netdev_priv(dev); + struct be_cmd_work *work; + + work = be_alloc_work(adapter, be_work_set_rx_mode); + if (work) + queue_work(be_wq, &work->work); +} + static const struct net_device_ops be_netdev_ops = { .ndo_open = be_open, .ndo_stop = be_close, @@ -4984,13 +5293,145 @@ static int be_resume(struct be_adapter *adapter) return 0; } +static void be_soft_reset(struct be_adapter *adapter) +{ + u32 val; + + dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n"); + val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET); + val |= SLIPORT_SOFTRESET_SR_MASK; + iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET); +} + +static bool be_err_is_recoverable(struct be_adapter *adapter) +{ + struct be_error_recovery *err_rec = &adapter->error_recovery; + unsigned long initial_idle_time = + msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME); + unsigned long recovery_interval = + msecs_to_jiffies(ERR_RECOVERY_INTERVAL); + u16 ue_err_code; + u32 val; + + val = be_POST_stage_get(adapter); + if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR) + return false; + ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK; + if (ue_err_code == 0) + return false; + + dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n", + ue_err_code); + + if (jiffies - err_rec->probe_time <= initial_idle_time) { + dev_err(&adapter->pdev->dev, + "Cannot recover within %lu sec from driver load\n", + jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC); + return false; + } + + if (err_rec->last_recovery_time && + (jiffies - err_rec->last_recovery_time <= recovery_interval)) { + dev_err(&adapter->pdev->dev, + "Cannot recover within %lu sec from last recovery\n", + jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC); + return false; + } + + if (ue_err_code == err_rec->last_err_code) { + dev_err(&adapter->pdev->dev, + "Cannot recover from a consecutive TPE error\n"); + return false; + } + + err_rec->last_recovery_time = jiffies; + err_rec->last_err_code = ue_err_code; + return true; +} + +static int be_tpe_recover(struct be_adapter *adapter) +{ + struct be_error_recovery *err_rec = &adapter->error_recovery; + int status = -EAGAIN; + u32 val; + + switch (err_rec->recovery_state) { + case ERR_RECOVERY_ST_NONE: + err_rec->recovery_state = ERR_RECOVERY_ST_DETECT; + err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION; + break; + + case ERR_RECOVERY_ST_DETECT: + val = be_POST_stage_get(adapter); + if ((val & POST_STAGE_RECOVERABLE_ERR) != + POST_STAGE_RECOVERABLE_ERR) { + dev_err(&adapter->pdev->dev, + "Unrecoverable HW error detected: 0x%x\n", val); + status = -EINVAL; + err_rec->resched_delay = 0; + break; + } + + dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n"); + + /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR + * milliseconds before it checks for final error status in + * SLIPORT_SEMAPHORE to determine if recovery criteria is met. + * If it does, then PF0 initiates a Soft Reset. + */ + if (adapter->pf_num == 0) { + err_rec->recovery_state = ERR_RECOVERY_ST_RESET; + err_rec->resched_delay = err_rec->ue_to_reset_time - + ERR_RECOVERY_UE_DETECT_DURATION; + break; + } + + err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL; + err_rec->resched_delay = err_rec->ue_to_poll_time - + ERR_RECOVERY_UE_DETECT_DURATION; + break; + + case ERR_RECOVERY_ST_RESET: + if (!be_err_is_recoverable(adapter)) { + dev_err(&adapter->pdev->dev, + "Failed to meet recovery criteria\n"); + status = -EIO; + err_rec->resched_delay = 0; + break; + } + be_soft_reset(adapter); + err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL; + err_rec->resched_delay = err_rec->ue_to_poll_time - + err_rec->ue_to_reset_time; + break; + + case ERR_RECOVERY_ST_PRE_POLL: + err_rec->recovery_state = ERR_RECOVERY_ST_REINIT; + err_rec->resched_delay = 0; + status = 0; /* done */ + break; + + default: + status = -EINVAL; + err_rec->resched_delay = 0; + break; + } + + return status; +} + static int be_err_recover(struct be_adapter *adapter) { int status; - /* Error recovery is supported only Lancer as of now */ - if (!lancer_chip(adapter)) - return -EIO; + if (!lancer_chip(adapter)) { + if (!adapter->error_recovery.recovery_supported || + adapter->priv_flags & BE_DISABLE_TPE_RECOVERY) + return -EIO; + status = be_tpe_recover(adapter); + if (status) + goto err; + } /* Wait for adapter to reach quiescent state before * destroying queues @@ -4999,59 +5440,74 @@ static int be_err_recover(struct be_adapter *adapter) if (status) goto err; + adapter->flags |= BE_FLAGS_TRY_RECOVERY; + be_cleanup(adapter); status = be_resume(adapter); if (status) goto err; - return 0; + adapter->flags &= ~BE_FLAGS_TRY_RECOVERY; + err: return status; } static void be_err_detection_task(struct work_struct *work) { + struct be_error_recovery *err_rec = + container_of(work, struct be_error_recovery, + err_detection_work.work); struct be_adapter *adapter = - container_of(work, struct be_adapter, - be_err_detection_work.work); + container_of(err_rec, struct be_adapter, + error_recovery); + u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY; struct device *dev = &adapter->pdev->dev; int recovery_status; - int delay = ERR_DETECTION_DELAY; be_detect_error(adapter); - - if (be_check_error(adapter, BE_ERROR_HW)) - recovery_status = be_err_recover(adapter); - else + if (!be_check_error(adapter, BE_ERROR_HW)) goto reschedule_task; + recovery_status = be_err_recover(adapter); if (!recovery_status) { - adapter->recovery_retries = 0; + err_rec->recovery_retries = 0; + err_rec->recovery_state = ERR_RECOVERY_ST_NONE; dev_info(dev, "Adapter recovery successful\n"); goto reschedule_task; - } else if (be_virtfn(adapter)) { + } else if (!lancer_chip(adapter) && err_rec->resched_delay) { + /* BEx/SH recovery state machine */ + if (adapter->pf_num == 0 && + err_rec->recovery_state > ERR_RECOVERY_ST_DETECT) + dev_err(&adapter->pdev->dev, + "Adapter recovery in progress\n"); + resched_delay = err_rec->resched_delay; + goto reschedule_task; + } else if (lancer_chip(adapter) && be_virtfn(adapter)) { /* For VFs, check if PF have allocated resources * every second. */ dev_err(dev, "Re-trying adapter recovery\n"); goto reschedule_task; - } else if (adapter->recovery_retries++ < - MAX_ERR_RECOVERY_RETRY_COUNT) { + } else if (lancer_chip(adapter) && err_rec->recovery_retries++ < + ERR_RECOVERY_MAX_RETRY_COUNT) { /* In case of another error during recovery, it takes 30 sec * for adapter to come out of error. Retry error recovery after * this time interval. */ dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n"); - delay = ERR_RECOVERY_RETRY_DELAY; + resched_delay = ERR_RECOVERY_RETRY_DELAY; goto reschedule_task; } else { dev_err(dev, "Adapter recovery failed\n"); + dev_err(dev, "Please reboot server to recover\n"); } return; + reschedule_task: - be_schedule_err_detection(adapter, delay); + be_schedule_err_detection(adapter, resched_delay); } static void be_log_sfp_info(struct be_adapter *adapter) @@ -5116,7 +5572,7 @@ static void be_worker(struct work_struct *work) reschedule: adapter->work_counter++; - schedule_delayed_work(&adapter->work, msecs_to_jiffies(1000)); + queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000)); } static void be_unmap_pci_bars(struct be_adapter *adapter) @@ -5256,14 +5712,18 @@ static int be_drv_init(struct be_adapter *adapter) } mutex_init(&adapter->mbox_lock); - spin_lock_init(&adapter->mcc_lock); + mutex_init(&adapter->mcc_lock); + mutex_init(&adapter->rx_filter_lock); spin_lock_init(&adapter->mcc_cq_lock); init_completion(&adapter->et_cmd_compl); pci_save_state(adapter->pdev); INIT_DELAYED_WORK(&adapter->work, be_worker); - INIT_DELAYED_WORK(&adapter->be_err_detection_work, + + adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE; + adapter->error_recovery.resched_delay = 0; + INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work, be_err_detection_task); adapter->rx_fc = true; @@ -5298,6 +5758,9 @@ static void be_remove(struct pci_dev *pdev) be_clear(adapter); + if (!pci_vfs_assigned(adapter->pdev)) + be_cmd_reset_function(adapter); + /* tell fw we're done with firing cmds */ be_cmd_fw_clean(adapter); @@ -5454,6 +5917,7 @@ static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id) be_roce_dev_add(adapter); be_schedule_err_detection(adapter, ERR_DETECTION_DELAY); + adapter->error_recovery.probe_time = jiffies; /* On Die temperature not supported for VF. */ if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) { @@ -5699,6 +6163,8 @@ static struct pci_driver be_driver = { static int __init be_init_module(void) { + int status; + if (rx_frag_size != 8192 && rx_frag_size != 4096 && rx_frag_size != 2048) { printk(KERN_WARNING DRV_NAME @@ -5712,12 +6178,33 @@ static int __init be_init_module(void) pr_info(DRV_NAME " : Use sysfs method to enable VFs\n"); } - return pci_register_driver(&be_driver); + be_wq = create_singlethread_workqueue("be_wq"); + if (!be_wq) { + pr_warn(DRV_NAME "workqueue creation failed\n"); + return -1; + } + + be_err_recovery_workq = + create_singlethread_workqueue("be_err_recover"); + if (!be_err_recovery_workq) + pr_warn(DRV_NAME "Could not create error recovery workqueue\n"); + + status = pci_register_driver(&be_driver); + if (status) { + destroy_workqueue(be_wq); + be_destroy_err_recovery_workq(); + } + return status; } module_init(be_init_module); static void __exit be_exit_module(void) { pci_unregister_driver(&be_driver); + + be_destroy_err_recovery_workq(); + + if (be_wq) + destroy_workqueue(be_wq); } module_exit(be_exit_module); diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index 36361f8bf894..262587240c86 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -60,6 +60,8 @@ struct ftgmac100 { struct ftgmac100_descs *descs; dma_addr_t descs_dma_addr; + struct page *rx_pages[RX_QUEUE_ENTRIES]; + unsigned int rx_pointer; unsigned int tx_clean_pointer; unsigned int tx_pointer; @@ -77,6 +79,9 @@ struct ftgmac100 { int int_mask_all; bool use_ncsi; bool enabled; + + u32 rxdes0_edorr_mask; + u32 txdes0_edotr_mask; }; static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, @@ -257,10 +262,11 @@ static bool ftgmac100_rxdes_packet_ready(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_RXPKT_RDY); } -static void ftgmac100_rxdes_set_dma_own(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_dma_own(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { /* clear status bits */ - rxdes->rxdes0 &= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 &= cpu_to_le32(priv->rxdes0_edorr_mask); } static bool ftgmac100_rxdes_rx_error(struct ftgmac100_rxdes *rxdes) @@ -298,9 +304,10 @@ static bool ftgmac100_rxdes_multicast(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes0 & cpu_to_le32(FTGMAC100_RXDES0_MULTICAST); } -static void ftgmac100_rxdes_set_end_of_ring(struct ftgmac100_rxdes *rxdes) +static void ftgmac100_rxdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - rxdes->rxdes0 |= cpu_to_le32(FTGMAC100_RXDES0_EDORR); + rxdes->rxdes0 |= cpu_to_le32(priv->rxdes0_edorr_mask); } static void ftgmac100_rxdes_set_dma_addr(struct ftgmac100_rxdes *rxdes, @@ -341,18 +348,27 @@ static bool ftgmac100_rxdes_ipcs_err(struct ftgmac100_rxdes *rxdes) return rxdes->rxdes1 & cpu_to_le32(FTGMAC100_RXDES1_IP_CHKSUM_ERR); } +static inline struct page **ftgmac100_rxdes_page_slot(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) +{ + return &priv->rx_pages[rxdes - priv->descs->rxdes]; +} + /* * rxdes2 is not used by hardware. We use it to keep track of page. * Since hardware does not touch it, we can skip cpu_to_le32()/le32_to_cpu(). */ -static void ftgmac100_rxdes_set_page(struct ftgmac100_rxdes *rxdes, struct page *page) +static void ftgmac100_rxdes_set_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes, + struct page *page) { - rxdes->rxdes2 = (unsigned int)page; + *ftgmac100_rxdes_page_slot(priv, rxdes) = page; } -static struct page *ftgmac100_rxdes_get_page(struct ftgmac100_rxdes *rxdes) +static struct page *ftgmac100_rxdes_get_page(struct ftgmac100 *priv, + struct ftgmac100_rxdes *rxdes) { - return (struct page *)rxdes->rxdes2; + return *ftgmac100_rxdes_page_slot(priv, rxdes); } /****************************************************************************** @@ -382,7 +398,7 @@ ftgmac100_rx_locate_first_segment(struct ftgmac100 *priv) if (ftgmac100_rxdes_first_segment(rxdes)) return rxdes; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } @@ -453,7 +469,7 @@ static void ftgmac100_rx_drop_packet(struct ftgmac100 *priv) if (ftgmac100_rxdes_last_segment(rxdes)) done = true; - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); ftgmac100_rx_pointer_advance(priv); rxdes = ftgmac100_current_rxdes(priv); } while (!done && ftgmac100_rxdes_packet_ready(rxdes)); @@ -501,7 +517,7 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) do { dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); unsigned int size; dma_unmap_page(priv->dev, map, RX_BUF_SIZE, DMA_FROM_DEVICE); @@ -545,10 +561,11 @@ static bool ftgmac100_rx_packet(struct ftgmac100 *priv, int *processed) /****************************************************************************** * internal functions (transmit descriptor) *****************************************************************************/ -static void ftgmac100_txdes_reset(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_reset(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { /* clear all except end of ring bit */ - txdes->txdes0 &= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 &= cpu_to_le32(priv->txdes0_edotr_mask); txdes->txdes1 = 0; txdes->txdes2 = 0; txdes->txdes3 = 0; @@ -569,9 +586,10 @@ static void ftgmac100_txdes_set_dma_own(struct ftgmac100_txdes *txdes) txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_TXDMA_OWN); } -static void ftgmac100_txdes_set_end_of_ring(struct ftgmac100_txdes *txdes) +static void ftgmac100_txdes_set_end_of_ring(const struct ftgmac100 *priv, + struct ftgmac100_txdes *txdes) { - txdes->txdes0 |= cpu_to_le32(FTGMAC100_TXDES0_EDOTR); + txdes->txdes0 |= cpu_to_le32(priv->txdes0_edotr_mask); } static void ftgmac100_txdes_set_first_segment(struct ftgmac100_txdes *txdes) @@ -690,7 +708,7 @@ static bool ftgmac100_tx_complete_packet(struct ftgmac100 *priv) dev_kfree_skb(skb); - ftgmac100_txdes_reset(txdes); + ftgmac100_txdes_reset(priv, txdes); ftgmac100_tx_clean_pointer_advance(priv); @@ -779,9 +797,9 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, return -ENOMEM; } - ftgmac100_rxdes_set_page(rxdes, page); + ftgmac100_rxdes_set_page(priv, rxdes, page); ftgmac100_rxdes_set_dma_addr(rxdes, map); - ftgmac100_rxdes_set_dma_own(rxdes); + ftgmac100_rxdes_set_dma_own(priv, rxdes); return 0; } @@ -791,7 +809,7 @@ static void ftgmac100_free_buffers(struct ftgmac100 *priv) for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; - struct page *page = ftgmac100_rxdes_get_page(rxdes); + struct page *page = ftgmac100_rxdes_get_page(priv, rxdes); dma_addr_t map = ftgmac100_rxdes_get_dma_addr(rxdes); if (!page) @@ -828,7 +846,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) return -ENOMEM; /* initialize RX ring */ - ftgmac100_rxdes_set_end_of_ring(&priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); + ftgmac100_rxdes_set_end_of_ring(priv, + &priv->descs->rxdes[RX_QUEUE_ENTRIES - 1]); for (i = 0; i < RX_QUEUE_ENTRIES; i++) { struct ftgmac100_rxdes *rxdes = &priv->descs->rxdes[i]; @@ -838,7 +857,8 @@ static int ftgmac100_alloc_buffers(struct ftgmac100 *priv) } /* initialize TX ring */ - ftgmac100_txdes_set_end_of_ring(&priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); + ftgmac100_txdes_set_end_of_ring(priv, + &priv->descs->txdes[TX_QUEUE_ENTRIES - 1]); return 0; err: @@ -1055,14 +1075,12 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) } if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF | - FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG)) { + FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR)) { if (net_ratelimit()) - netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status, + netdev_info(netdev, "[ISR] = 0x%x: %s%s%s\n", status, status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "", status & FTGMAC100_INT_RPKT_LOST ? "RPKT_LOST " : "", - status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : "", - status & FTGMAC100_INT_PHYSTS_CHG ? "PHYSTS_CHG" : ""); + status & FTGMAC100_INT_AHB_ERR ? "AHB_ERR " : ""); if (status & FTGMAC100_INT_NO_RXBUF) { /* RX buffer unavailable */ @@ -1092,6 +1110,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) static int ftgmac100_open(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); + unsigned int status; int err; err = ftgmac100_alloc_buffers(priv); @@ -1117,6 +1136,11 @@ static int ftgmac100_open(struct net_device *netdev) ftgmac100_init_hw(priv); ftgmac100_start_hw(priv, priv->use_ncsi ? 100 : 10); + + /* Clear stale interrupts */ + status = ioread32(priv->base + FTGMAC100_OFFSET_ISR); + iowrite32(status, priv->base + FTGMAC100_OFFSET_ISR); + if (netdev->phydev) phy_start(netdev->phydev); else if (priv->use_ncsi) @@ -1166,6 +1190,8 @@ static int ftgmac100_stop(struct net_device *netdev) napi_disable(&priv->napi); if (netdev->phydev) phy_stop(netdev->phydev); + else if (priv->use_ncsi) + ncsi_stop_dev(priv->ndev); ftgmac100_stop_hw(priv); free_irq(priv->irq, netdev); @@ -1226,12 +1252,21 @@ static int ftgmac100_setup_mdio(struct net_device *netdev) struct ftgmac100 *priv = netdev_priv(netdev); struct platform_device *pdev = to_platform_device(priv->dev); int i, err = 0; + u32 reg; /* initialize mdio bus */ priv->mii_bus = mdiobus_alloc(); if (!priv->mii_bus) return -EIO; + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + /* This driver supports the old MDIO interface */ + reg = ioread32(priv->base + FTGMAC100_OFFSET_REVR); + reg &= ~FTGMAC100_REVR_NEW_MDIO_INTERFACE; + iowrite32(reg, priv->base + FTGMAC100_OFFSET_REVR); + }; + priv->mii_bus->name = "ftgmac100_mdio"; snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d", pdev->name, pdev->id); @@ -1355,9 +1390,18 @@ static int ftgmac100_probe(struct platform_device *pdev) FTGMAC100_INT_XPKT_ETH | FTGMAC100_INT_XPKT_LOST | FTGMAC100_INT_AHB_ERR | - FTGMAC100_INT_PHYSTS_CHG | FTGMAC100_INT_RPKT_BUF | FTGMAC100_INT_NO_RXBUF); + + if (of_machine_is_compatible("aspeed,ast2400") || + of_machine_is_compatible("aspeed,ast2500")) { + priv->rxdes0_edorr_mask = BIT(30); + priv->txdes0_edotr_mask = BIT(30); + } else { + priv->rxdes0_edorr_mask = BIT(15); + priv->txdes0_edotr_mask = BIT(15); + } + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) { if (!IS_ENABLED(CONFIG_NET_NCSI)) { @@ -1367,7 +1411,6 @@ static int ftgmac100_probe(struct platform_device *pdev) dev_info(&pdev->dev, "Using NCSI interface\n"); priv->use_ncsi = true; - priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG; priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler); if (!priv->ndev) goto err_ncsi_dev; diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h index 13408d448b05..a7ce0ac8858a 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.h +++ b/drivers/net/ethernet/faraday/ftgmac100.h @@ -133,6 +133,11 @@ #define FTGMAC100_DMAFIFOS_RXDMA_REQ (1 << 30) #define FTGMAC100_DMAFIFOS_TXDMA_REQ (1 << 31) +/* + * Feature Register + */ +#define FTGMAC100_REVR_NEW_MDIO_INTERFACE BIT(31) + /* * Receive buffer size register */ @@ -152,6 +157,7 @@ #define FTGMAC100_MACCR_FULLDUP (1 << 8) #define FTGMAC100_MACCR_GIGA_MODE (1 << 9) #define FTGMAC100_MACCR_CRC_APD (1 << 10) +#define FTGMAC100_MACCR_PHY_LINK_LEVEL (1 << 11) #define FTGMAC100_MACCR_RX_RUNT (1 << 12) #define FTGMAC100_MACCR_JUMBO_LF (1 << 13) #define FTGMAC100_MACCR_RX_ALL (1 << 14) @@ -189,7 +195,6 @@ struct ftgmac100_txdes { } __attribute__ ((aligned(16))); #define FTGMAC100_TXDES0_TXBUF_SIZE(x) ((x) & 0x3fff) -#define FTGMAC100_TXDES0_EDOTR (1 << 15) #define FTGMAC100_TXDES0_CRC_ERR (1 << 19) #define FTGMAC100_TXDES0_LTS (1 << 28) #define FTGMAC100_TXDES0_FTS (1 << 29) @@ -215,7 +220,6 @@ struct ftgmac100_rxdes { } __attribute__ ((aligned(16))); #define FTGMAC100_RXDES0_VDBC 0x3fff -#define FTGMAC100_RXDES0_EDORR (1 << 15) #define FTGMAC100_RXDES0_MULTICAST (1 << 16) #define FTGMAC100_RXDES0_BROADCAST (1 << 17) #define FTGMAC100_RXDES0_RX_ERR (1 << 18) diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 692ee248e486..48a033e64423 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -913,13 +913,11 @@ fec_restart(struct net_device *ndev) * enet-mac reset will reset mac address registers too, * so need to reconfigure it. */ - if (fep->quirks & FEC_QUIRK_ENET_MAC) { - memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); - writel((__force u32)cpu_to_be32(temp_mac[0]), - fep->hwp + FEC_ADDR_LOW); - writel((__force u32)cpu_to_be32(temp_mac[1]), - fep->hwp + FEC_ADDR_HIGH); - } + memcpy(&temp_mac, ndev->dev_addr, ETH_ALEN); + writel((__force u32)cpu_to_be32(temp_mac[0]), + fep->hwp + FEC_ADDR_LOW); + writel((__force u32)cpu_to_be32(temp_mac[1]), + fep->hwp + FEC_ADDR_HIGH); /* Clear any outstanding interrupt. */ writel(0xffffffff, fep->hwp + FEC_IEVENT); @@ -2896,7 +2894,7 @@ fec_enet_close(struct net_device *ndev) * this kind of feature?). */ -#define HASH_BITS 6 /* #bits in hash */ +#define FEC_HASH_BITS 6 /* #bits in hash */ #define CRC32_POLY 0xEDB88320 static void set_multicast_list(struct net_device *ndev) @@ -2944,10 +2942,10 @@ static void set_multicast_list(struct net_device *ndev) } } - /* only upper 6 bits (HASH_BITS) are used + /* only upper 6 bits (FEC_HASH_BITS) are used * which point to specific bit in he hash registers */ - hash = (crc >> (32 - HASH_BITS)) & 0x3f; + hash = (crc >> (32 - FEC_HASH_BITS)) & 0x3f; if (hash > 31) { tmp = readl(fep->hwp + FEC_GRP_HASH_TABLE_HIGH); diff --git a/drivers/net/ethernet/freescale/fman/fman_mac.h b/drivers/net/ethernet/freescale/fman/fman_mac.h index 8ddeedbcef9c..ddf0260176c9 100644 --- a/drivers/net/ethernet/freescale/fman/fman_mac.h +++ b/drivers/net/ethernet/freescale/fman/fman_mac.h @@ -192,7 +192,7 @@ struct fman_mac_params { /* A handle to the FM object this port related to */ void *fm; /* MDIO exceptions interrupt source - not valid for all - * MACs; MUST be set to 'NO_IRQ' for MACs that don't have + * MACs; MUST be set to 0 for MACs that don't have * mdio-irq, or for polling */ void *dev_id; /* device cookie used by the exception cbs */ diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c index 61fd486c50bb..dc120c148d97 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet-main.c @@ -60,6 +60,9 @@ module_param(fs_enet_debug, int, 0); MODULE_PARM_DESC(fs_enet_debug, "Freescale bitmapped debugging message enable value"); +#define RX_RING_SIZE 32 +#define TX_RING_SIZE 64 + #ifdef CONFIG_NET_POLL_CONTROLLER static void fs_enet_netpoll(struct net_device *dev); #endif @@ -79,8 +82,8 @@ static void skb_align(struct sk_buff *skb, int align) skb_reserve(skb, align - off); } -/* NAPI receive function */ -static int fs_enet_rx_napi(struct napi_struct *napi, int budget) +/* NAPI function */ +static int fs_enet_napi(struct napi_struct *napi, int budget) { struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, napi); struct net_device *dev = fep->ndev; @@ -90,147 +93,17 @@ static int fs_enet_rx_napi(struct napi_struct *napi, int budget) int received = 0; u16 pkt_len, sc; int curidx; - - if (budget <= 0) - return received; - - /* - * First, grab all of the stats for the incoming packet. - * These get messed up if we get called due to a busy condition. - */ - bdp = fep->cur_rx; - - /* clear RX status bits for napi*/ - (*fep->ops->napi_clear_rx_event)(dev); - - while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0) { - curidx = bdp - fep->rx_bd_base; - - /* - * Since we have allocated space to hold a complete frame, - * the last indicator should be set. - */ - if ((sc & BD_ENET_RX_LAST) == 0) - dev_warn(fep->dev, "rcv is not +last\n"); - - /* - * Check for errors. - */ - if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL | - BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) { - fep->stats.rx_errors++; - /* Frame too long or too short. */ - if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) - fep->stats.rx_length_errors++; - /* Frame alignment */ - if (sc & (BD_ENET_RX_NO | BD_ENET_RX_CL)) - fep->stats.rx_frame_errors++; - /* CRC Error */ - if (sc & BD_ENET_RX_CR) - fep->stats.rx_crc_errors++; - /* FIFO overrun */ - if (sc & BD_ENET_RX_OV) - fep->stats.rx_crc_errors++; - - skb = fep->rx_skbuff[curidx]; - - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); - - skbn = skb; - - } else { - skb = fep->rx_skbuff[curidx]; - - dma_unmap_single(fep->dev, CBDR_BUFADDR(bdp), - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE); - - /* - * Process the incoming frame. - */ - fep->stats.rx_packets++; - pkt_len = CBDR_DATLEN(bdp) - 4; /* remove CRC */ - fep->stats.rx_bytes += pkt_len + 4; - - if (pkt_len <= fpi->rx_copybreak) { - /* +2 to make IP header L1 cache aligned */ - skbn = netdev_alloc_skb(dev, pkt_len + 2); - if (skbn != NULL) { - skb_reserve(skbn, 2); /* align IP header */ - skb_copy_from_linear_data(skb, - skbn->data, pkt_len); - swap(skb, skbn); - } - } else { - skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE); - - if (skbn) - skb_align(skbn, ENET_RX_ALIGN); - } - - if (skbn != NULL) { - skb_put(skb, pkt_len); /* Make room */ - skb->protocol = eth_type_trans(skb, dev); - received++; - netif_receive_skb(skb); - } else { - fep->stats.rx_dropped++; - skbn = skb; - } - } - - fep->rx_skbuff[curidx] = skbn; - CBDW_BUFADDR(bdp, dma_map_single(fep->dev, skbn->data, - L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), - DMA_FROM_DEVICE)); - CBDW_DATLEN(bdp, 0); - CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY); - - /* - * Update BD pointer to next entry. - */ - if ((sc & BD_ENET_RX_WRAP) == 0) - bdp++; - else - bdp = fep->rx_bd_base; - - (*fep->ops->rx_bd_done)(dev); - - if (received >= budget) - break; - } - - fep->cur_rx = bdp; - - if (received < budget) { - /* done */ - napi_complete(napi); - (*fep->ops->napi_enable_rx)(dev); - } - return received; -} - -static int fs_enet_tx_napi(struct napi_struct *napi, int budget) -{ - struct fs_enet_private *fep = container_of(napi, struct fs_enet_private, - napi_tx); - struct net_device *dev = fep->ndev; - cbd_t __iomem *bdp; - struct sk_buff *skb; int dirtyidx, do_wake, do_restart; - u16 sc; - int has_tx_work = 0; + int tx_left = TX_RING_SIZE; spin_lock(&fep->tx_lock); bdp = fep->dirty_tx; - /* clear TX status bits for napi*/ - (*fep->ops->napi_clear_tx_event)(dev); + /* clear status bits for napi*/ + (*fep->ops->napi_clear_event)(dev); do_wake = do_restart = 0; - while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0) { + while (((sc = CBDR_SC(bdp)) & BD_ENET_TX_READY) == 0 && tx_left) { dirtyidx = bdp - fep->tx_bd_base; if (fep->tx_free == fep->tx_ring) @@ -302,9 +175,9 @@ static int fs_enet_tx_napi(struct napi_struct *napi, int budget) * Since we have freed up a buffer, the ring is no longer * full. */ - if (++fep->tx_free >= MAX_SKB_FRAGS) + if (++fep->tx_free == MAX_SKB_FRAGS) do_wake = 1; - has_tx_work = 1; + tx_left--; } fep->dirty_tx = bdp; @@ -312,19 +185,129 @@ static int fs_enet_tx_napi(struct napi_struct *napi, int budget) if (do_restart) (*fep->ops->tx_restart)(dev); - if (!has_tx_work) { - napi_complete(napi); - (*fep->ops->napi_enable_tx)(dev); - } - spin_unlock(&fep->tx_lock); if (do_wake) netif_wake_queue(dev); - if (has_tx_work) - return budget; - return 0; + /* + * First, grab all of the stats for the incoming packet. + * These get messed up if we get called due to a busy condition. + */ + bdp = fep->cur_rx; + + while (((sc = CBDR_SC(bdp)) & BD_ENET_RX_EMPTY) == 0 && + received < budget) { + curidx = bdp - fep->rx_bd_base; + + /* + * Since we have allocated space to hold a complete frame, + * the last indicator should be set. + */ + if ((sc & BD_ENET_RX_LAST) == 0) + dev_warn(fep->dev, "rcv is not +last\n"); + + /* + * Check for errors. + */ + if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH | BD_ENET_RX_CL | + BD_ENET_RX_NO | BD_ENET_RX_CR | BD_ENET_RX_OV)) { + fep->stats.rx_errors++; + /* Frame too long or too short. */ + if (sc & (BD_ENET_RX_LG | BD_ENET_RX_SH)) + fep->stats.rx_length_errors++; + /* Frame alignment */ + if (sc & (BD_ENET_RX_NO | BD_ENET_RX_CL)) + fep->stats.rx_frame_errors++; + /* CRC Error */ + if (sc & BD_ENET_RX_CR) + fep->stats.rx_crc_errors++; + /* FIFO overrun */ + if (sc & BD_ENET_RX_OV) + fep->stats.rx_crc_errors++; + + skbn = fep->rx_skbuff[curidx]; + } else { + skb = fep->rx_skbuff[curidx]; + + /* + * Process the incoming frame. + */ + fep->stats.rx_packets++; + pkt_len = CBDR_DATLEN(bdp) - 4; /* remove CRC */ + fep->stats.rx_bytes += pkt_len + 4; + + if (pkt_len <= fpi->rx_copybreak) { + /* +2 to make IP header L1 cache aligned */ + skbn = netdev_alloc_skb(dev, pkt_len + 2); + if (skbn != NULL) { + skb_reserve(skbn, 2); /* align IP header */ + skb_copy_from_linear_data(skb, + skbn->data, pkt_len); + swap(skb, skbn); + dma_sync_single_for_cpu(fep->dev, + CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(pkt_len), + DMA_FROM_DEVICE); + } + } else { + skbn = netdev_alloc_skb(dev, ENET_RX_FRSIZE); + + if (skbn) { + dma_addr_t dma; + + skb_align(skbn, ENET_RX_ALIGN); + + dma_unmap_single(fep->dev, + CBDR_BUFADDR(bdp), + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); + + dma = dma_map_single(fep->dev, + skbn->data, + L1_CACHE_ALIGN(PKT_MAXBUF_SIZE), + DMA_FROM_DEVICE); + CBDW_BUFADDR(bdp, dma); + } + } + + if (skbn != NULL) { + skb_put(skb, pkt_len); /* Make room */ + skb->protocol = eth_type_trans(skb, dev); + received++; + netif_receive_skb(skb); + } else { + fep->stats.rx_dropped++; + skbn = skb; + } + } + + fep->rx_skbuff[curidx] = skbn; + CBDW_DATLEN(bdp, 0); + CBDW_SC(bdp, (sc & ~BD_ENET_RX_STATS) | BD_ENET_RX_EMPTY); + + /* + * Update BD pointer to next entry. + */ + if ((sc & BD_ENET_RX_WRAP) == 0) + bdp++; + else + bdp = fep->rx_bd_base; + + (*fep->ops->rx_bd_done)(dev); + } + + fep->cur_rx = bdp; + + if (received < budget && tx_left) { + /* done */ + napi_complete(napi); + (*fep->ops->napi_enable)(dev); + + return received; + } + + return budget; } /* @@ -350,18 +333,18 @@ fs_enet_interrupt(int irq, void *dev_id) nr++; int_clr_events = int_events; - int_clr_events &= ~fep->ev_napi_rx; + int_clr_events &= ~fep->ev_napi; (*fep->ops->clear_int_events)(dev, int_clr_events); if (int_events & fep->ev_err) (*fep->ops->ev_error)(dev, int_events); - if (int_events & fep->ev_rx) { + if (int_events & fep->ev) { napi_ok = napi_schedule_prep(&fep->napi); - (*fep->ops->napi_disable_rx)(dev); - (*fep->ops->clear_int_events)(dev, fep->ev_napi_rx); + (*fep->ops->napi_disable)(dev); + (*fep->ops->clear_int_events)(dev, fep->ev_napi); /* NOTE: it is possible for FCCs in NAPI mode */ /* to submit a spurious interrupt while in poll */ @@ -369,17 +352,6 @@ fs_enet_interrupt(int irq, void *dev_id) __napi_schedule(&fep->napi); } - if (int_events & fep->ev_tx) { - napi_ok = napi_schedule_prep(&fep->napi_tx); - - (*fep->ops->napi_disable_tx)(dev); - (*fep->ops->clear_int_events)(dev, fep->ev_napi_tx); - - /* NOTE: it is possible for FCCs in NAPI mode */ - /* to submit a spurious interrupt while in poll */ - if (napi_ok) - __napi_schedule(&fep->napi_tx); - } } handled = nr > 0; @@ -659,7 +631,8 @@ static void fs_timeout(struct net_device *dev) } phy_start(dev->phydev); - wake = fep->tx_free && !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); + wake = fep->tx_free >= MAX_SKB_FRAGS && + !(CBDR_SC(fep->cur_tx) & BD_ENET_TX_READY); spin_unlock_irqrestore(&fep->lock, flags); if (wake) @@ -751,11 +724,10 @@ static int fs_enet_open(struct net_device *dev) int err; /* to initialize the fep->cur_rx,... */ - /* not doing this, will cause a crash in fs_enet_rx_napi */ + /* not doing this, will cause a crash in fs_enet_napi */ fs_init_bds(fep->ndev); napi_enable(&fep->napi); - napi_enable(&fep->napi_tx); /* Install our interrupt handler. */ r = request_irq(fep->interrupt, fs_enet_interrupt, IRQF_SHARED, @@ -763,7 +735,6 @@ static int fs_enet_open(struct net_device *dev) if (r != 0) { dev_err(fep->dev, "Could not allocate FS_ENET IRQ!"); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); return -EINVAL; } @@ -771,7 +742,6 @@ static int fs_enet_open(struct net_device *dev) if (err) { free_irq(fep->interrupt, dev); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); return err; } phy_start(dev->phydev); @@ -789,7 +759,6 @@ static int fs_enet_close(struct net_device *dev) netif_stop_queue(dev); netif_carrier_off(dev); napi_disable(&fep->napi); - napi_disable(&fep->napi_tx); phy_stop(dev->phydev); spin_lock_irqsave(&fep->lock, flags); @@ -861,6 +830,44 @@ static void fs_set_msglevel(struct net_device *dev, u32 value) fep->msg_enable = value; } +static int fs_get_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, void *data) +{ + struct fs_enet_private *fep = netdev_priv(dev); + struct fs_platform_info *fpi = fep->fpi; + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = fpi->rx_copybreak; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int fs_set_tunable(struct net_device *dev, + const struct ethtool_tunable *tuna, const void *data) +{ + struct fs_enet_private *fep = netdev_priv(dev); + struct fs_platform_info *fpi = fep->fpi; + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + fpi->rx_copybreak = *(u32 *)data; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + static const struct ethtool_ops fs_ethtool_ops = { .get_drvinfo = fs_get_drvinfo, .get_regs_len = fs_get_regs_len, @@ -872,6 +879,8 @@ static const struct ethtool_ops fs_ethtool_ops = { .get_ts_info = ethtool_op_get_ts_info, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, + .get_tunable = fs_get_tunable, + .set_tunable = fs_set_tunable, }; static int fs_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) @@ -939,8 +948,8 @@ static int fs_enet_probe(struct platform_device *ofdev) fpi->cp_command = *data; } - fpi->rx_ring = 32; - fpi->tx_ring = 64; + fpi->rx_ring = RX_RING_SIZE; + fpi->tx_ring = TX_RING_SIZE; fpi->rx_copybreak = 240; fpi->napi_weight = 17; fpi->phy_node = of_parse_phandle(ofdev->dev.of_node, "phy-handle", 0); @@ -1024,8 +1033,7 @@ static int fs_enet_probe(struct platform_device *ofdev) ndev->netdev_ops = &fs_enet_netdev_ops; ndev->watchdog_timeo = 2 * HZ; - netif_napi_add(ndev, &fep->napi, fs_enet_rx_napi, fpi->napi_weight); - netif_tx_napi_add(ndev, &fep->napi_tx, fs_enet_tx_napi, 2); + netif_napi_add(ndev, &fep->napi, fs_enet_napi, fpi->napi_weight); ndev->ethtool_ops = &fs_ethtool_ops; diff --git a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h index e29f54a35210..fee24c822fad 100644 --- a/drivers/net/ethernet/freescale/fs_enet/fs_enet.h +++ b/drivers/net/ethernet/freescale/fs_enet/fs_enet.h @@ -81,12 +81,9 @@ struct fs_ops { void (*adjust_link)(struct net_device *dev); void (*restart)(struct net_device *dev); void (*stop)(struct net_device *dev); - void (*napi_clear_rx_event)(struct net_device *dev); - void (*napi_enable_rx)(struct net_device *dev); - void (*napi_disable_rx)(struct net_device *dev); - void (*napi_clear_tx_event)(struct net_device *dev); - void (*napi_enable_tx)(struct net_device *dev); - void (*napi_disable_tx)(struct net_device *dev); + void (*napi_clear_event)(struct net_device *dev); + void (*napi_enable)(struct net_device *dev); + void (*napi_disable)(struct net_device *dev); void (*rx_bd_done)(struct net_device *dev); void (*tx_kickstart)(struct net_device *dev); u32 (*get_int_events)(struct net_device *dev); @@ -122,7 +119,6 @@ struct phy_info { struct fs_enet_private { struct napi_struct napi; - struct napi_struct napi_tx; struct device *dev; /* pointer back to the device (must be initialized first) */ struct net_device *ndev; spinlock_t lock; /* during all ops except TX pckt processing */ @@ -152,10 +148,8 @@ struct fs_enet_private { int oldduplex, oldspeed, oldlink; /* current settings */ /* event masks */ - u32 ev_napi_rx; /* mask of NAPI rx events */ - u32 ev_napi_tx; /* mask of NAPI rx events */ - u32 ev_rx; /* rx event mask */ - u32 ev_tx; /* tx event mask */ + u32 ev_napi; /* mask of NAPI events */ + u32 ev; /* event mask */ u32 ev_err; /* error event mask */ u16 bd_rx_empty; /* mask of BD rx empty */ diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c index d71761a34022..120c758f5d01 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fcc.c @@ -90,7 +90,7 @@ static int do_pd_setup(struct fs_enet_private *fep) int ret = -EINVAL; fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) goto out; fep->fcc.fccp = of_iomap(ofdev->dev.of_node, 0); @@ -124,10 +124,8 @@ out: return ret; } -#define FCC_NAPI_RX_EVENT_MSK (FCC_ENET_RXF | FCC_ENET_RXB) -#define FCC_NAPI_TX_EVENT_MSK (FCC_ENET_TXB) -#define FCC_RX_EVENT (FCC_ENET_RXF) -#define FCC_TX_EVENT (FCC_ENET_TXB) +#define FCC_NAPI_EVENT_MSK (FCC_ENET_RXF | FCC_ENET_RXB | FCC_ENET_TXB) +#define FCC_EVENT (FCC_ENET_RXF | FCC_ENET_TXB) #define FCC_ERR_EVENT_MSK (FCC_ENET_TXE) static int setup_data(struct net_device *dev) @@ -137,10 +135,8 @@ static int setup_data(struct net_device *dev) if (do_pd_setup(fep) != 0) return -EINVAL; - fep->ev_napi_rx = FCC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = FCC_NAPI_TX_EVENT_MSK; - fep->ev_rx = FCC_RX_EVENT; - fep->ev_tx = FCC_TX_EVENT; + fep->ev_napi = FCC_NAPI_EVENT_MSK; + fep->ev = FCC_EVENT; fep->ev_err = FCC_ERR_EVENT_MSK; return 0; @@ -424,52 +420,28 @@ static void stop(struct net_device *dev) fs_cleanup_bds(dev); } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - W16(fccp, fcc_fcce, FCC_NAPI_RX_EVENT_MSK); + W16(fccp, fcc_fcce, FCC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - S16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK); + S16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); fcc_t __iomem *fccp = fep->fcc.fccp; - C16(fccp, fcc_fccm, FCC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - W16(fccp, fcc_fcce, FCC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - S16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - fcc_t __iomem *fccp = fep->fcc.fccp; - - C16(fccp, fcc_fccm, FCC_NAPI_TX_EVENT_MSK); + C16(fccp, fcc_fccm, FCC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -595,12 +567,9 @@ const struct fs_ops fs_fcc_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c index 35a318ed3a62..777beffa1e1e 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-fec.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-fec.c @@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep) struct platform_device *ofdev = to_platform_device(fep->dev); fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) return -EINVAL; fep->fec.fecp = of_iomap(ofdev->dev.of_node, 0); @@ -109,10 +109,8 @@ static int do_pd_setup(struct fs_enet_private *fep) return 0; } -#define FEC_NAPI_RX_EVENT_MSK (FEC_ENET_RXF | FEC_ENET_RXB) -#define FEC_NAPI_TX_EVENT_MSK (FEC_ENET_TXF) -#define FEC_RX_EVENT (FEC_ENET_RXF) -#define FEC_TX_EVENT (FEC_ENET_TXF) +#define FEC_NAPI_EVENT_MSK (FEC_ENET_RXF | FEC_ENET_RXB | FEC_ENET_TXF) +#define FEC_EVENT (FEC_ENET_RXF | FEC_ENET_TXF) #define FEC_ERR_EVENT_MSK (FEC_ENET_HBERR | FEC_ENET_BABR | \ FEC_ENET_BABT | FEC_ENET_EBERR) @@ -126,10 +124,8 @@ static int setup_data(struct net_device *dev) fep->fec.hthi = 0; fep->fec.htlo = 0; - fep->ev_napi_rx = FEC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = FEC_NAPI_TX_EVENT_MSK; - fep->ev_rx = FEC_RX_EVENT; - fep->ev_tx = FEC_TX_EVENT; + fep->ev_napi = FEC_NAPI_EVENT_MSK; + fep->ev = FEC_EVENT; fep->ev_err = FEC_ERR_EVENT_MSK; return 0; @@ -396,52 +392,28 @@ static void stop(struct net_device *dev) } } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FW(fecp, ievent, FEC_NAPI_RX_EVENT_MSK); + FW(fecp, ievent, FEC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FS(fecp, imask, FEC_NAPI_RX_EVENT_MSK); + FS(fecp, imask, FEC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); struct fec __iomem *fecp = fep->fec.fecp; - FC(fecp, imask, FEC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FW(fecp, ievent, FEC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FS(fecp, imask, FEC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - struct fec __iomem *fecp = fep->fec.fecp; - - FC(fecp, imask, FEC_NAPI_TX_EVENT_MSK); + FC(fecp, imask, FEC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -513,12 +485,9 @@ const struct fs_ops fs_fec_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, diff --git a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c index e8b9c33d35b4..15abd37d70e3 100644 --- a/drivers/net/ethernet/freescale/fs_enet/mac-scc.c +++ b/drivers/net/ethernet/freescale/fs_enet/mac-scc.c @@ -99,7 +99,7 @@ static int do_pd_setup(struct fs_enet_private *fep) struct platform_device *ofdev = to_platform_device(fep->dev); fep->interrupt = irq_of_parse_and_map(ofdev->dev.of_node, 0); - if (fep->interrupt == NO_IRQ) + if (!fep->interrupt) return -EINVAL; fep->scc.sccp = of_iomap(ofdev->dev.of_node, 0); @@ -115,10 +115,8 @@ static int do_pd_setup(struct fs_enet_private *fep) return 0; } -#define SCC_NAPI_RX_EVENT_MSK (SCCE_ENET_RXF | SCCE_ENET_RXB) -#define SCC_NAPI_TX_EVENT_MSK (SCCE_ENET_TXB) -#define SCC_RX_EVENT (SCCE_ENET_RXF) -#define SCC_TX_EVENT (SCCE_ENET_TXB) +#define SCC_NAPI_EVENT_MSK (SCCE_ENET_RXF | SCCE_ENET_RXB | SCCE_ENET_TXB) +#define SCC_EVENT (SCCE_ENET_RXF | SCCE_ENET_TXB) #define SCC_ERR_EVENT_MSK (SCCE_ENET_TXE | SCCE_ENET_BSY) static int setup_data(struct net_device *dev) @@ -130,10 +128,8 @@ static int setup_data(struct net_device *dev) fep->scc.hthi = 0; fep->scc.htlo = 0; - fep->ev_napi_rx = SCC_NAPI_RX_EVENT_MSK; - fep->ev_napi_tx = SCC_NAPI_TX_EVENT_MSK; - fep->ev_rx = SCC_RX_EVENT; - fep->ev_tx = SCC_TX_EVENT | SCCE_ENET_TXE; + fep->ev_napi = SCC_NAPI_EVENT_MSK; + fep->ev = SCC_EVENT | SCCE_ENET_TXE; fep->ev_err = SCC_ERR_EVENT_MSK; return 0; @@ -379,52 +375,28 @@ static void stop(struct net_device *dev) fs_cleanup_bds(dev); } -static void napi_clear_rx_event(struct net_device *dev) +static void napi_clear_event_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - W16(sccp, scc_scce, SCC_NAPI_RX_EVENT_MSK); + W16(sccp, scc_scce, SCC_NAPI_EVENT_MSK); } -static void napi_enable_rx(struct net_device *dev) +static void napi_enable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - S16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK); + S16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK); } -static void napi_disable_rx(struct net_device *dev) +static void napi_disable_fs(struct net_device *dev) { struct fs_enet_private *fep = netdev_priv(dev); scc_t __iomem *sccp = fep->scc.sccp; - C16(sccp, scc_sccm, SCC_NAPI_RX_EVENT_MSK); -} - -static void napi_clear_tx_event(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - W16(sccp, scc_scce, SCC_NAPI_TX_EVENT_MSK); -} - -static void napi_enable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - S16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK); -} - -static void napi_disable_tx(struct net_device *dev) -{ - struct fs_enet_private *fep = netdev_priv(dev); - scc_t __iomem *sccp = fep->scc.sccp; - - C16(sccp, scc_sccm, SCC_NAPI_TX_EVENT_MSK); + C16(sccp, scc_sccm, SCC_NAPI_EVENT_MSK); } static void rx_bd_done(struct net_device *dev) @@ -497,12 +469,9 @@ const struct fs_ops fs_scc_ops = { .set_multicast_list = set_multicast_list, .restart = restart, .stop = stop, - .napi_clear_rx_event = napi_clear_rx_event, - .napi_enable_rx = napi_enable_rx, - .napi_disable_rx = napi_disable_rx, - .napi_clear_tx_event = napi_clear_tx_event, - .napi_enable_tx = napi_enable_tx, - .napi_disable_tx = napi_disable_tx, + .napi_clear_event = napi_clear_event_fs, + .napi_enable = napi_enable_fs, + .napi_disable = napi_disable_fs, .rx_bd_done = rx_bd_done, .tx_kickstart = tx_kickstart, .get_int_events = get_int_events, diff --git a/drivers/net/ethernet/freescale/fsl_pq_mdio.c b/drivers/net/ethernet/freescale/fsl_pq_mdio.c index f3c63dce1e30..446c7b374ff5 100644 --- a/drivers/net/ethernet/freescale/fsl_pq_mdio.c +++ b/drivers/net/ethernet/freescale/fsl_pq_mdio.c @@ -195,7 +195,7 @@ static int fsl_pq_mdio_reset(struct mii_bus *bus) return 0; } -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +#if IS_ENABLED(CONFIG_GIANFAR) /* * Return the TBIPA address, starting from the address * of the mapped GFAR MDIO registers (struct gfar) @@ -228,7 +228,7 @@ static uint32_t __iomem *get_etsec_tbipa(void __iomem *p) } #endif -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +#if IS_ENABLED(CONFIG_UCC_GETH) /* * Return the TBIPAR address for a QE MDIO node, starting from the address * of the mapped MII registers (struct fsl_pq_mii) @@ -306,7 +306,7 @@ static void ucc_configure(phys_addr_t start, phys_addr_t end) #endif static const struct of_device_id fsl_pq_mdio_match[] = { -#if defined(CONFIG_GIANFAR) || defined(CONFIG_GIANFAR_MODULE) +#if IS_ENABLED(CONFIG_GIANFAR) { .compatible = "fsl,gianfar-tbi", .data = &(struct fsl_pq_mdio_data) { @@ -344,7 +344,7 @@ static const struct of_device_id fsl_pq_mdio_match[] = { }, }, #endif -#if defined(CONFIG_UCC_GETH) || defined(CONFIG_UCC_GETH_MODULE) +#if IS_ENABLED(CONFIG_UCC_GETH) { .compatible = "fsl,ucc-mdio", .data = &(struct fsl_pq_mdio_data) { diff --git a/drivers/net/ethernet/freescale/ucc_geth.c b/drivers/net/ethernet/freescale/ucc_geth.c index 5bf1ade28315..186ef8f16c80 100644 --- a/drivers/net/ethernet/freescale/ucc_geth.c +++ b/drivers/net/ethernet/freescale/ucc_geth.c @@ -3756,7 +3756,7 @@ static int ucc_geth_probe(struct platform_device* ofdev) return -EINVAL; } if ((*prop < QE_CLK_NONE) || (*prop > QE_CLK24)) { - pr_err("invalid rx-clock propperty\n"); + pr_err("invalid rx-clock property\n"); return -EINVAL; } ug_info->uf_info.rx_clock = *prop; diff --git a/drivers/net/ethernet/freescale/xgmac_mdio.c b/drivers/net/ethernet/freescale/xgmac_mdio.c index 7b8fe866f603..e03b30c60dcf 100644 --- a/drivers/net/ethernet/freescale/xgmac_mdio.c +++ b/drivers/net/ethernet/freescale/xgmac_mdio.c @@ -271,11 +271,8 @@ static int xgmac_mdio_probe(struct platform_device *pdev) goto err_ioremap; } - if (of_get_property(pdev->dev.of_node, - "little-endian", NULL)) - priv->is_little_endian = true; - else - priv->is_little_endian = false; + priv->is_little_endian = of_property_read_bool(pdev->dev.of_node, + "little-endian"); ret = of_mdiobus_register(bus, np); if (ret) { diff --git a/drivers/net/ethernet/hisilicon/hip04_eth.c b/drivers/net/ethernet/hisilicon/hip04_eth.c index 0c4afe95ef54..39778892b3b3 100644 --- a/drivers/net/ethernet/hisilicon/hip04_eth.c +++ b/drivers/net/ethernet/hisilicon/hip04_eth.c @@ -600,7 +600,7 @@ static irqreturn_t hip04_mac_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -enum hrtimer_restart tx_done(struct hrtimer *hrtimer) +static enum hrtimer_restart tx_done(struct hrtimer *hrtimer) { struct hip04_priv *priv; @@ -755,13 +755,13 @@ static void hip04_get_drvinfo(struct net_device *netdev, strlcpy(drvinfo->version, DRV_VERSION, sizeof(drvinfo->version)); } -static struct ethtool_ops hip04_ethtool_ops = { +static const struct ethtool_ops hip04_ethtool_ops = { .get_coalesce = hip04_get_coalesce, .set_coalesce = hip04_set_coalesce, .get_drvinfo = hip04_get_drvinfo, }; -static struct net_device_ops hip04_netdev_ops = { +static const struct net_device_ops hip04_netdev_ops = { .ndo_open = hip04_mac_open, .ndo_stop = hip04_mac_stop, .ndo_get_stats = hip04_get_stats, diff --git a/drivers/net/ethernet/hisilicon/hisi_femac.c b/drivers/net/ethernet/hisilicon/hisi_femac.c index b5d7ad0252a0..ced185962ef8 100644 --- a/drivers/net/ethernet/hisilicon/hisi_femac.c +++ b/drivers/net/ethernet/hisilicon/hisi_femac.c @@ -699,7 +699,7 @@ static int hisi_femac_net_ioctl(struct net_device *dev, return phy_mii_ioctl(dev->phydev, ifreq, cmd); } -static struct ethtool_ops hisi_femac_ethtools_ops = { +static const struct ethtool_ops hisi_femac_ethtools_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, @@ -940,8 +940,8 @@ static int hisi_femac_drv_remove(struct platform_device *pdev) } #ifdef CONFIG_PM -int hisi_femac_drv_suspend(struct platform_device *pdev, - pm_message_t state) +static int hisi_femac_drv_suspend(struct platform_device *pdev, + pm_message_t state) { struct net_device *ndev = platform_get_drvdata(pdev); struct hisi_femac_priv *priv = netdev_priv(ndev); @@ -957,7 +957,7 @@ int hisi_femac_drv_suspend(struct platform_device *pdev, return 0; } -int hisi_femac_drv_resume(struct platform_device *pdev) +static int hisi_femac_drv_resume(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); struct hisi_femac_priv *priv = netdev_priv(ndev); diff --git a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c index 275618bb4646..e69a6bed31a9 100644 --- a/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c +++ b/drivers/net/ethernet/hisilicon/hix5hd2_gmac.c @@ -750,7 +750,7 @@ static const struct net_device_ops hix5hd2_netdev_ops = { .ndo_set_mac_address = hix5hd2_net_set_mac_address, }; -static struct ethtool_ops hix5hd2_ethtools_ops = { +static const struct ethtool_ops hix5hd2_ethtools_ops = { .get_link = ethtool_op_get_link, .get_link_ksettings = phy_ethtool_get_link_ksettings, .set_link_ksettings = phy_ethtool_set_link_ksettings, diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c index 5c8afe1a5ccb..a834774fdb02 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c @@ -684,8 +684,7 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb, if (!phy || IS_ERR(phy)) return -EIO; - if (mdio->irq) - phy->irq = mdio->irq[addr]; + phy->irq = mdio->irq[addr]; /* All data is now stored in the phy struct; * register it diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c index afb5daa3721d..eb448dff7564 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -115,10 +116,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) dsaf_dev->sc_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->sc_base)) { - dev_err(dsaf_dev->dev, "subctrl can not map!\n"); + if (IS_ERR(dsaf_dev->sc_base)) return PTR_ERR(dsaf_dev->sc_base); - } res = platform_get_resource(pdev, IORESOURCE_MEM, res_idx++); @@ -129,10 +128,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) dsaf_dev->sds_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->sds_base)) { - dev_err(dsaf_dev->dev, "serdes-ctrl can not map!\n"); + if (IS_ERR(dsaf_dev->sds_base)) return PTR_ERR(dsaf_dev->sds_base); - } } else { dsaf_dev->sub_ctrl = syscon; } @@ -147,10 +144,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) } } dsaf_dev->ppe_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->ppe_base)) { - dev_err(dsaf_dev->dev, "ppe-base resource can not map!\n"); + if (IS_ERR(dsaf_dev->ppe_base)) return PTR_ERR(dsaf_dev->ppe_base); - } dsaf_dev->ppe_paddr = res->start; if (!HNS_DSAF_IS_DEBUG(dsaf_dev)) { @@ -166,10 +161,8 @@ int hns_dsaf_get_cfg(struct dsaf_device *dsaf_dev) } } dsaf_dev->io_base = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(dsaf_dev->io_base)) { - dev_err(dsaf_dev->dev, "dsaf-base resource can not map!\n"); + if (IS_ERR(dsaf_dev->io_base)) return PTR_ERR(dsaf_dev->io_base); - } } ret = device_property_read_u32(dsaf_dev->dev, "desc-num", &desc_num); @@ -2781,6 +2774,89 @@ static struct platform_driver g_dsaf_driver = { module_platform_driver(g_dsaf_driver); +/** + * hns_dsaf_roce_reset - reset dsaf and roce + * @dsaf_fwnode: Pointer to framework node for the dasf + * @enable: false - request reset , true - drop reset + * retuen 0 - success , negative -fail + */ +int hns_dsaf_roce_reset(struct fwnode_handle *dsaf_fwnode, bool enable) +{ + struct dsaf_device *dsaf_dev; + struct platform_device *pdev; + u32 mp; + u32 sl; + u32 credit; + int i; + const u32 port_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + {DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1, DSAF_ROCE_PORT_0}, + {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1}, + {DSAF_ROCE_PORT_4, DSAF_ROCE_PORT_2, DSAF_ROCE_PORT_1}, + {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, + {DSAF_ROCE_PORT_5, DSAF_ROCE_PORT_3, DSAF_ROCE_PORT_1}, + }; + const u32 sl_map[DSAF_ROCE_CREDIT_CHN][DSAF_ROCE_CHAN_MODE_NUM] = { + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_0}, + {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_1}, + {DSAF_ROCE_SL_0, DSAF_ROCE_SL_0, DSAF_ROCE_SL_2}, + {DSAF_ROCE_SL_1, DSAF_ROCE_SL_1, DSAF_ROCE_SL_3}, + }; + + if (!is_of_node(dsaf_fwnode)) { + pr_err("hisi_dsaf: Only support DT node!\n"); + return -EINVAL; + } + pdev = of_find_device_by_node(to_of_node(dsaf_fwnode)); + dsaf_dev = dev_get_drvdata(&pdev->dev); + if (AE_IS_VER1(dsaf_dev->dsaf_ver)) { + dev_err(dsaf_dev->dev, "%s v1 chip doesn't support RoCE!\n", + dsaf_dev->ae_dev.name); + return -ENODEV; + } + + if (!enable) { + /* Reset rocee-channels in dsaf and rocee */ + hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, false); + hns_dsaf_roce_srst(dsaf_dev, false); + } else { + /* Configure dsaf tx roce correspond to port map and sl map */ + mp = dsaf_read_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG); + for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) + dsaf_set_field(mp, 7 << i * 3, i * 3, + port_map[i][DSAF_ROCE_6PORT_MODE]); + dsaf_set_field(mp, 3 << i * 3, i * 3, 0); + dsaf_write_dev(dsaf_dev, DSAF_ROCE_PORT_MAP_REG, mp); + + sl = dsaf_read_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG); + for (i = 0; i < DSAF_ROCE_CREDIT_CHN; i++) + dsaf_set_field(sl, 3 << i * 2, i * 2, + sl_map[i][DSAF_ROCE_6PORT_MODE]); + dsaf_write_dev(dsaf_dev, DSAF_ROCE_SL_MAP_REG, sl); + + /* De-reset rocee-channels in dsaf and rocee */ + hns_dsaf_srst_chns(dsaf_dev, DSAF_CHNS_MASK, true); + msleep(SRST_TIME_INTERVAL); + hns_dsaf_roce_srst(dsaf_dev, true); + + /* Eanble dsaf channel rocee credit */ + credit = dsaf_read_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG); + dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 0); + dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); + + dsaf_set_bit(credit, DSAF_SBM_ROCEE_CFG_CRD_EN_B, 1); + dsaf_write_dev(dsaf_dev, DSAF_SBM_ROCEE_CFG_REG_REG, credit); + } + return 0; +} +EXPORT_SYMBOL(hns_dsaf_roce_reset); + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Huawei Tech. Co., Ltd."); MODULE_DESCRIPTION("HNS DSAF driver"); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h index 1daf018d9071..f3681d566ae6 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_main.h @@ -43,6 +43,32 @@ struct hns_mac_cb; #define DSAF_PRIO_NR 8 #define DSAF_REG_PER_ZONE 3 +#define DSAF_ROCE_CREDIT_CHN 8 +#define DSAF_ROCE_CHAN_MODE 3 + +enum dsaf_roce_port_mode { + DSAF_ROCE_6PORT_MODE, + DSAF_ROCE_4PORT_MODE, + DSAF_ROCE_2PORT_MODE, + DSAF_ROCE_CHAN_MODE_NUM, +}; + +enum dsaf_roce_port_num { + DSAF_ROCE_PORT_0, + DSAF_ROCE_PORT_1, + DSAF_ROCE_PORT_2, + DSAF_ROCE_PORT_3, + DSAF_ROCE_PORT_4, + DSAF_ROCE_PORT_5, +}; + +enum dsaf_roce_qos_sl { + DSAF_ROCE_SL_0, + DSAF_ROCE_SL_1, + DSAF_ROCE_SL_2, + DSAF_ROCE_SL_3, +}; + #define DSAF_STATS_READ(p, offset) (*((u64 *)((u8 *)(p) + (offset)))) #define HNS_DSAF_IS_DEBUG(dev) (dev->dsaf_mode == DSAF_MODE_DISABLE_SP) @@ -419,6 +445,10 @@ int hns_dsaf_get_mac_entry_by_index( void hns_dsaf_fix_mac_mode(struct hns_mac_cb *mac_cb); +void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable); + +void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable); + int hns_dsaf_ae_init(struct dsaf_device *dsaf_dev); void hns_dsaf_ae_uninit(struct dsaf_device *dsaf_dev); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c index 611b67b6f450..36b9f791cf2f 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_misc.c @@ -231,6 +231,42 @@ static void hns_dsaf_xge_core_srst_by_port(struct dsaf_device *dsaf_dev, dsaf_write_sub(dsaf_dev, reg_addr, reg_val); } +/** + * hns_dsaf_srst_chns - reset dsaf channels + * @dsaf_dev: dsaf device struct pointer + * @msk: xbar channels mask value: + * bit0-5 for xge0-5 + * bit6-11 for ppe0-5 + * bit12-17 for roce0-5 + * bit18-19 for com/dfx + * @enable: false - request reset , true - drop reset + */ +void hns_dsaf_srst_chns(struct dsaf_device *dsaf_dev, u32 msk, bool enable) +{ + u32 reg_addr; + + if (!enable) + reg_addr = DSAF_SUB_SC_DSAF_RESET_REQ_REG; + else + reg_addr = DSAF_SUB_SC_DSAF_RESET_DREQ_REG; + + dsaf_write_sub(dsaf_dev, reg_addr, msk); +} + +void hns_dsaf_roce_srst(struct dsaf_device *dsaf_dev, bool enable) +{ + if (!enable) { + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_RESET_REQ_REG, 1); + } else { + dsaf_write_sub(dsaf_dev, + DSAF_SUB_SC_ROCEE_CLK_DIS_REG, 1); + dsaf_write_sub(dsaf_dev, + DSAF_SUB_SC_ROCEE_RESET_DREQ_REG, 1); + msleep(20); + dsaf_write_sub(dsaf_dev, DSAF_SUB_SC_ROCEE_CLK_EN_REG, 1); + } +} + static void hns_dsaf_xge_core_srst_by_port_acpi(struct dsaf_device *dsaf_dev, u32 port, bool dereset) diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h index 235f74444b1d..13c16ab7be48 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_reg.h @@ -77,6 +77,12 @@ #define DSAF_SUB_SC_PPE_RESET_DREQ_REG 0xA4C #define DSAF_SUB_SC_RCB_PPE_COM_RESET_REQ_REG 0xA88 #define DSAF_SUB_SC_RCB_PPE_COM_RESET_DREQ_REG 0xA8C +#define DSAF_SUB_SC_DSAF_RESET_REQ_REG 0xAA8 +#define DSAF_SUB_SC_ROCEE_RESET_REQ_REG 0xA50 +#define DSAF_SUB_SC_DSAF_RESET_DREQ_REG 0xAAC +#define DSAF_SUB_SC_ROCEE_CLK_DIS_REG 0x32C +#define DSAF_SUB_SC_ROCEE_RESET_DREQ_REG 0xA54 +#define DSAF_SUB_SC_ROCEE_CLK_EN_REG 0x328 #define DSAF_SUB_SC_LIGHT_MODULE_DETECT_EN_REG 0x2060 #define DSAF_SUB_SC_TCAM_MBIST_EN_REG 0x2300 #define DSAF_SUB_SC_DSAF_CLK_ST_REG 0x5300 @@ -133,6 +139,8 @@ #define DSAF_ROCEE_INT_STS_0_REG 0x200 #define DSAFV2_SERDES_LBK_0_REG 0x220 #define DSAF_PAUSE_CFG_REG 0x240 +#define DSAF_ROCE_PORT_MAP_REG 0x2A0 +#define DSAF_ROCE_SL_MAP_REG 0x2A4 #define DSAF_PPE_QID_CFG_0_REG 0x300 #define DSAF_SW_PORT_TYPE_0_REG 0x320 #define DSAF_STP_PORT_TYPE_0_REG 0x340 @@ -178,6 +186,7 @@ #define DSAF_SBM_BP_CFG_2_XGE_REG_0_REG 0x200C #define DSAF_SBM_BP_CFG_2_PPE_REG_0_REG 0x230C #define DSAF_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x260C +#define DSAF_SBM_ROCEE_CFG_REG_REG 0x2380 #define DSAFV2_SBM_BP_CFG_2_ROCEE_REG_0_REG 0x238C #define DSAF_SBM_FREE_CNT_0_0_REG 0x2010 #define DSAF_SBM_FREE_CNT_1_0_REG 0x2014 @@ -796,6 +805,9 @@ #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_S 9 #define DSAFV2_SBM_CFG4_RESET_BUF_NUM_NO_PFC_M (((1ULL << 9) - 1) << 9) +#define DSAF_CHNS_MASK 0x3f000 +#define DSAF_SBM_ROCEE_CFG_CRD_EN_B 2 +#define SRST_TIME_INTERVAL 20 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_S 0 #define DSAFV2_SBM_CFG2_ROCEE_SET_BUF_NUM_M (((1ULL << 8) - 1) << 0) #define DSAFV2_SBM_CFG2_ROCEE_RESET_BUF_NUM_S 8 diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.c b/drivers/net/ethernet/hisilicon/hns/hns_enet.c index d7e1f8c7ae92..059aaeda46b1 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.c @@ -994,10 +994,10 @@ static void hns_nic_adjust_link(struct net_device *ndev) struct hnae_handle *h = priv->ae_handle; int state = 1; - if (priv->phy) { + if (ndev->phydev) { h->dev->ops->adjust_link(h, ndev->phydev->speed, ndev->phydev->duplex); - state = priv->phy->link; + state = ndev->phydev->link; } state = state && h->dev->ops->get_status(h); @@ -1022,7 +1022,6 @@ static void hns_nic_adjust_link(struct net_device *ndev) */ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) { - struct hns_nic_priv *priv = netdev_priv(ndev); struct phy_device *phy_dev = h->phy_dev; int ret; @@ -1046,8 +1045,6 @@ int hns_nic_init_phy(struct net_device *ndev, struct hnae_handle *h) if (h->phy_if == PHY_INTERFACE_MODE_XGMII) phy_dev->autoneg = false; - priv->phy = phy_dev; - return 0; } @@ -1224,8 +1221,8 @@ static int hns_nic_net_up(struct net_device *ndev) if (ret) goto out_start_err; - if (priv->phy) - phy_start(priv->phy); + if (ndev->phydev) + phy_start(ndev->phydev); clear_bit(NIC_STATE_DOWN, &priv->state); (void)mod_timer(&priv->service_timer, jiffies + SERVICE_TIMER_HZ); @@ -1259,8 +1256,8 @@ static void hns_nic_net_down(struct net_device *ndev) netif_tx_disable(ndev); priv->link = 0; - if (priv->phy) - phy_stop(priv->phy); + if (ndev->phydev) + phy_stop(ndev->phydev); ops = priv->ae_handle->dev->ops; @@ -1359,8 +1356,7 @@ static void hns_nic_net_timeout(struct net_device *ndev) static int hns_nic_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; if (!netif_running(netdev)) return -EINVAL; @@ -2017,9 +2013,8 @@ static int hns_nic_dev_remove(struct platform_device *pdev) hns_nic_uninit_ring_data(priv); priv->ring_data = NULL; - if (priv->phy) - phy_disconnect(priv->phy); - priv->phy = NULL; + if (ndev->phydev) + phy_disconnect(ndev->phydev); if (!IS_ERR_OR_NULL(priv->ae_handle)) hnae_put_handle(priv->ae_handle); diff --git a/drivers/net/ethernet/hisilicon/hns/hns_enet.h b/drivers/net/ethernet/hisilicon/hns/hns_enet.h index 44bb3015eed3..5b412de350aa 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_enet.h +++ b/drivers/net/ethernet/hisilicon/hns/hns_enet.h @@ -59,7 +59,6 @@ struct hns_nic_priv { u32 port_id; int phy_mode; int phy_led_val; - struct phy_device *phy; struct net_device *netdev; struct device *dev; struct hnae_handle *ae_handle; diff --git a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c index ab33487a5321..47e59bbfd061 100644 --- a/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c +++ b/drivers/net/ethernet/hisilicon/hns/hns_ethtool.c @@ -48,9 +48,9 @@ static u32 hns_nic_get_link(struct net_device *net_dev) h = priv->ae_handle; - if (priv->phy) { - if (!genphy_read_status(priv->phy)) - link_stat = priv->phy->link; + if (net_dev->phydev) { + if (!genphy_read_status(net_dev->phydev)) + link_stat = net_dev->phydev->link; else link_stat = 0; } @@ -64,15 +64,14 @@ static u32 hns_nic_get_link(struct net_device *net_dev) } static void hns_get_mdix_mode(struct net_device *net_dev, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { int mdix_ctrl, mdix, retval, is_resolved; - struct hns_nic_priv *priv = netdev_priv(net_dev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = net_dev->phydev; if (!phy_dev || !phy_dev->mdio.bus) { - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; return; } @@ -89,35 +88,35 @@ static void hns_get_mdix_mode(struct net_device *net_dev, switch (mdix_ctrl) { case 0x0: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI; break; case 0x1: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_X; break; case 0x3: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; break; default: - cmd->eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix_ctrl = ETH_TP_MDI_INVALID; break; } if (!is_resolved) - cmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + cmd->base.eth_tp_mdix = ETH_TP_MDI_INVALID; else if (mdix) - cmd->eth_tp_mdix = ETH_TP_MDI_X; + cmd->base.eth_tp_mdix = ETH_TP_MDI_X; else - cmd->eth_tp_mdix = ETH_TP_MDI; + cmd->base.eth_tp_mdix = ETH_TP_MDI; } /** - *hns_nic_get_settings - implement ethtool get settings + *hns_nic_get_link_ksettings - implement ethtool get link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_get_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_get_link_ksettings(struct net_device *net_dev, + struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -125,6 +124,7 @@ static int hns_nic_get_settings(struct net_device *net_dev, int ret; u8 duplex; u16 speed; + u32 supported, advertising; if (!priv || !priv->ae_handle) return -ESRCH; @@ -139,38 +139,43 @@ static int hns_nic_get_settings(struct net_device *net_dev, return -EINVAL; } - /* When there is no phy, autoneg is off. */ - cmd->autoneg = false; - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + ethtool_convert_link_mode_to_legacy_u32(&supported, + cmd->link_modes.supported); + ethtool_convert_link_mode_to_legacy_u32(&advertising, + cmd->link_modes.advertising); - if (priv->phy) - (void)phy_ethtool_gset(priv->phy, cmd); + /* When there is no phy, autoneg is off. */ + cmd->base.autoneg = false; + cmd->base.cmd = speed; + cmd->base.duplex = duplex; + + if (net_dev->phydev) + (void)phy_ethtool_ksettings_get(net_dev->phydev, cmd); link_stat = hns_nic_get_link(net_dev); if (!link_stat) { - ethtool_cmd_speed_set(cmd, (u32)SPEED_UNKNOWN); - cmd->duplex = DUPLEX_UNKNOWN; + cmd->base.speed = (u32)SPEED_UNKNOWN; + cmd->base.duplex = DUPLEX_UNKNOWN; } - if (cmd->autoneg) - cmd->advertising |= ADVERTISED_Autoneg; + if (cmd->base.autoneg) + advertising |= ADVERTISED_Autoneg; - cmd->supported |= h->if_support; + supported |= h->if_support; if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - cmd->supported |= SUPPORTED_TP; - cmd->advertising |= ADVERTISED_1000baseT_Full; + supported |= SUPPORTED_TP; + advertising |= ADVERTISED_1000baseT_Full; } else if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - cmd->supported |= SUPPORTED_FIBRE; - cmd->advertising |= ADVERTISED_10000baseKR_Full; + supported |= SUPPORTED_FIBRE; + advertising |= ADVERTISED_10000baseKR_Full; } switch (h->media_type) { case HNAE_MEDIA_TYPE_FIBER: - cmd->port = PORT_FIBRE; + cmd->base.port = PORT_FIBRE; break; case HNAE_MEDIA_TYPE_COPPER: - cmd->port = PORT_TP; + cmd->base.port = PORT_TP; break; case HNAE_MEDIA_TYPE_UNKNOWN: default: @@ -178,23 +183,27 @@ static int hns_nic_get_settings(struct net_device *net_dev, } if (!(AE_IS_VER1(priv->enet_ver) && h->port_type == HNAE_PORT_DEBUG)) - cmd->supported |= SUPPORTED_Pause; + supported |= SUPPORTED_Pause; - cmd->transceiver = XCVR_EXTERNAL; - cmd->mdio_support = (ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.supported, + supported); + ethtool_convert_legacy_u32_to_link_mode(cmd->link_modes.advertising, + advertising); + + cmd->base.mdio_support = ETH_MDIO_SUPPORTS_C45 | ETH_MDIO_SUPPORTS_C22; hns_get_mdix_mode(net_dev, cmd); return 0; } /** - *hns_nic_set_settings - implement ethtool set settings + *hns_nic_set_link_settings - implement ethtool set link ksettings *@net_dev: net_device - *@cmd: ethtool_cmd + *@cmd: ethtool_link_ksettings *retuen 0 - success , negative --fail */ -static int hns_nic_set_settings(struct net_device *net_dev, - struct ethtool_cmd *cmd) +static int hns_nic_set_link_ksettings(struct net_device *net_dev, + const struct ethtool_link_ksettings *cmd) { struct hns_nic_priv *priv = netdev_priv(net_dev); struct hnae_handle *h; @@ -208,24 +217,25 @@ static int hns_nic_set_settings(struct net_device *net_dev, return -ENODEV; h = priv->ae_handle; - speed = ethtool_cmd_speed(cmd); + speed = cmd->base.speed; if (h->phy_if == PHY_INTERFACE_MODE_XGMII) { - if (cmd->autoneg == AUTONEG_ENABLE || speed != SPEED_10000 || - cmd->duplex != DUPLEX_FULL) + if (cmd->base.autoneg == AUTONEG_ENABLE || + speed != SPEED_10000 || + cmd->base.duplex != DUPLEX_FULL) return -EINVAL; } else if (h->phy_if == PHY_INTERFACE_MODE_SGMII) { - if (!priv->phy && cmd->autoneg == AUTONEG_ENABLE) + if (!net_dev->phydev && cmd->base.autoneg == AUTONEG_ENABLE) return -EINVAL; - if (speed == SPEED_1000 && cmd->duplex == DUPLEX_HALF) + if (speed == SPEED_1000 && cmd->base.duplex == DUPLEX_HALF) return -EINVAL; - if (priv->phy) - return phy_ethtool_sset(priv->phy, cmd); + if (net_dev->phydev) + return phy_ethtool_ksettings_set(net_dev->phydev, cmd); if ((speed != SPEED_10 && speed != SPEED_100 && - speed != SPEED_1000) || (cmd->duplex != DUPLEX_HALF && - cmd->duplex != DUPLEX_FULL)) + speed != SPEED_1000) || (cmd->base.duplex != DUPLEX_HALF && + cmd->base.duplex != DUPLEX_FULL)) return -EINVAL; } else { netdev_err(net_dev, "Not supported!"); @@ -233,7 +243,7 @@ static int hns_nic_set_settings(struct net_device *net_dev, } if (h->dev->ops->adjust_link) { - h->dev->ops->adjust_link(h, (int)speed, cmd->duplex); + h->dev->ops->adjust_link(h, (int)speed, cmd->base.duplex); return 0; } @@ -305,7 +315,7 @@ static int __lb_setup(struct net_device *ndev, { int ret = 0; struct hns_nic_priv *priv = netdev_priv(ndev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = ndev->phydev; struct hnae_handle *h = priv->ae_handle; switch (loop) { @@ -910,7 +920,7 @@ void hns_get_strings(struct net_device *netdev, u32 stringset, u8 *data) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_SERDES], ETH_GSTRING_LEN); buff += ETH_GSTRING_LEN; - if ((priv->phy) && (!priv->phy->is_c45)) + if ((netdev->phydev) && (!netdev->phydev->is_c45)) memcpy(buff, hns_nic_test_strs[MAC_INTERNALLOOP_PHY], ETH_GSTRING_LEN); @@ -996,7 +1006,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) if (priv->ae_handle->phy_if == PHY_INTERFACE_MODE_XGMII) cnt--; - if ((!priv->phy) || (priv->phy->is_c45)) + if ((!netdev->phydev) || (netdev->phydev->is_c45)) cnt--; return cnt; @@ -1015,8 +1025,7 @@ int hns_get_sset_count(struct net_device *netdev, int stringset) int hns_phy_led_set(struct net_device *netdev, int value) { int retval; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; retval = phy_write(phy_dev, HNS_PHY_PAGE_REG, HNS_PHY_PAGE_LED); retval |= phy_write(phy_dev, HNS_LED_FC_REG, value); @@ -1039,7 +1048,7 @@ int hns_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { struct hns_nic_priv *priv = netdev_priv(netdev); struct hnae_handle *h = priv->ae_handle; - struct phy_device *phy_dev = priv->phy; + struct phy_device *phy_dev = netdev->phydev; int ret; if (phy_dev) @@ -1159,8 +1168,7 @@ static int hns_get_regs_len(struct net_device *net_dev) static int hns_nic_nway_reset(struct net_device *netdev) { int ret = 0; - struct hns_nic_priv *priv = netdev_priv(netdev); - struct phy_device *phy = priv->phy; + struct phy_device *phy = netdev->phydev; if (netif_running(netdev)) { if (phy) @@ -1264,11 +1272,9 @@ static int hns_get_rxnfc(struct net_device *netdev, return 0; } -static struct ethtool_ops hns_ethtool_ops = { +static const struct ethtool_ops hns_ethtool_ops = { .get_drvinfo = hns_nic_get_drvinfo, .get_link = hns_nic_get_link, - .get_settings = hns_nic_get_settings, - .set_settings = hns_nic_set_settings, .get_ringparam = hns_get_ringparam, .get_pauseparam = hns_get_pauseparam, .set_pauseparam = hns_set_pauseparam, @@ -1288,6 +1294,8 @@ static struct ethtool_ops hns_ethtool_ops = { .get_rxfh = hns_get_rss, .set_rxfh = hns_set_rss, .get_rxnfc = hns_get_rxnfc, + .get_link_ksettings = hns_nic_get_link_ksettings, + .set_link_ksettings = hns_nic_set_link_ksettings, }; void hns_ethtool_set_ops(struct net_device *ndev) diff --git a/drivers/net/ethernet/i825xx/82596.c b/drivers/net/ethernet/i825xx/82596.c index befb4ac3e2b0..ce235b776793 100644 --- a/drivers/net/ethernet/i825xx/82596.c +++ b/drivers/net/ethernet/i825xx/82596.c @@ -89,10 +89,10 @@ static char version[] __initdata = #define DEB(x,y) if (i596_debug & (x)) y -#if defined(CONFIG_MVME16x_NET) || defined(CONFIG_MVME16x_NET_MODULE) +#if IS_ENABLED(CONFIG_MVME16x_NET) #define ENABLE_MVME16x_NET #endif -#if defined(CONFIG_BVME6000_NET) || defined(CONFIG_BVME6000_NET_MODULE) +#if IS_ENABLED(CONFIG_BVME6000_NET) #define ENABLE_BVME6000_NET #endif diff --git a/drivers/net/ethernet/ibm/emac/core.c b/drivers/net/ethernet/ibm/emac/core.c index 7af09cbc53f0..8f139197f1aa 100644 --- a/drivers/net/ethernet/ibm/emac/core.c +++ b/drivers/net/ethernet/ibm/emac/core.c @@ -2780,7 +2780,7 @@ static int emac_probe(struct platform_device *ofdev) /* Get interrupts. EMAC irq is mandatory, WOL irq is optional */ dev->emac_irq = irq_of_parse_and_map(np, 0); dev->wol_irq = irq_of_parse_and_map(np, 1); - if (dev->emac_irq == NO_IRQ) { + if (!dev->emac_irq) { printk(KERN_ERR "%s: Can't map main interrupt\n", np->full_name); goto err_free; } @@ -2943,9 +2943,9 @@ static int emac_probe(struct platform_device *ofdev) err_reg_unmap: iounmap(dev->emacp); err_irq_unmap: - if (dev->wol_irq != NO_IRQ) + if (dev->wol_irq) irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) + if (dev->emac_irq) irq_dispose_mapping(dev->emac_irq); err_free: free_netdev(ndev); @@ -2987,9 +2987,9 @@ static int emac_remove(struct platform_device *ofdev) emac_dbg_unregister(dev); iounmap(dev->emacp); - if (dev->wol_irq != NO_IRQ) + if (dev->wol_irq) irq_dispose_mapping(dev->wol_irq); - if (dev->emac_irq != NO_IRQ) + if (dev->emac_irq) irq_dispose_mapping(dev->emac_irq); free_netdev(dev->ndev); diff --git a/drivers/net/ethernet/ibm/emac/mal.c b/drivers/net/ethernet/ibm/emac/mal.c index fdb5cdb3cd15..aaf6fec566b5 100644 --- a/drivers/net/ethernet/ibm/emac/mal.c +++ b/drivers/net/ethernet/ibm/emac/mal.c @@ -597,9 +597,8 @@ static int mal_probe(struct platform_device *ofdev) mal->rxde_irq = irq_of_parse_and_map(ofdev->dev.of_node, 4); } - if (mal->txeob_irq == NO_IRQ || mal->rxeob_irq == NO_IRQ || - mal->serr_irq == NO_IRQ || mal->txde_irq == NO_IRQ || - mal->rxde_irq == NO_IRQ) { + if (!mal->txeob_irq || !mal->rxeob_irq || !mal->serr_irq || + !mal->txde_irq || !mal->rxde_irq) { printk(KERN_ERR "mal%d: failed to map interrupts !\n", index); err = -ENODEV; diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 88f3c85fb04a..bfe17d9c022d 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -203,7 +203,8 @@ static void free_long_term_buff(struct ibmvnic_adapter *adapter, struct device *dev = &adapter->vdev->dev; dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); - send_request_unmap(adapter, ltb->map_id); + if (!adapter->failover) + send_request_unmap(adapter, ltb->map_id); } static int alloc_rx_pool(struct ibmvnic_adapter *adapter, @@ -522,7 +523,8 @@ static int ibmvnic_close(struct net_device *netdev) for (i = 0; i < adapter->req_rx_queues; i++) napi_disable(&adapter->napi[i]); - netif_tx_stop_all_queues(netdev); + if (!adapter->failover) + netif_tx_stop_all_queues(netdev); if (adapter->bounce_buffer) { if (!dma_mapping_error(dev, adapter->bounce_buffer_dma)) { @@ -1422,7 +1424,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) scrq = adapter->tx_scrq[i]; scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); - if (scrq->irq == NO_IRQ) { + if (!scrq->irq) { rc = -EINVAL; dev_err(dev, "Error mapping irq\n"); goto req_tx_irq_failed; @@ -1442,7 +1444,7 @@ static int init_sub_crq_irqs(struct ibmvnic_adapter *adapter) for (i = 0; i < adapter->req_rx_queues; i++) { scrq = adapter->rx_scrq[i]; scrq->irq = irq_create_mapping(NULL, scrq->hw_irq); - if (scrq->irq == NO_IRQ) { + if (!scrq->irq) { rc = -EINVAL; dev_err(dev, "Error mapping irq\n"); goto req_rx_irq_failed; @@ -2777,12 +2779,6 @@ static void handle_control_ras_rsp(union ibmvnic_crq *crq, } } -static int ibmvnic_fw_comp_open(struct inode *inode, struct file *file) -{ - file->private_data = inode->i_private; - return 0; -} - static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len, loff_t *ppos) { @@ -2834,7 +2830,7 @@ static ssize_t trace_read(struct file *file, char __user *user_buf, size_t len, static const struct file_operations trace_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = trace_read, }; @@ -2884,7 +2880,7 @@ static ssize_t paused_write(struct file *file, const char __user *user_buf, static const struct file_operations paused_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = paused_read, .write = paused_write, }; @@ -2932,7 +2928,7 @@ static ssize_t tracing_write(struct file *file, const char __user *user_buf, static const struct file_operations tracing_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = tracing_read, .write = tracing_write, }; @@ -2985,7 +2981,7 @@ static ssize_t error_level_write(struct file *file, const char __user *user_buf, static const struct file_operations error_level_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = error_level_read, .write = error_level_write, }; @@ -3036,7 +3032,7 @@ static ssize_t trace_level_write(struct file *file, const char __user *user_buf, static const struct file_operations trace_level_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = trace_level_read, .write = trace_level_write, }; @@ -3089,7 +3085,7 @@ static ssize_t trace_buff_size_write(struct file *file, static const struct file_operations trace_size_ops = { .owner = THIS_MODULE, - .open = ibmvnic_fw_comp_open, + .open = simple_open, .read = trace_buff_size_read, .write = trace_buff_size_write, }; @@ -3280,6 +3276,10 @@ static void ibmvnic_handle_crq(union ibmvnic_crq *crq, rc = ibmvnic_send_crq_init(adapter); if (rc) dev_err(dev, "Error sending init rc=%ld\n", rc); + } else if (gen_crq->cmd == IBMVNIC_DEVICE_FAILOVER) { + dev_info(dev, "Backing device failover detected\n"); + netif_carrier_off(netdev); + adapter->failover = true; } else { /* The adapter lost the connection */ dev_err(dev, "Virtual Adapter failed (rc=%d)\n", @@ -3615,8 +3615,18 @@ static void handle_crq_init_rsp(struct work_struct *work) struct device *dev = &adapter->vdev->dev; struct net_device *netdev = adapter->netdev; unsigned long timeout = msecs_to_jiffies(30000); + bool restart = false; int rc; + if (adapter->failover) { + release_sub_crqs(adapter); + if (netif_running(netdev)) { + netif_tx_disable(netdev); + ibmvnic_close(netdev); + restart = true; + } + } + send_version_xchg(adapter); reinit_completion(&adapter->init_done); if (!wait_for_completion_timeout(&adapter->init_done, timeout)) { @@ -3645,6 +3655,17 @@ static void handle_crq_init_rsp(struct work_struct *work) netdev->real_num_tx_queues = adapter->req_tx_queues; + if (adapter->failover) { + adapter->failover = false; + if (restart) { + rc = ibmvnic_open(netdev); + if (rc) + goto restart_failed; + } + netif_carrier_on(netdev); + return; + } + rc = register_netdev(netdev); if (rc) { dev_err(dev, @@ -3655,6 +3676,8 @@ static void handle_crq_init_rsp(struct work_struct *work) return; +restart_failed: + dev_err(dev, "Failed to restart ibmvnic, rc=%d\n", rc); register_failed: release_sub_crqs(adapter); task_failed: @@ -3692,6 +3715,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) dev_set_drvdata(&dev->dev, netdev); adapter->vdev = dev; adapter->netdev = netdev; + adapter->failover = false; ether_addr_copy(adapter->mac_addr, mac_addr_p); ether_addr_copy(netdev->dev_addr, adapter->mac_addr); @@ -3721,6 +3745,7 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) if (dma_mapping_error(&dev->dev, adapter->stats_token)) { if (!firmware_has_feature(FW_FEATURE_CMO)) dev_err(&dev->dev, "Couldn't map stats buffer\n"); + rc = -ENOMEM; goto free_crq; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h index e82898fd518e..bfc84c7d0e11 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.h +++ b/drivers/net/ethernet/ibm/ibmvnic.h @@ -830,6 +830,7 @@ enum ibmvfc_crq_format { IBMVNIC_CRQ_INIT = 0x01, IBMVNIC_CRQ_INIT_COMPLETE = 0x02, IBMVNIC_PARTITION_MIGRATED = 0x06, + IBMVNIC_DEVICE_FAILOVER = 0x08, }; struct ibmvnic_crq_queue { @@ -1047,4 +1048,5 @@ struct ibmvnic_adapter { u8 map_id; struct work_struct vnic_crq_init; + bool failover; }; diff --git a/drivers/net/ethernet/intel/e1000e/ptp.c b/drivers/net/ethernet/intel/e1000e/ptp.c index 2e1b17ad52a3..ad03763e009a 100644 --- a/drivers/net/ethernet/intel/e1000e/ptp.c +++ b/drivers/net/ethernet/intel/e1000e/ptp.c @@ -334,7 +334,7 @@ void e1000e_ptp_init(struct e1000_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; e_err("ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { e_info("registered PHC clock\n"); } } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k.h b/drivers/net/ethernet/intel/fm10k/fm10k.h index c4cf08dcf5af..4d19e46f7c55 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k.h @@ -240,9 +240,7 @@ struct fm10k_iov_data { struct fm10k_vf_info vf_info[0]; }; -#define fm10k_vxlan_port_for_each(vp, intfc) \ - list_for_each_entry(vp, &(intfc)->vxlan_port, list) -struct fm10k_vxlan_port { +struct fm10k_udp_port { struct list_head list; sa_family_t sa_family; __be16 port; @@ -335,8 +333,9 @@ struct fm10k_intfc { u32 reta[FM10K_RETA_SIZE]; u32 rssrk[FM10K_RSSRK_SIZE]; - /* VXLAN port tracking information */ + /* UDP encapsulation port tracking information */ struct list_head vxlan_port; + struct list_head geneve_port; #ifdef CONFIG_DEBUG_FS struct dentry *dbg_intfc; @@ -458,7 +457,7 @@ __be16 fm10k_tx_encap_offload(struct sk_buff *skb); netdev_tx_t fm10k_xmit_frame_ring(struct sk_buff *skb, struct fm10k_ring *tx_ring); void fm10k_tx_timeout_reset(struct fm10k_intfc *interface); -u64 fm10k_get_tx_pending(struct fm10k_ring *ring); +u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw); bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring); void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count); @@ -496,7 +495,6 @@ int fm10k_close(struct net_device *netdev); /* Ethtool */ void fm10k_set_ethtool_ops(struct net_device *dev); -u32 fm10k_get_reta_size(struct net_device *netdev); void fm10k_write_reta(struct fm10k_intfc *interface, const u32 *indir); /* IOV */ @@ -509,7 +507,7 @@ int fm10k_iov_configure(struct pci_dev *pdev, int num_vfs); s32 fm10k_iov_update_pvid(struct fm10k_intfc *interface, u16 glort, u16 pvid); int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac); int fm10k_ndo_set_vf_vlan(struct net_device *netdev, - int vf_idx, u16 vid, u8 qos); + int vf_idx, u16 vid, u8 qos, __be16 vlan_proto); int fm10k_ndo_set_vf_bw(struct net_device *netdev, int vf_idx, int rate, int unused); int fm10k_ndo_get_vf_config(struct net_device *netdev, diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.c b/drivers/net/ethernet/intel/fm10k/fm10k_common.c index d6baaea8bc7c..dd95ac4f4c64 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.c @@ -207,6 +207,9 @@ s32 fm10k_disable_queues_generic(struct fm10k_hw *hw, u16 q_cnt) /* clear tx_ready to prevent any false hits for reset */ hw->mac.tx_ready = false; + if (FM10K_REMOVED(hw->hw_addr)) + return 0; + /* clear the enable bit for all rings */ for (i = 0; i < q_cnt; i++) { reg = fm10k_read_reg(hw, FM10K_TXDCTL(i)); diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_common.h b/drivers/net/ethernet/intel/fm10k/fm10k_common.h index 50f71e997448..d51f9c7a47ff 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_common.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_common.h @@ -34,7 +34,7 @@ u32 fm10k_read_reg(struct fm10k_hw *hw, int reg); /* write operations, indexed using DWORDS */ #define fm10k_write_reg(hw, reg, val) \ do { \ - u32 __iomem *hw_addr = ACCESS_ONCE((hw)->hw_addr); \ + u32 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \ if (!FM10K_REMOVED(hw_addr)) \ writel((val), &hw_addr[(reg)]); \ } while (0) @@ -42,7 +42,7 @@ do { \ /* Switch register write operations, index using DWORDS */ #define fm10k_write_sw_reg(hw, reg, val) \ do { \ - u32 __iomem *sw_addr = ACCESS_ONCE((hw)->sw_addr); \ + u32 __iomem *sw_addr = READ_ONCE((hw)->sw_addr); \ if (!FM10K_REMOVED(sw_addr)) \ writel((val), &sw_addr[(reg)]); \ } while (0) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c index c04cbe9c9f7c..5241e0873397 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_ethtool.c @@ -966,7 +966,7 @@ static int fm10k_set_priv_flags(struct net_device *netdev, u32 priv_flags) return 0; } -u32 fm10k_get_reta_size(struct net_device __always_unused *netdev) +static u32 fm10k_get_reta_size(struct net_device __always_unused *netdev) { return FM10K_RETA_SIZE * FM10K_RETA_ENTRIES_PER_REG; } @@ -1182,6 +1182,7 @@ static const struct ethtool_ops fm10k_ethtool_ops = { .set_rxfh = fm10k_set_rssh, .get_channels = fm10k_get_channels, .set_channels = fm10k_set_channels, + .get_ts_info = ethtool_op_get_ts_info, }; void fm10k_set_ethtool_ops(struct net_device *dev) diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c index 47f0743ec03b..5f4dac0d36ef 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_iov.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_iov.c @@ -51,7 +51,7 @@ s32 fm10k_iov_event(struct fm10k_intfc *interface) int i; /* if there is no iov_data then there is no mailbox to process */ - if (!ACCESS_ONCE(interface->iov_data)) + if (!READ_ONCE(interface->iov_data)) return 0; rcu_read_lock(); @@ -99,7 +99,7 @@ s32 fm10k_iov_mbx(struct fm10k_intfc *interface) int i; /* if there is no iov_data then there is no mailbox to process */ - if (!ACCESS_ONCE(interface->iov_data)) + if (!READ_ONCE(interface->iov_data)) return 0; rcu_read_lock(); @@ -445,7 +445,7 @@ int fm10k_ndo_set_vf_mac(struct net_device *netdev, int vf_idx, u8 *mac) } int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, - u8 qos) + u8 qos, __be16 vlan_proto) { struct fm10k_intfc *interface = netdev_priv(netdev); struct fm10k_iov_data *iov_data = interface->iov_data; @@ -460,6 +460,10 @@ int fm10k_ndo_set_vf_vlan(struct net_device *netdev, int vf_idx, u16 vid, if (qos || (vid > (VLAN_VID_MASK - 1))) return -EINVAL; + /* VF VLAN Protocol part to default is unsupported */ + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + vf_info = &iov_data->vf_info[vf_idx]; /* exit if there is nothing to do */ diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index e9767b6366a8..5de937852436 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -56,7 +56,8 @@ static int __init fm10k_init_module(void) pr_info("%s\n", fm10k_copyright); /* create driver workqueue */ - fm10k_workqueue = alloc_workqueue("fm10k", WQ_MEM_RECLAIM, 0); + fm10k_workqueue = alloc_workqueue("%s", WQ_MEM_RECLAIM, 0, + fm10k_driver_name); fm10k_dbg_init(); @@ -651,11 +652,11 @@ static int fm10k_clean_rx_irq(struct fm10k_q_vector *q_vector, static struct ethhdr *fm10k_port_is_vxlan(struct sk_buff *skb) { struct fm10k_intfc *interface = netdev_priv(skb->dev); - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *vxlan_port; /* we can only offload a vxlan if we recognize it as such */ vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, list); + struct fm10k_udp_port, list); if (!vxlan_port) return NULL; @@ -1128,13 +1129,24 @@ static u64 fm10k_get_tx_completed(struct fm10k_ring *ring) return ring->stats.packets; } -u64 fm10k_get_tx_pending(struct fm10k_ring *ring) +/** + * fm10k_get_tx_pending - how many Tx descriptors not processed + * @ring: the ring structure + * @in_sw: is tx_pending being checked in SW or in HW? + */ +u64 fm10k_get_tx_pending(struct fm10k_ring *ring, bool in_sw) { struct fm10k_intfc *interface = ring->q_vector->interface; struct fm10k_hw *hw = &interface->hw; + u32 head, tail; - u32 head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx)); - u32 tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx)); + if (likely(in_sw)) { + head = ring->next_to_clean; + tail = ring->next_to_use; + } else { + head = fm10k_read_reg(hw, FM10K_TDH(ring->reg_idx)); + tail = fm10k_read_reg(hw, FM10K_TDT(ring->reg_idx)); + } return ((head <= tail) ? tail : tail + ring->count) - head; } @@ -1143,7 +1155,7 @@ bool fm10k_check_tx_hang(struct fm10k_ring *tx_ring) { u32 tx_done = fm10k_get_tx_completed(tx_ring); u32 tx_done_old = tx_ring->tx_stats.tx_done_old; - u32 tx_pending = fm10k_get_tx_pending(tx_ring); + u32 tx_pending = fm10k_get_tx_pending(tx_ring, true); clear_check_for_tx_hang(tx_ring); @@ -1397,7 +1409,7 @@ static void fm10k_update_itr(struct fm10k_ring_container *ring_container) * that the calculation will never get below a 1. The bit shift * accounts for changes in the ITR due to PCIe link speed. */ - itr_round = ACCESS_ONCE(ring_container->itr_scale) + 8; + itr_round = READ_ONCE(ring_container->itr_scale) + 8; avg_wire_size += BIT(itr_round) - 1; avg_wire_size >>= itr_round; @@ -1473,7 +1485,7 @@ static int fm10k_poll(struct napi_struct *napi, int budget) /* re-enable the q_vector */ fm10k_qv_enable(q_vector); - return 0; + return min(work_done, budget - 1); } /** diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 20a5bbe3f536..05629381be6b 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -384,129 +384,171 @@ static void fm10k_request_glort_range(struct fm10k_intfc *interface) } /** - * fm10k_del_vxlan_port_all + * fm10k_free_udp_port_info * @interface: board private structure * - * This function frees the entire vxlan_port list + * This function frees both geneve_port and vxlan_port structures **/ -static void fm10k_del_vxlan_port_all(struct fm10k_intfc *interface) +static void fm10k_free_udp_port_info(struct fm10k_intfc *interface) { - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *port; - /* flush all entries from list */ - vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, list); - while (vxlan_port) { - list_del(&vxlan_port->list); - kfree(vxlan_port); - vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, - list); + /* flush all entries from vxlan list */ + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, list); + while (port) { + list_del(&port->list); + kfree(port); + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, + list); + } + + /* flush all entries from geneve list */ + port = list_first_entry_or_null(&interface->geneve_port, + struct fm10k_udp_port, list); + while (port) { + list_del(&port->list); + kfree(port); + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, + list); } } /** - * fm10k_restore_vxlan_port + * fm10k_restore_udp_port_info * @interface: board private structure * - * This function restores the value in the tunnel_cfg register after reset + * This function restores the value in the tunnel_cfg register(s) after reset **/ -static void fm10k_restore_vxlan_port(struct fm10k_intfc *interface) +static void fm10k_restore_udp_port_info(struct fm10k_intfc *interface) { struct fm10k_hw *hw = &interface->hw; - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *port; /* only the PF supports configuring tunnels */ if (hw->mac.type != fm10k_mac_pf) return; - vxlan_port = list_first_entry_or_null(&interface->vxlan_port, - struct fm10k_vxlan_port, list); + port = list_first_entry_or_null(&interface->vxlan_port, + struct fm10k_udp_port, list); /* restore tunnel configuration register */ fm10k_write_reg(hw, FM10K_TUNNEL_CFG, - (vxlan_port ? ntohs(vxlan_port->port) : 0) | + (port ? ntohs(port->port) : 0) | (ETH_P_TEB << FM10K_TUNNEL_CFG_NVGRE_SHIFT)); + + port = list_first_entry_or_null(&interface->geneve_port, + struct fm10k_udp_port, list); + + /* restore Geneve tunnel configuration register */ + fm10k_write_reg(hw, FM10K_TUNNEL_CFG_GENEVE, + (port ? ntohs(port->port) : 0)); +} + +static struct fm10k_udp_port * +fm10k_remove_tunnel_port(struct list_head *ports, + struct udp_tunnel_info *ti) +{ + struct fm10k_udp_port *port; + + list_for_each_entry(port, ports, list) { + if ((port->port == ti->port) && + (port->sa_family == ti->sa_family)) { + list_del(&port->list); + return port; + } + } + + return NULL; +} + +static void fm10k_insert_tunnel_port(struct list_head *ports, + struct udp_tunnel_info *ti) +{ + struct fm10k_udp_port *port; + + /* remove existing port entry from the list so that the newest items + * are always at the tail of the list. + */ + port = fm10k_remove_tunnel_port(ports, ti); + if (!port) { + port = kmalloc(sizeof(*port), GFP_ATOMIC); + if (!port) + return; + port->port = ti->port; + port->sa_family = ti->sa_family; + } + + list_add_tail(&port->list, ports); } /** - * fm10k_add_vxlan_port + * fm10k_udp_tunnel_add * @netdev: network interface device structure * @ti: Tunnel endpoint information * - * This function is called when a new VXLAN interface has added a new port - * number to the range that is currently in use for VXLAN. The new port - * number is always added to the tail so that the port number list should - * match the order in which the ports were allocated. The head of the list - * is always used as the VXLAN port number for offloads. + * This function is called when a new UDP tunnel port has been added. + * Due to hardware restrictions, only one port per type can be offloaded at + * once. **/ -static void fm10k_add_vxlan_port(struct net_device *dev, +static void fm10k_udp_tunnel_add(struct net_device *dev, struct udp_tunnel_info *ti) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_vxlan_port *vxlan_port; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; /* only the PF supports configuring tunnels */ if (interface->hw.mac.type != fm10k_mac_pf) return; - /* existing ports are pulled out so our new entry is always last */ - fm10k_vxlan_port_for_each(vxlan_port, interface) { - if ((vxlan_port->port == ti->port) && - (vxlan_port->sa_family == ti->sa_family)) { - list_del(&vxlan_port->list); - goto insert_tail; - } + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + fm10k_insert_tunnel_port(&interface->vxlan_port, ti); + break; + case UDP_TUNNEL_TYPE_GENEVE: + fm10k_insert_tunnel_port(&interface->geneve_port, ti); + break; + default: + return; } - /* allocate memory to track ports */ - vxlan_port = kmalloc(sizeof(*vxlan_port), GFP_ATOMIC); - if (!vxlan_port) - return; - vxlan_port->port = ti->port; - vxlan_port->sa_family = ti->sa_family; - -insert_tail: - /* add new port value to list */ - list_add_tail(&vxlan_port->list, &interface->vxlan_port); - - fm10k_restore_vxlan_port(interface); + fm10k_restore_udp_port_info(interface); } /** - * fm10k_del_vxlan_port + * fm10k_udp_tunnel_del * @netdev: network interface device structure * @ti: Tunnel endpoint information * - * This function is called when a new VXLAN interface has freed a port - * number from the range that is currently in use for VXLAN. The freed - * port is removed from the list and the new head is used to determine - * the port number for offloads. + * This function is called when a new UDP tunnel port is deleted. The freed + * port will be removed from the list, then we reprogram the offloaded port + * based on the head of the list. **/ -static void fm10k_del_vxlan_port(struct net_device *dev, +static void fm10k_udp_tunnel_del(struct net_device *dev, struct udp_tunnel_info *ti) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_vxlan_port *vxlan_port; + struct fm10k_udp_port *port = NULL; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; if (interface->hw.mac.type != fm10k_mac_pf) return; - /* find the port in the list and free it */ - fm10k_vxlan_port_for_each(vxlan_port, interface) { - if ((vxlan_port->port == ti->port) && - (vxlan_port->sa_family == ti->sa_family)) { - list_del(&vxlan_port->list); - kfree(vxlan_port); - break; - } + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + port = fm10k_remove_tunnel_port(&interface->vxlan_port, ti); + break; + case UDP_TUNNEL_TYPE_GENEVE: + port = fm10k_remove_tunnel_port(&interface->geneve_port, ti); + break; + default: + return; } - fm10k_restore_vxlan_port(interface); + /* if we did remove a port we need to free its memory */ + kfree(port); + + fm10k_restore_udp_port_info(interface); } /** @@ -555,7 +597,6 @@ int fm10k_open(struct net_device *netdev) if (err) goto err_set_queues; - /* update VXLAN port configuration */ udp_tunnel_get_rx_info(netdev); fm10k_up(interface); @@ -591,7 +632,7 @@ int fm10k_close(struct net_device *netdev) fm10k_qv_free_irq(interface); - fm10k_del_vxlan_port_all(interface); + fm10k_free_udp_port_info(interface); fm10k_free_all_tx_resources(interface); fm10k_free_all_rx_resources(interface); @@ -1055,7 +1096,7 @@ void fm10k_restore_rx_state(struct fm10k_intfc *interface) interface->xcast_mode = xcast_mode; /* Restore tunnel configuration */ - fm10k_restore_vxlan_port(interface); + fm10k_restore_udp_port_info(interface); } void fm10k_reset_rx_state(struct fm10k_intfc *interface) @@ -1098,7 +1139,7 @@ static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev, rcu_read_lock(); for (i = 0; i < interface->num_rx_queues; i++) { - ring = ACCESS_ONCE(interface->rx_ring[i]); + ring = READ_ONCE(interface->rx_ring[i]); if (!ring) continue; @@ -1114,7 +1155,7 @@ static struct rtnl_link_stats64 *fm10k_get_stats64(struct net_device *netdev, } for (i = 0; i < interface->num_tx_queues; i++) { - ring = ACCESS_ONCE(interface->tx_ring[i]); + ring = READ_ONCE(interface->tx_ring[i]); if (!ring) continue; @@ -1299,7 +1340,7 @@ static void *fm10k_dfwd_add_station(struct net_device *dev, static void fm10k_dfwd_del_station(struct net_device *dev, void *priv) { struct fm10k_intfc *interface = netdev_priv(dev); - struct fm10k_l2_accel *l2_accel = ACCESS_ONCE(interface->l2_accel); + struct fm10k_l2_accel *l2_accel = READ_ONCE(interface->l2_accel); struct fm10k_dglort_cfg dglort = { 0 }; struct fm10k_hw *hw = &interface->hw; struct net_device *sdev = priv; @@ -1375,8 +1416,8 @@ static const struct net_device_ops fm10k_netdev_ops = { .ndo_set_vf_vlan = fm10k_ndo_set_vf_vlan, .ndo_set_vf_rate = fm10k_ndo_set_vf_bw, .ndo_get_vf_config = fm10k_ndo_get_vf_config, - .ndo_udp_tunnel_add = fm10k_add_vxlan_port, - .ndo_udp_tunnel_del = fm10k_del_vxlan_port, + .ndo_udp_tunnel_add = fm10k_udp_tunnel_add, + .ndo_udp_tunnel_del = fm10k_udp_tunnel_del, .ndo_dfwd_add_station = fm10k_dfwd_add_station, .ndo_dfwd_del_station = fm10k_dfwd_del_station, #ifdef CONFIG_NET_POLL_CONTROLLER diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index 774a5654bf42..b1a2f8437d59 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -62,7 +62,7 @@ u16 fm10k_read_pci_cfg_word(struct fm10k_hw *hw, u32 reg) u32 fm10k_read_reg(struct fm10k_hw *hw, int reg) { - u32 __iomem *hw_addr = ACCESS_ONCE(hw->hw_addr); + u32 __iomem *hw_addr = READ_ONCE(hw->hw_addr); u32 value = 0; if (FM10K_REMOVED(hw_addr)) @@ -133,7 +133,7 @@ static void fm10k_detach_subtask(struct fm10k_intfc *interface) /* check the real address space to see if we've recovered */ hw_addr = READ_ONCE(interface->uc_addr); value = readl(hw_addr); - if ((~value)) { + if (~value) { interface->hw.hw_addr = interface->uc_addr; netif_device_attach(netdev); interface->flags |= FM10K_FLAG_RESET_REQUESTED; @@ -734,15 +734,15 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface, u64 rdba = ring->dma; struct fm10k_hw *hw = &interface->hw; u32 size = ring->count * sizeof(union fm10k_rx_desc); - u32 rxqctl = FM10K_RXQCTL_ENABLE | FM10K_RXQCTL_PF; - u32 rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY; + u32 rxqctl, rxdctl = FM10K_RXDCTL_WRITE_BACK_MIN_DELAY; u32 srrctl = FM10K_SRRCTL_BUFFER_CHAINING_EN; u32 rxint = FM10K_INT_MAP_DISABLE; u8 rx_pause = interface->rx_pause; u8 reg_idx = ring->reg_idx; /* disable queue to avoid issues while updating state */ - fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), 0); + rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx)); + rxqctl &= ~FM10K_RXQCTL_ENABLE; fm10k_write_flush(hw); /* possible poll here to verify ring resources have been cleaned */ @@ -797,6 +797,8 @@ static void fm10k_configure_rx_ring(struct fm10k_intfc *interface, fm10k_write_reg(hw, FM10K_RXINT(reg_idx), rxint); /* enable queue */ + rxqctl = fm10k_read_reg(hw, FM10K_RXQCTL(reg_idx)); + rxqctl |= FM10K_RXQCTL_ENABLE; fm10k_write_reg(hw, FM10K_RXQCTL(reg_idx), rxqctl); /* place buffers on ring for receive data */ @@ -1699,7 +1701,7 @@ void fm10k_down(struct fm10k_intfc *interface) /* start checking at the last ring to have pending Tx */ for (; i < interface->num_tx_queues; i++) - if (fm10k_get_tx_pending(interface->tx_ring[i])) + if (fm10k_get_tx_pending(interface->tx_ring[i], false)) break; /* if all the queues are drained, we can break now */ @@ -1835,8 +1837,9 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, interface->tx_itr = FM10K_TX_ITR_DEFAULT; interface->rx_itr = FM10K_ITR_ADAPTIVE | FM10K_RX_ITR_DEFAULT; - /* initialize vxlan_port list */ + /* initialize udp port lists */ INIT_LIST_HEAD(&interface->vxlan_port); + INIT_LIST_HEAD(&interface->geneve_port); netdev_rss_key_fill(rss_key, sizeof(rss_key)); memcpy(interface->rssrk, rss_key, sizeof(rss_key)); @@ -1950,9 +1953,18 @@ static int fm10k_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct fm10k_intfc *interface; int err; + if (pdev->error_state != pci_channel_io_normal) { + dev_err(&pdev->dev, + "PCI device still in an error state. Unable to load...\n"); + return -EIO; + } + err = pci_enable_device_mem(pdev); - if (err) + if (err) { + dev_err(&pdev->dev, + "PCI enable device failed: %d\n", err); return err; + } err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(48)); if (err) @@ -2275,7 +2287,7 @@ static pci_ers_result_t fm10k_io_slot_reset(struct pci_dev *pdev) { pci_ers_result_t result; - if (pci_enable_device_mem(pdev)) { + if (pci_reenable_device(pdev)) { dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n"); result = PCI_ERS_RESULT_DISCONNECT; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c index 682299dd0ce4..23fb319fd2a0 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pf.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pf.c @@ -867,10 +867,6 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, vf_q_idx = fm10k_vf_queue_index(hw, vf_idx); qmap_idx = qmap_stride * vf_idx; - /* MAP Tx queue back to 0 temporarily, and disable it */ - fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0); - fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0); - /* Determine correct default VLAN ID. The FM10K_VLAN_OVERRIDE bit is * used here to indicate to the VF that it will not have privilege to * write VLAN_TABLE. All policy is enforced on the PF but this allows @@ -886,9 +882,35 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, fm10k_tlv_attr_put_mac_vlan(msg, FM10K_MAC_VLAN_MSG_DEFAULT_MAC, vf_info->mac, vf_vid); - /* load onto outgoing mailbox, ignore any errors on enqueue */ - if (vf_info->mbx.ops.enqueue_tx) - vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg); + /* Configure Queue control register with new VLAN ID. The TXQCTL + * register is RO from the VF, so the PF must do this even in the + * case of notifying the VF of a new VID via the mailbox. + */ + txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) & + FM10K_TXQCTL_VID_MASK; + txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) | + FM10K_TXQCTL_VF | vf_idx; + + for (i = 0; i < queues_per_pool; i++) + fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl); + + /* try loading a message onto outgoing mailbox first */ + if (vf_info->mbx.ops.enqueue_tx) { + err = vf_info->mbx.ops.enqueue_tx(hw, &vf_info->mbx, msg); + if (err != FM10K_MBX_ERR_NO_MBX) + return err; + err = 0; + } + + /* If we aren't connected to a mailbox, this is most likely because + * the VF driver is not running. It should thus be safe to re-map + * queues and use the registers to pass the MAC address so that the VF + * driver gets correct information during its initialization. + */ + + /* MAP Tx queue back to 0 temporarily, and disable it */ + fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), 0); + fm10k_write_reg(hw, FM10K_TXDCTL(vf_q_idx), 0); /* verify ring has disabled before modifying base address registers */ txdctl = fm10k_read_reg(hw, FM10K_TXDCTL(vf_q_idx)); @@ -927,16 +949,6 @@ static s32 fm10k_iov_assign_default_mac_vlan_pf(struct fm10k_hw *hw, FM10K_TDLEN_ITR_SCALE_SHIFT); err_out: - /* configure Queue control register */ - txqctl = ((u32)vf_vid << FM10K_TXQCTL_VID_SHIFT) & - FM10K_TXQCTL_VID_MASK; - txqctl |= (vf_idx << FM10K_TXQCTL_TC_SHIFT) | - FM10K_TXQCTL_VF | vf_idx; - - /* assign VLAN ID */ - for (i = 0; i < queues_per_pool; i++) - fm10k_write_reg(hw, FM10K_TXQCTL(vf_q_idx + i), txqctl); - /* restore the queue back to VF ownership */ fm10k_write_reg(hw, FM10K_TQMAP(qmap_idx), vf_q_idx); return err; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_type.h b/drivers/net/ethernet/intel/fm10k/fm10k_type.h index f4e75c498287..6bb16c13d9d6 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_type.h +++ b/drivers/net/ethernet/intel/fm10k/fm10k_type.h @@ -154,6 +154,7 @@ struct fm10k_hw; #define FM10K_DGLORTDEC_INNERRSS_ENABLE 0x08000000 #define FM10K_TUNNEL_CFG 0x0040 #define FM10K_TUNNEL_CFG_NVGRE_SHIFT 16 +#define FM10K_TUNNEL_CFG_GENEVE 0x0041 #define FM10K_SWPRI_MAP(_n) ((_n) + 0x0050) #define FM10K_SWPRI_MAX 16 #define FM10K_RSSRK(_n, _m) (((_n) * 0x10) + (_m) + 0x0800) diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 2a882916b4f6..2030d7c1dc94 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -65,76 +65,72 @@ #include "i40e_dcb.h" /* Useful i40e defaults */ -#define I40E_MAX_VEB 16 +#define I40E_MAX_VEB 16 -#define I40E_MAX_NUM_DESCRIPTORS 4096 -#define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024) -#define I40E_DEFAULT_NUM_DESCRIPTORS 512 -#define I40E_REQ_DESCRIPTOR_MULTIPLE 32 -#define I40E_MIN_NUM_DESCRIPTORS 64 -#define I40E_MIN_MSIX 2 -#define I40E_DEFAULT_NUM_VMDQ_VSI 8 /* max 256 VSIs */ -#define I40E_MIN_VSI_ALLOC 51 /* LAN, ATR, FCOE, 32 VF, 16 VMDQ */ +#define I40E_MAX_NUM_DESCRIPTORS 4096 +#define I40E_MAX_CSR_SPACE (4 * 1024 * 1024 - 64 * 1024) +#define I40E_DEFAULT_NUM_DESCRIPTORS 512 +#define I40E_REQ_DESCRIPTOR_MULTIPLE 32 +#define I40E_MIN_NUM_DESCRIPTORS 64 +#define I40E_MIN_MSIX 2 +#define I40E_DEFAULT_NUM_VMDQ_VSI 8 /* max 256 VSIs */ +#define I40E_MIN_VSI_ALLOC 83 /* LAN, ATR, FCOE, 64 VF */ /* max 16 qps */ #define i40e_default_queues_per_vmdq(pf) \ (((pf)->flags & I40E_FLAG_RSS_AQ_CAPABLE) ? 4 : 1) -#define I40E_DEFAULT_QUEUES_PER_VF 4 -#define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */ +#define I40E_DEFAULT_QUEUES_PER_VF 4 +#define I40E_DEFAULT_QUEUES_PER_TC 1 /* should be a power of 2 */ #define i40e_pf_get_max_q_per_tc(pf) \ (((pf)->flags & I40E_FLAG_128_QP_RSS_CAPABLE) ? 128 : 64) -#define I40E_FDIR_RING 0 -#define I40E_FDIR_RING_COUNT 32 +#define I40E_FDIR_RING 0 +#define I40E_FDIR_RING_COUNT 32 #ifdef I40E_FCOE -#define I40E_DEFAULT_FCOE 8 /* default number of QPs for FCoE */ -#define I40E_MINIMUM_FCOE 1 /* minimum number of QPs for FCoE */ +#define I40E_DEFAULT_FCOE 8 /* default number of QPs for FCoE */ +#define I40E_MINIMUM_FCOE 1 /* minimum number of QPs for FCoE */ #endif /* I40E_FCOE */ -#define I40E_MAX_AQ_BUF_SIZE 4096 -#define I40E_AQ_LEN 256 -#define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ -#define I40E_MAX_USER_PRIORITY 8 -#define I40E_DEFAULT_MSG_ENABLE 4 -#define I40E_QUEUE_WAIT_RETRY_LIMIT 10 -#define I40E_INT_NAME_STR_LEN (IFNAMSIZ + 16) +#define I40E_MAX_AQ_BUF_SIZE 4096 +#define I40E_AQ_LEN 256 +#define I40E_AQ_WORK_LIMIT 66 /* max number of VFs + a little */ +#define I40E_MAX_USER_PRIORITY 8 +#define I40E_DEFAULT_MSG_ENABLE 4 +#define I40E_QUEUE_WAIT_RETRY_LIMIT 10 +#define I40E_INT_NAME_STR_LEN (IFNAMSIZ + 16) /* Ethtool Private Flags */ -#define I40E_PRIV_FLAGS_MFP_FLAG BIT(0) -#define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1) +#define I40E_PRIV_FLAGS_MFP_FLAG BIT(0) +#define I40E_PRIV_FLAGS_LINKPOLL_FLAG BIT(1) #define I40E_PRIV_FLAGS_FD_ATR BIT(2) #define I40E_PRIV_FLAGS_VEB_STATS BIT(3) #define I40E_PRIV_FLAGS_HW_ATR_EVICT BIT(4) #define I40E_PRIV_FLAGS_TRUE_PROMISC_SUPPORT BIT(5) -#define I40E_NVM_VERSION_LO_SHIFT 0 -#define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) -#define I40E_NVM_VERSION_HI_SHIFT 12 -#define I40E_NVM_VERSION_HI_MASK (0xf << I40E_NVM_VERSION_HI_SHIFT) -#define I40E_OEM_VER_BUILD_MASK 0xffff -#define I40E_OEM_VER_PATCH_MASK 0xff -#define I40E_OEM_VER_BUILD_SHIFT 8 -#define I40E_OEM_VER_SHIFT 24 +#define I40E_NVM_VERSION_LO_SHIFT 0 +#define I40E_NVM_VERSION_LO_MASK (0xff << I40E_NVM_VERSION_LO_SHIFT) +#define I40E_NVM_VERSION_HI_SHIFT 12 +#define I40E_NVM_VERSION_HI_MASK (0xf << I40E_NVM_VERSION_HI_SHIFT) +#define I40E_OEM_VER_BUILD_MASK 0xffff +#define I40E_OEM_VER_PATCH_MASK 0xff +#define I40E_OEM_VER_BUILD_SHIFT 8 +#define I40E_OEM_VER_SHIFT 24 #define I40E_PHY_DEBUG_ALL \ (I40E_AQ_PHY_DEBUG_DISABLE_LINK_FW | \ I40E_AQ_PHY_DEBUG_DISABLE_ALL_LINK_FW) /* The values in here are decimal coded as hex as is the case in the NVM map*/ -#define I40E_CURRENT_NVM_VERSION_HI 0x2 -#define I40E_CURRENT_NVM_VERSION_LO 0x40 +#define I40E_CURRENT_NVM_VERSION_HI 0x2 +#define I40E_CURRENT_NVM_VERSION_LO 0x40 -/* magic for getting defines into strings */ -#define STRINGIFY(foo) #foo -#define XSTRINGIFY(bar) STRINGIFY(bar) - -#define I40E_RX_DESC(R, i) \ +#define I40E_RX_DESC(R, i) \ (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])) -#define I40E_TX_DESC(R, i) \ +#define I40E_TX_DESC(R, i) \ (&(((struct i40e_tx_desc *)((R)->desc))[i])) -#define I40E_TX_CTXTDESC(R, i) \ +#define I40E_TX_CTXTDESC(R, i) \ (&(((struct i40e_tx_context_desc *)((R)->desc))[i])) -#define I40E_TX_FDIRDESC(R, i) \ +#define I40E_TX_FDIRDESC(R, i) \ (&(((struct i40e_filter_program_desc *)((R)->desc))[i])) /* default to trying for four seconds */ -#define I40E_TRY_LINK_TIMEOUT (4 * HZ) +#define I40E_TRY_LINK_TIMEOUT (4 * HZ) /** * i40e_is_mac_710 - Return true if MAC is X710/XL710 @@ -199,9 +195,9 @@ struct i40e_lump_tracking { #define I40E_FDIR_BUFFER_HEAD_ROOM 32 #define I40E_FDIR_BUFFER_HEAD_ROOM_FOR_ATR (I40E_FDIR_BUFFER_HEAD_ROOM * 4) -#define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) -#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) -#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4) +#define I40E_HKEY_ARRAY_SIZE ((I40E_PFQF_HKEY_MAX_INDEX + 1) * 4) +#define I40E_HLUT_ARRAY_SIZE ((I40E_PFQF_HLUT_MAX_INDEX + 1) * 4) +#define I40E_VF_HLUT_ARRAY_SIZE ((I40E_VFQF_HLUT1_MAX_INDEX + 1) * 4) enum i40e_fd_stat_idx { I40E_FD_STAT_ATR, @@ -387,8 +383,8 @@ struct i40e_pf { struct mutex switch_mutex; u16 lan_vsi; /* our default LAN VSI */ u16 lan_veb; /* initial relay, if exists */ -#define I40E_NO_VEB 0xffff -#define I40E_NO_VSI 0xffff +#define I40E_NO_VEB 0xffff +#define I40E_NO_VSI 0xffff u16 next_vsi; /* Next unallocated VSI - 0-based! */ struct i40e_vsi **vsi; struct i40e_veb *veb[I40E_MAX_VEB]; @@ -423,8 +419,8 @@ struct i40e_pf { */ u16 dcbx_cap; - u32 fcoe_hmc_filt_num; - u32 fcoe_hmc_cntx_num; + u32 fcoe_hmc_filt_num; + u32 fcoe_hmc_cntx_num; struct i40e_filter_control_settings filter_settings; struct ptp_clock *ptp_clock; @@ -470,10 +466,10 @@ struct i40e_mac_filter { struct i40e_veb { struct i40e_pf *pf; u16 idx; - u16 veb_idx; /* index of VEB parent */ + u16 veb_idx; /* index of VEB parent */ u16 seid; u16 uplink_seid; - u16 stats_idx; /* index of VEB parent */ + u16 stats_idx; /* index of VEB parent */ u8 enabled_tc; u16 bridge_mode; /* Bridge Mode (VEB/VEPA) */ u16 flags; @@ -534,12 +530,13 @@ struct i40e_vsi { u32 promisc_threshold; u16 work_limit; - u16 int_rate_limit; /* value in usecs */ + u16 int_rate_limit; /* value in usecs */ + + u16 rss_table_size; /* HW RSS table size */ + u16 rss_size; /* Allocated RSS queues */ + u8 *rss_hkey_user; /* User configured hash keys */ + u8 *rss_lut_user; /* User configured lookup table entries */ - u16 rss_table_size; /* HW RSS table size */ - u16 rss_size; /* Allocated RSS queues */ - u8 *rss_hkey_user; /* User configured hash keys */ - u8 *rss_lut_user; /* User configured lookup table entries */ u16 max_frame; u16 rx_buf_len; @@ -550,14 +547,14 @@ struct i40e_vsi { int base_vector; bool irqs_ready; - u16 seid; /* HW index of this VSI (absolute index) */ - u16 id; /* VSI number */ + u16 seid; /* HW index of this VSI (absolute index) */ + u16 id; /* VSI number */ u16 uplink_seid; - u16 base_queue; /* vsi's first queue in hw array */ - u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */ - u16 req_queue_pairs; /* User requested queue pairs */ - u16 num_queue_pairs; /* Used tx and rx pairs */ + u16 base_queue; /* vsi's first queue in hw array */ + u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */ + u16 req_queue_pairs; /* User requested queue pairs */ + u16 num_queue_pairs; /* Used tx and rx pairs */ u16 num_desc; enum i40e_vsi_type type; /* VSI type, e.g., LAN, FCoE, etc */ s16 vf_id; /* Virtual function ID for SRIOV VSIs */ @@ -576,19 +573,16 @@ struct i40e_vsi { /* TC BW limit max quanta within VSI */ u8 bw_ets_max_quanta[I40E_MAX_TRAFFIC_CLASS]; - struct i40e_pf *back; /* Backreference to associated PF */ - u16 idx; /* index in pf->vsi[] */ - u16 veb_idx; /* index of VEB parent */ - struct kobject *kobj; /* sysfs object */ - bool current_isup; /* Sync 'link up' logging */ + struct i40e_pf *back; /* Backreference to associated PF */ + u16 idx; /* index in pf->vsi[] */ + u16 veb_idx; /* index of VEB parent */ + struct kobject *kobj; /* sysfs object */ + bool current_isup; /* Sync 'link up' logging */ void *priv; /* client driver data reference. */ /* VSI specific handlers */ irqreturn_t (*irq_handler)(int irq, void *data); - - /* current rxnfc data */ - struct ethtool_rxnfc rxnfc; /* current rss hash opts */ } ____cacheline_internodealigned_in_smp; struct i40e_netdev_priv { @@ -707,6 +701,8 @@ void i40e_do_reset_safe(struct i40e_pf *pf, u32 reset_flags); void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags); int i40e_config_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size); +void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size); struct i40e_vsi *i40e_find_vsi_from_id(struct i40e_pf *pf, u16 id); void i40e_update_stats(struct i40e_vsi *vsi); void i40e_update_eth_stats(struct i40e_vsi *vsi); @@ -714,8 +710,6 @@ struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi); int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig); -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add); int i40e_add_del_fdir(struct i40e_vsi *vsi, struct i40e_fdir_filter *input, bool add); void i40e_fdir_check_and_reenable(struct i40e_pf *pf); diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h index 11cf1a5ebccf..67e396b2b347 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h @@ -204,6 +204,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + i40e_aqc_opc_query_hmc_resource_profile = 0x0500, + i40e_aqc_opc_set_hmc_resource_profile = 0x0501, /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, @@ -450,13 +453,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration); /* Set ARP Proxy command / response (indirect 0x0104) */ struct i40e_aqc_arp_proxy_data { __le16 command_flags; -#define I40E_AQ_ARP_INIT_IPV4 0x0008 -#define I40E_AQ_ARP_UNSUP_CTL 0x0010 -#define I40E_AQ_ARP_ENA 0x0020 -#define I40E_AQ_ARP_ADD_IPV4 0x0040 -#define I40E_AQ_ARP_DEL_IPV4 0x0080 +#define I40E_AQ_ARP_INIT_IPV4 0x0800 +#define I40E_AQ_ARP_UNSUP_CTL 0x1000 +#define I40E_AQ_ARP_ENA 0x2000 +#define I40E_AQ_ARP_ADD_IPV4 0x4000 +#define I40E_AQ_ARP_DEL_IPV4 0x8000 __le16 table_id; - __le32 pfpm_proxyfc; + __le32 enabled_offloads; +#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE 0x00000020 +#define I40E_AQ_ARP_OFFLOAD_ENABLE 0x00000800 __le32 ip_addr; u8 mac_addr[6]; u8 reserved[2]; @@ -471,17 +476,19 @@ struct i40e_aqc_ns_proxy_data { __le16 table_idx_ipv6_0; __le16 table_idx_ipv6_1; __le16 control; -#define I40E_AQ_NS_PROXY_ADD_0 0x0100 -#define I40E_AQ_NS_PROXY_DEL_0 0x0200 -#define I40E_AQ_NS_PROXY_ADD_1 0x0400 -#define I40E_AQ_NS_PROXY_DEL_1 0x0800 -#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x1000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x2000 -#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x4000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x8000 -#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0001 -#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002 -#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0004 +#define I40E_AQ_NS_PROXY_ADD_0 0x0001 +#define I40E_AQ_NS_PROXY_DEL_0 0x0002 +#define I40E_AQ_NS_PROXY_ADD_1 0x0004 +#define I40E_AQ_NS_PROXY_DEL_1 0x0008 +#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x0010 +#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x0020 +#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x0040 +#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x0080 +#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0100 +#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200 +#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0400 +#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE 0x0800 +#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE 0x1000 u8 mac_addr_0[6]; u8 mac_addr_1[6]; u8 local_mac_addr[6]; @@ -1582,6 +1589,24 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); +/* Get and set the active HMC resource profile and status. + * (direct 0x0500) and (direct 0x0501) + */ +struct i40e_aq_get_set_hmc_resource_profile { + u8 pm_profile; + u8 pe_vf_enabled; + u8 reserved[14]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); + +enum i40e_aq_hmc_profile { + /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ + I40E_HMC_PROFILE_DEFAULT = 1, + I40E_HMC_PROFILE_FAVOR_VF = 2, + I40E_HMC_PROFILE_EQUAL = 3, +}; + /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.c b/drivers/net/ethernet/intel/i40e/i40e_client.c index 618f18436618..250db0b244b7 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.c +++ b/drivers/net/ethernet/intel/i40e/i40e_client.c @@ -148,6 +148,11 @@ i40e_notify_client_of_vf_msg(struct i40e_vsi *vsi, u32 vf_id, u8 *msg, u16 len) "Cannot locate client instance virtual channel receive routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort virtchnl_receive\n"); + continue; + } cdev->client->ops->virtchnl_receive(&cdev->lan_info, cdev->client, vf_id, msg, len); @@ -181,6 +186,11 @@ void i40e_notify_client_of_l2_param_changes(struct i40e_vsi *vsi) "Cannot locate client instance l2_param_change routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&vsi->back->pdev->dev, "Client is not open, abort l2 param change\n"); + continue; + } cdev->lan_info.params = params; cdev->client->ops->l2_param_change(&cdev->lan_info, cdev->client, @@ -306,6 +316,11 @@ void i40e_notify_client_of_vf_reset(struct i40e_pf *pf, u32 vf_id) "Cannot locate client instance VF reset routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-reset\n"); + continue; + } cdev->client->ops->vf_reset(&cdev->lan_info, cdev->client, vf_id); } @@ -336,6 +351,11 @@ void i40e_notify_client_of_vf_enable(struct i40e_pf *pf, u32 num_vfs) "Cannot locate client instance VF enable routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-enable\n"); + continue; + } cdev->client->ops->vf_enable(&cdev->lan_info, cdev->client, num_vfs); } @@ -370,6 +390,11 @@ int i40e_vf_client_capable(struct i40e_pf *pf, u32 vf_id, "Cannot locate client instance VF capability routine\n"); continue; } + if (!test_bit(__I40E_CLIENT_INSTANCE_OPENED, + &cdev->state)) { + dev_dbg(&pf->pdev->dev, "Client is not open, abort vf-capable\n"); + continue; + } capable = cdev->client->ops->vf_capable(&cdev->lan_info, cdev->client, vf_id); @@ -559,6 +584,7 @@ void i40e_client_subtask(struct i40e_pf *pf) pf->hw.bus.device, pf->hw.bus.func); } + mutex_lock(&i40e_client_instance_mutex); /* Send an Open request to the client */ atomic_inc(&cdev->ref_cnt); if (client->ops && client->ops->open) @@ -568,10 +594,12 @@ void i40e_client_subtask(struct i40e_pf *pf) set_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state); } else { /* remove client instance */ + mutex_unlock(&i40e_client_instance_mutex); i40e_client_del_instance(pf, client); atomic_dec(&client->ref_cnt); continue; } + mutex_unlock(&i40e_client_instance_mutex); } mutex_unlock(&i40e_client_mutex); } @@ -654,7 +682,7 @@ int i40e_lan_del_device(struct i40e_pf *pf) static int i40e_client_release(struct i40e_client *client) { struct i40e_client_instance *cdev, *tmp; - struct i40e_pf *pf = NULL; + struct i40e_pf *pf; int ret = 0; LIST_HEAD(cdevs_tmp); @@ -664,12 +692,12 @@ static int i40e_client_release(struct i40e_client *client) if (strncmp(cdev->client->name, client->name, I40E_CLIENT_STR_LENGTH)) continue; + pf = (struct i40e_pf *)cdev->lan_info.pf; if (test_bit(__I40E_CLIENT_INSTANCE_OPENED, &cdev->state)) { if (atomic_read(&cdev->ref_cnt) > 0) { ret = I40E_ERR_NOT_READY; goto out; } - pf = (struct i40e_pf *)cdev->lan_info.pf; if (client->ops && client->ops->close) client->ops->close(&cdev->lan_info, client, false); @@ -681,8 +709,7 @@ static int i40e_client_release(struct i40e_client *client) client->name, pf->hw.pf_id); } /* delete the client instance from the list */ - list_del(&cdev->list); - list_add(&cdev->list, &cdevs_tmp); + list_move(&cdev->list, &cdevs_tmp); atomic_dec(&client->ref_cnt); dev_info(&pf->pdev->dev, "Deleted client instance of Client %s\n", client->name); @@ -811,7 +838,8 @@ static int i40e_client_setup_qvlist(struct i40e_info *ldev, wr32(hw, I40E_PFINT_AEQCTL, reg); } } - + /* Mitigate sync problems with iwarp VF driver */ + i40e_flush(hw); return 0; err: kfree(ldev->qvlist_info); @@ -1009,7 +1037,6 @@ int i40e_unregister_client(struct i40e_client *client) if (!i40e_client_is_registered(client)) { pr_info("i40e: Client %s has not been registered\n", client->name); - mutex_unlock(&i40e_client_mutex); ret = -ENODEV; goto out; } diff --git a/drivers/net/ethernet/intel/i40e/i40e_client.h b/drivers/net/ethernet/intel/i40e/i40e_client.h index a4601d97fb24..38a6c36a6a0e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_client.h +++ b/drivers/net/ethernet/intel/i40e/i40e_client.h @@ -36,9 +36,9 @@ #define I40E_CLIENT_VERSION_MINOR 01 #define I40E_CLIENT_VERSION_BUILD 00 #define I40E_CLIENT_VERSION_STR \ - XSTRINGIFY(I40E_CLIENT_VERSION_MAJOR) "." \ - XSTRINGIFY(I40E_CLIENT_VERSION_MINOR) "." \ - XSTRINGIFY(I40E_CLIENT_VERSION_BUILD) + __stringify(I40E_CLIENT_VERSION_MAJOR) "." \ + __stringify(I40E_CLIENT_VERSION_MINOR) "." \ + __stringify(I40E_CLIENT_VERSION_BUILD) struct i40e_client_version { u8 major; diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c index 05cf9a719bab..0c1875b5b16d 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c +++ b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c @@ -1054,6 +1054,7 @@ static ssize_t i40e_dbg_command_write(struct file *filp, struct i40e_dcbx_config *r_cfg = &pf->hw.remote_dcbx_config; int i, ret; + u32 switch_id; bw_data = kzalloc(sizeof( struct i40e_aqc_query_port_ets_config_resp), @@ -1063,8 +1064,12 @@ static ssize_t i40e_dbg_command_write(struct file *filp, goto command_write_done; } + vsi = pf->vsi[pf->lan_vsi]; + switch_id = + vsi->info.switch_id & I40E_AQ_VSI_SW_ID_MASK; + ret = i40e_aq_query_port_ets_config(&pf->hw, - pf->mac_seid, + switch_id, bw_data, NULL); if (ret) { dev_info(&pf->pdev->dev, @@ -1425,84 +1430,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, buff = NULL; kfree(desc); desc = NULL; - } else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) || - (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) { - struct i40e_fdir_filter fd_data; - u16 packet_len, i, j = 0; - char *asc_packet; - u8 *raw_packet; - bool add = false; - int ret; - - if (!(pf->flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - if (strncmp(cmd_buf, "add", 3) == 0) - add = true; - - if (add && (pf->auto_disable_flags & I40E_FLAG_FD_SB_ENABLED)) - goto command_write_done; - - asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - if (!asc_packet) - goto command_write_done; - - raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_SIZE, - GFP_KERNEL); - - if (!raw_packet) { - kfree(asc_packet); - asc_packet = NULL; - goto command_write_done; - } - - cnt = sscanf(&cmd_buf[13], - "%hx %2hhx %2hhx %hx %2hhx %2hhx %hx %x %hd %511s", - &fd_data.q_index, - &fd_data.flex_off, &fd_data.pctype, - &fd_data.dest_vsi, &fd_data.dest_ctl, - &fd_data.fd_status, &fd_data.cnt_index, - &fd_data.fd_id, &packet_len, asc_packet); - if (cnt != 10) { - dev_info(&pf->pdev->dev, - "program fd_filter: bad command string, cnt=%d\n", - cnt); - kfree(asc_packet); - asc_packet = NULL; - kfree(raw_packet); - goto command_write_done; - } - - /* fix packet length if user entered 0 */ - if (packet_len == 0) - packet_len = I40E_FDIR_MAX_RAW_PACKET_SIZE; - - /* make sure to check the max as well */ - packet_len = min_t(u16, - packet_len, I40E_FDIR_MAX_RAW_PACKET_SIZE); - - for (i = 0; i < packet_len; i++) { - cnt = sscanf(&asc_packet[j], "%2hhx ", &raw_packet[i]); - if (!cnt) - break; - j += 3; - } - dev_info(&pf->pdev->dev, "FD raw packet dump\n"); - print_hex_dump(KERN_INFO, "FD raw packet: ", - DUMP_PREFIX_OFFSET, 16, 1, - raw_packet, packet_len, true); - ret = i40e_program_fdir_filter(&fd_data, raw_packet, pf, add); - if (!ret) { - dev_info(&pf->pdev->dev, "Filter command send Status : Success\n"); - } else { - dev_info(&pf->pdev->dev, - "Filter command send failed %d\n", ret); - } - kfree(raw_packet); - raw_packet = NULL; - kfree(asc_packet); - asc_packet = NULL; } else if (strncmp(cmd_buf, "fd current cnt", 14) == 0) { dev_info(&pf->pdev->dev, "FD current total filter count for this interface: %d\n", i40e_get_current_fd_count(pf)); @@ -1727,8 +1654,6 @@ static ssize_t i40e_dbg_command_write(struct file *filp, dev_info(&pf->pdev->dev, " globr\n"); dev_info(&pf->pdev->dev, " send aq_cmd \n"); dev_info(&pf->pdev->dev, " send indirect aq_cmd \n"); - dev_info(&pf->pdev->dev, " add fd_filter \n"); - dev_info(&pf->pdev->dev, " rem fd_filter \n"); dev_info(&pf->pdev->dev, " fd current cnt"); dev_info(&pf->pdev->dev, " lldp start\n"); dev_info(&pf->pdev->dev, " lldp stop\n"); diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c index c912e041d102..92bc8846f1ba 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c @@ -1560,13 +1560,13 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } #endif for (i = 0; i < vsi->num_queue_pairs; i++) { - snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i); + snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_packets", i); p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i); + snprintf(p, ETH_GSTRING_LEN, "tx-%d.tx_bytes", i); p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i); + snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_packets", i); p += ETH_GSTRING_LEN; - snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i); + snprintf(p, ETH_GSTRING_LEN, "rx-%d.rx_bytes", i); p += ETH_GSTRING_LEN; } if (vsi != pf->vsi[pf->lan_vsi] || pf->hw.partition_id != 1) @@ -1581,16 +1581,16 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_tx_packets", i); + "veb.tc_%d_tx_packets", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_tx_bytes", i); + "veb.tc_%d_tx_bytes", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_rx_packets", i); + "veb.tc_%d_rx_packets", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "veb.tc_%u_rx_bytes", i); + "veb.tc_%d_rx_bytes", i); p += ETH_GSTRING_LEN; } } @@ -1601,23 +1601,23 @@ static void i40e_get_strings(struct net_device *netdev, u32 stringset, } for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { snprintf(p, ETH_GSTRING_LEN, - "port.tx_priority_%u_xon", i); + "port.tx_priority_%d_xon", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "port.tx_priority_%u_xoff", i); + "port.tx_priority_%d_xoff", i); p += ETH_GSTRING_LEN; } for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { snprintf(p, ETH_GSTRING_LEN, - "port.rx_priority_%u_xon", i); + "port.rx_priority_%d_xon", i); p += ETH_GSTRING_LEN; snprintf(p, ETH_GSTRING_LEN, - "port.rx_priority_%u_xoff", i); + "port.rx_priority_%d_xoff", i); p += ETH_GSTRING_LEN; } for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) { snprintf(p, ETH_GSTRING_LEN, - "port.rx_priority_%u_xon_2_xoff", i); + "port.rx_priority_%d_xon_2_xoff", i); p += ETH_GSTRING_LEN; } /* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */ @@ -1970,11 +1970,22 @@ static int i40e_set_phys_id(struct net_device *netdev, * 125us (8000 interrupts per second) == ITR(62) */ +/** + * __i40e_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. + **/ static int __i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) { struct i40e_netdev_priv *np = netdev_priv(netdev); + struct i40e_ring *rx_ring, *tx_ring; struct i40e_vsi *vsi = np->vsi; ec->tx_max_coalesced_frames_irq = vsi->work_limit; @@ -1989,14 +2000,18 @@ static int __i40e_get_coalesce(struct net_device *netdev, return -EINVAL; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[queue]->rx_itr_setting)) + rx_ring = vsi->rx_rings[queue]; + tx_ring = vsi->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) ec->use_adaptive_rx_coalesce = 1; - if (ITR_IS_DYNAMIC(vsi->tx_rings[queue]->tx_itr_setting)) + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) ec->use_adaptive_tx_coalesce = 1; - ec->rx_coalesce_usecs = vsi->rx_rings[queue]->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_rings[queue]->tx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* we use the _usecs_high to store/set the interrupt rate limit * that the hardware supports, that almost but not quite @@ -2010,18 +2025,44 @@ static int __i40e_get_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_get_coalesce - get a netdev's coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * + * Gets the coalesce settings for a particular netdev. Note that if user has + * modified per-queue settings, this only guarantees to represent queue 0. See + * __i40e_get_coalesce for more details. + **/ static int i40e_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, -1); } +/** + * i40e_get_per_queue_coalesce - gets coalesce settings for particular queue + * @netdev: netdev structure + * @ec: ethtool's coalesce settings + * @queue: the particular queue to read + * + * Will read a specific queue's coalesce settings + **/ static int i40e_get_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { return __i40e_get_coalesce(netdev, ec, queue); } +/** + * i40e_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ + static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, struct ethtool_coalesce *ec, int queue) @@ -2060,6 +2101,14 @@ static void i40e_set_itr_per_queue(struct i40e_vsi *vsi, i40e_flush(hw); } +/** + * __i40e_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. + **/ static int __i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, int queue) @@ -2120,12 +2169,27 @@ static int __i40e_set_coalesce(struct net_device *netdev, return 0; } +/** + * i40e_set_coalesce - set coalesce settings for every queue on the netdev + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * + * This will set each queue to the same coalesce settings. + **/ static int i40e_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { return __i40e_set_coalesce(netdev, ec, -1); } +/** + * i40e_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to change + * + * Sets the specified queue's coalesce settings. + **/ static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, struct ethtool_coalesce *ec) { @@ -2141,41 +2205,72 @@ static int i40e_set_per_queue_coalesce(struct net_device *netdev, u32 queue, **/ static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd) { + struct i40e_hw *hw = &pf->hw; + u8 flow_pctype = 0; + u64 i_set = 0; + cmd->data = 0; - if (pf->vsi[pf->lan_vsi]->rxnfc.data != 0) { - cmd->data = pf->vsi[pf->lan_vsi]->rxnfc.data; - cmd->flow_type = pf->vsi[pf->lan_vsi]->rxnfc.flow_type; - return 0; - } - /* Report default options for RSS on i40e */ switch (cmd->flow_type) { case TCP_V4_FLOW: + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + break; case UDP_V4_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - /* fall through to add IP fields */ + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + break; + case TCP_V6_FLOW: + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + break; + case UDP_V6_FLOW: + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + break; case SCTP_V4_FLOW: case AH_ESP_V4_FLOW: case AH_V4_FLOW: case ESP_V4_FLOW: case IPV4_FLOW: - cmd->data |= RXH_IP_SRC | RXH_IP_DST; - break; - case TCP_V6_FLOW: - case UDP_V6_FLOW: - cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; - /* fall through to add IP fields */ case SCTP_V6_FLOW: case AH_ESP_V6_FLOW: case AH_V6_FLOW: case ESP_V6_FLOW: case IPV6_FLOW: + /* Default is src/dest for IP, no matter the L4 hashing */ cmd->data |= RXH_IP_SRC | RXH_IP_DST; break; default: return -EINVAL; } + /* Read flow based hash input set register */ + if (flow_pctype) { + i_set = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, + flow_pctype)) | + ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, + flow_pctype)) << 32); + } + + /* Process bits of hash input set */ + if (i_set) { + if (i_set & I40E_L4_SRC_MASK) + cmd->data |= RXH_L4_B_0_1; + if (i_set & I40E_L4_DST_MASK) + cmd->data |= RXH_L4_B_2_3; + + if (cmd->flow_type == TCP_V4_FLOW || + cmd->flow_type == UDP_V4_FLOW) { + if (i_set & I40E_L3_SRC_MASK) + cmd->data |= RXH_IP_SRC; + if (i_set & I40E_L3_DST_MASK) + cmd->data |= RXH_IP_DST; + } else if (cmd->flow_type == TCP_V6_FLOW || + cmd->flow_type == UDP_V6_FLOW) { + if (i_set & I40E_L3_V6_SRC_MASK) + cmd->data |= RXH_IP_SRC; + if (i_set & I40E_L3_V6_DST_MASK) + cmd->data |= RXH_IP_DST; + } + } + return 0; } @@ -2317,6 +2412,51 @@ static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd, return ret; } +/** + * i40e_get_rss_hash_bits - Read RSS Hash bits from register + * @nfc: pointer to user request + * @i_setc bits currently set + * + * Returns value of bits to be set per user request + **/ +static u64 i40e_get_rss_hash_bits(struct ethtool_rxnfc *nfc, u64 i_setc) +{ + u64 i_set = i_setc; + u64 src_l3 = 0, dst_l3 = 0; + + if (nfc->data & RXH_L4_B_0_1) + i_set |= I40E_L4_SRC_MASK; + else + i_set &= ~I40E_L4_SRC_MASK; + if (nfc->data & RXH_L4_B_2_3) + i_set |= I40E_L4_DST_MASK; + else + i_set &= ~I40E_L4_DST_MASK; + + if (nfc->flow_type == TCP_V6_FLOW || nfc->flow_type == UDP_V6_FLOW) { + src_l3 = I40E_L3_V6_SRC_MASK; + dst_l3 = I40E_L3_V6_DST_MASK; + } else if (nfc->flow_type == TCP_V4_FLOW || + nfc->flow_type == UDP_V4_FLOW) { + src_l3 = I40E_L3_SRC_MASK; + dst_l3 = I40E_L3_DST_MASK; + } else { + /* Any other flow type are not supported here */ + return i_set; + } + + if (nfc->data & RXH_IP_SRC) + i_set |= src_l3; + else + i_set &= ~src_l3; + if (nfc->data & RXH_IP_DST) + i_set |= dst_l3; + else + i_set &= ~dst_l3; + + return i_set; +} + /** * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash * @pf: pointer to the physical function struct @@ -2329,6 +2469,8 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) struct i40e_hw *hw = &pf->hw; u64 hena = (u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(0)) | ((u64)i40e_read_rx_ctl(hw, I40E_PFQF_HENA(1)) << 32); + u8 flow_pctype = 0; + u64 i_set, i_setc; /* RSS does not support anything other than hashing * to queues on src and dst IPs and ports @@ -2337,75 +2479,39 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) RXH_L4_B_0_1 | RXH_L4_B_2_3)) return -EINVAL; - /* We need at least the IP SRC and DEST fields for hashing */ - if (!(nfc->data & RXH_IP_SRC) || - !(nfc->data & RXH_IP_DST)) - return -EINVAL; - switch (nfc->flow_type) { case TCP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); - - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP); - break; - default: - return -EINVAL; - } + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); break; case TCP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); - - hena |= BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP); - break; - default: - return -EINVAL; - } + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_TCP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN_NO_ACK); + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN_NO_ACK); break; case UDP_V4_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV4_UDP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP); - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV4_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4)); - break; - default: - return -EINVAL; - } + hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV4); break; case UDP_V6_FLOW: - switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { - case 0: - return -EINVAL; - case (RXH_L4_B_0_1 | RXH_L4_B_2_3): - if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) - hena |= - BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); + flow_pctype = I40E_FILTER_PCTYPE_NONF_IPV6_UDP; + if (pf->flags & I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE) + hena |= + BIT_ULL(I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) | + BIT_ULL(I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP); - hena |= (BIT_ULL(I40E_FILTER_PCTYPE_NONF_IPV6_UDP) | - BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6)); - break; - default: - return -EINVAL; - } + hena |= BIT_ULL(I40E_FILTER_PCTYPE_FRAG_IPV6); break; case AH_ESP_V4_FLOW: case AH_V4_FLOW: @@ -2437,13 +2543,23 @@ static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc) return -EINVAL; } + if (flow_pctype) { + i_setc = (u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, + flow_pctype)) | + ((u64)i40e_read_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, + flow_pctype)) << 32); + i_set = i40e_get_rss_hash_bits(nfc, i_setc); + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(0, flow_pctype), + (u32)i_set); + i40e_write_rx_ctl(hw, I40E_GLQF_HASH_INSET(1, flow_pctype), + (u32)(i_set >> 32)); + hena |= BIT_ULL(flow_pctype); + } + i40e_write_rx_ctl(hw, I40E_PFQF_HENA(0), (u32)hena); i40e_write_rx_ctl(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32)); i40e_flush(hw); - /* Save setting for future output/update */ - pf->vsi[pf->lan_vsi]->rxnfc = *nfc; - return 0; } @@ -2744,11 +2860,15 @@ static void i40e_get_channels(struct net_device *dev, static int i40e_set_channels(struct net_device *dev, struct ethtool_channels *ch) { + const u8 drop = I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET; struct i40e_netdev_priv *np = netdev_priv(dev); unsigned int count = ch->combined_count; struct i40e_vsi *vsi = np->vsi; struct i40e_pf *pf = vsi->back; + struct i40e_fdir_filter *rule; + struct hlist_node *node2; int new_count; + int err = 0; /* We do not support setting channels for any other VSI at present */ if (vsi->type != I40E_VSI_MAIN) @@ -2766,6 +2886,26 @@ static int i40e_set_channels(struct net_device *dev, if (count > i40e_max_channels(vsi)) return -EINVAL; + /* verify that the number of channels does not invalidate any current + * flow director rules + */ + hlist_for_each_entry_safe(rule, node2, + &pf->fdir_filter_list, fdir_node) { + if (rule->dest_ctl != drop && count <= rule->q_index) { + dev_warn(&pf->pdev->dev, + "Existing user defined filter %d assigns flow to queue %d\n", + rule->fd_id, rule->q_index); + err = -EINVAL; + } + } + + if (err) { + dev_err(&pf->pdev->dev, + "Existing filter rules must be deleted to reduce combined channel count to %d\n", + count); + return err; + } + /* update feature limits from largest to smallest supported values */ /* TODO: Flow director limit, DCB etc */ @@ -2846,15 +2986,13 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, { struct i40e_netdev_priv *np = netdev_priv(netdev); struct i40e_vsi *vsi = np->vsi; + struct i40e_pf *pf = vsi->back; u8 *seed = NULL; u16 i; if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (!indir) - return 0; - if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = kzalloc(I40E_HKEY_ARRAY_SIZE, @@ -2872,8 +3010,12 @@ static int i40e_set_rxfh(struct net_device *netdev, const u32 *indir, } /* Each 32 bits pointed by 'indir' is stored with a lut entry */ - for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) - vsi->rss_lut_user[i] = (u8)(indir[i]); + if (indir) + for (i = 0; i < I40E_HLUT_ARRAY_SIZE; i++) + vsi->rss_lut_user[i] = (u8)(indir[i]); + else + i40e_fill_rss_lut(pf, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE, + vsi->rss_size); return i40e_config_rss(vsi, seed, vsi->rss_lut_user, I40E_HLUT_ARRAY_SIZE); @@ -2943,6 +3085,9 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags) } else { pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; + + /* flush current ATR settings */ + set_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); } if ((flags & I40E_PRIV_FLAGS_VEB_STATS) && diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index d0b3a1bb82ca..ac1faee2a5b8 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -41,7 +41,7 @@ static const char i40e_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 11 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) DRV_KERN @@ -57,8 +57,6 @@ static int i40e_setup_pf_switch(struct i40e_pf *pf, bool reinit); static int i40e_setup_misc_vector(struct i40e_pf *pf); static void i40e_determine_queue_usage(struct i40e_pf *pf); static int i40e_setup_pf_filter_control(struct i40e_pf *pf); -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size); static void i40e_fdir_sb_setup(struct i40e_pf *pf); static int i40e_veb_get_bw_info(struct i40e_veb *veb); @@ -527,6 +525,7 @@ void i40e_pf_reset_stats(struct i40e_pf *pf) pf->veb[i]->stat_offsets_loaded = false; } } + pf->hw_csum_rx_error = 0; } /** @@ -1316,7 +1315,7 @@ static void i40e_rm_default_mac_filter(struct i40e_vsi *vsi, u8 *macaddr) element.vlan_tag = 0; /* ...and some firmware does it this way. */ element.flags = I40E_AQC_MACVLAN_DEL_PERFECT_MATCH | - I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; i40e_aq_remove_macvlan(&pf->hw, vsi->seid, &element, 1, NULL); } @@ -1909,7 +1908,7 @@ int i40e_sync_vsi_filters(struct i40e_vsi *vsi) ether_addr_copy(del_list[num_del].mac_addr, f->macaddr); if (f->vlan == I40E_VLAN_ANY) { del_list[num_del].vlan_tag = 0; - cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN; + cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN; } else { del_list[num_del].vlan_tag = cpu_to_le16((u16)(f->vlan)); @@ -4616,7 +4615,7 @@ static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg) static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) { struct i40e_hw *hw = &pf->hw; - u8 i, enabled_tc; + u8 i, enabled_tc = 1; u8 num_tc = 0; struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config; @@ -4634,8 +4633,6 @@ static u8 i40e_pf_get_num_tc(struct i40e_pf *pf) else return 1; /* Only TC0 */ - /* At least have TC0 */ - enabled_tc = (enabled_tc ? enabled_tc : 0x1); for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) { if (enabled_tc & BIT(i)) num_tc++; @@ -5245,7 +5242,7 @@ static int i40e_up_complete(struct i40e_vsi *vsi) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = 0; if (pf->fd_tcp_rule > 0) { - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 exist\n"); pf->fd_tcp_rule = 0; @@ -5942,13 +5939,17 @@ void i40e_fdir_check_and_reenable(struct i40e_pf *pf) dev_info(&pf->pdev->dev, "FD Sideband/ntuple is being enabled since we have space in the table now\n"); } } - /* Wait for some more space to be available to turn on ATR */ + + /* Wait for some more space to be available to turn on ATR. We also + * must check that no existing ntuple rules for TCP are in effect + */ if (fcnt_prog < (fcnt_avail - I40E_FDIR_BUFFER_HEAD_ROOM * 2)) { if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED) && + (pf->fd_tcp_rule == 0)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table now\n"); + dev_info(&pf->pdev->dev, "ATR is being enabled since we have space in the table and there are no conflicting ntuple rules\n"); } } @@ -5979,9 +5980,6 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) int fd_room; int reg; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (!time_after(jiffies, pf->fd_flush_timestamp + (I40E_MIN_FD_FLUSH_INTERVAL * HZ))) return; @@ -6001,7 +5999,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) } pf->fd_flush_timestamp = jiffies; - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; /* flush all filters */ wr32(&pf->hw, I40E_PFQF_CTL_1, I40E_PFQF_CTL_1_CLEARFDTABLE_MASK); @@ -6021,7 +6019,7 @@ static void i40e_fdir_flush_and_replay(struct i40e_pf *pf) /* replay sideband filters */ i40e_fdir_filter_restore(pf->vsi[pf->lan_vsi]); if (!disable_atr) - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; clear_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state); if (I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "FD Filter table flushed and FD-SB replayed.\n"); @@ -6055,9 +6053,6 @@ static void i40e_fdir_reinit_subtask(struct i40e_pf *pf) if (test_bit(__I40E_DOWN, &pf->state)) return; - if (!(pf->flags & (I40E_FLAG_FD_SB_ENABLED | I40E_FLAG_FD_ATR_ENABLED))) - return; - if (test_bit(__I40E_FD_FLUSH_REQUESTED, &pf->state)) i40e_fdir_flush_and_replay(pf); @@ -7157,9 +7152,9 @@ static void i40e_sync_udp_filters_subtask(struct i40e_pf *pf) pf->pending_udp_bitmap &= ~BIT_ULL(i); port = pf->udp_ports[i].index; if (port) - ret = i40e_aq_add_udp_tunnel(hw, ntohs(port), - pf->udp_ports[i].type, - NULL, NULL); + ret = i40e_aq_add_udp_tunnel(hw, port, + pf->udp_ports[i].type, + NULL, NULL); else ret = i40e_aq_del_udp_tunnel(hw, i, NULL); @@ -7646,7 +7641,6 @@ static int i40e_init_msix(struct i40e_pf *pf) vectors_left--; } else { pf->num_fdsb_msix = 0; - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; } } @@ -7666,6 +7660,8 @@ static int i40e_init_msix(struct i40e_pf *pf) #endif /* can we reserve enough for iWARP? */ if (pf->flags & I40E_FLAG_IWARP_ENABLED) { + iwarp_requested = pf->num_iwarp_msix; + if (!vectors_left) pf->num_iwarp_msix = 0; else if (vectors_left < pf->num_iwarp_msix) @@ -7679,18 +7675,23 @@ static int i40e_init_msix(struct i40e_pf *pf) int vmdq_vecs_wanted = pf->num_vmdq_vsis * pf->num_vmdq_qps; int vmdq_vecs = min_t(int, vectors_left, vmdq_vecs_wanted); - /* if we're short on vectors for what's desired, we limit - * the queues per vmdq. If this is still more than are - * available, the user will need to change the number of - * queues/vectors used by the PF later with the ethtool - * channels command - */ - if (vmdq_vecs < vmdq_vecs_wanted) - pf->num_vmdq_qps = 1; - pf->num_vmdq_msix = pf->num_vmdq_qps; + if (!vectors_left) { + pf->num_vmdq_msix = 0; + pf->num_vmdq_qps = 0; + } else { + /* if we're short on vectors for what's desired, we limit + * the queues per vmdq. If this is still more than are + * available, the user will need to change the number of + * queues/vectors used by the PF later with the ethtool + * channels command + */ + if (vmdq_vecs < vmdq_vecs_wanted) + pf->num_vmdq_qps = 1; + pf->num_vmdq_msix = pf->num_vmdq_qps; - v_budget += vmdq_vecs; - vectors_left -= vmdq_vecs; + v_budget += vmdq_vecs; + vectors_left -= vmdq_vecs; + } } pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry), @@ -7702,21 +7703,6 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->msix_entries[i].entry = i; v_actual = i40e_reserve_msix_vectors(pf, v_budget); - if (v_actual != v_budget) { - /* If we have limited resources, we will start with no vectors - * for the special features and then allocate vectors to some - * of these features based on the policy and at the end disable - * the features that did not get any vectors. - */ - iwarp_requested = pf->num_iwarp_msix; - pf->num_iwarp_msix = 0; -#ifdef I40E_FCOE - pf->num_fcoe_qps = 0; - pf->num_fcoe_msix = 0; -#endif - pf->num_vmdq_msix = 0; - } - if (v_actual < I40E_MIN_MSIX) { pf->flags &= ~I40E_FLAG_MSIX_ENABLED; kfree(pf->msix_entries); @@ -7730,9 +7716,16 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_lan_qps = 1; pf->num_lan_msix = 1; - } else if (v_actual != v_budget) { + } else if (!vectors_left) { + /* If we have limited resources, we will start with no vectors + * for the special features and then allocate vectors to some + * of these features based on the policy and at the end disable + * the features that did not get any vectors. + */ int vec; + dev_info(&pf->pdev->dev, + "MSI-X vector limit reached, attempting to redistribute vectors\n"); /* reserve the misc vector */ vec = v_actual - 1; @@ -7740,7 +7733,10 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_msix = 1; /* force VMDqs to only one vector */ pf->num_vmdq_vsis = 1; pf->num_vmdq_qps = 1; - pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; +#ifdef I40E_FCOE + pf->num_fcoe_qps = 0; + pf->num_fcoe_msix = 0; +#endif /* partition out the remaining vectors */ switch (vec) { @@ -7772,9 +7768,14 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->num_vmdq_vsis = min_t(int, (vec / 2), I40E_DEFAULT_NUM_VMDQ_VSI); } + if (pf->flags & I40E_FLAG_FD_SB_ENABLED) { + pf->num_fdsb_msix = 1; + vec--; + } pf->num_lan_msix = min_t(int, (vec - (pf->num_iwarp_msix + pf->num_vmdq_vsis)), pf->num_lan_msix); + pf->num_lan_qps = pf->num_lan_msix; #ifdef I40E_FCOE /* give one vector to FCoE */ if (pf->flags & I40E_FLAG_FCOE_ENABLED) { @@ -7786,6 +7787,11 @@ static int i40e_init_msix(struct i40e_pf *pf) } } + if ((pf->flags & I40E_FLAG_FD_SB_ENABLED) && + (pf->num_fdsb_msix == 0)) { + dev_info(&pf->pdev->dev, "Sideband Flowdir disabled, not enough MSI-X vectors\n"); + pf->flags &= ~I40E_FLAG_FD_SB_ENABLED; + } if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) && (pf->num_vmdq_msix == 0)) { dev_info(&pf->pdev->dev, "VMDq disabled, not enough MSI-X vectors\n"); @@ -7804,6 +7810,13 @@ static int i40e_init_msix(struct i40e_pf *pf) pf->flags &= ~I40E_FLAG_FCOE_ENABLED; } #endif + i40e_debug(&pf->hw, I40E_DEBUG_INIT, + "MSI-X vector distribution: PF %d, VMDq %d, FDSB %d, iWARP %d\n", + pf->num_lan_msix, + pf->num_vmdq_msix * pf->num_vmdq_vsis, + pf->num_fdsb_msix, + pf->num_iwarp_msix); + return v_actual; } @@ -7990,72 +8003,34 @@ static int i40e_setup_misc_vector(struct i40e_pf *pf) static int i40e_config_rss_aq(struct i40e_vsi *vsi, const u8 *seed, u8 *lut, u16 lut_size) { - struct i40e_aqc_get_set_rss_key_data rss_key; struct i40e_pf *pf = vsi->back; struct i40e_hw *hw = &pf->hw; - bool pf_lut = false; - u8 *rss_lut; - int ret, i; + int ret = 0; - memcpy(&rss_key, seed, sizeof(rss_key)); - - rss_lut = kzalloc(pf->rss_table_size, GFP_KERNEL); - if (!rss_lut) - return -ENOMEM; - - /* Populate the LUT with max no. of queues in round robin fashion */ - for (i = 0; i < vsi->rss_table_size; i++) - rss_lut[i] = i % vsi->rss_size; - - ret = i40e_aq_set_rss_key(hw, vsi->id, &rss_key); - if (ret) { - dev_info(&pf->pdev->dev, - "Cannot set RSS key, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - goto config_rss_aq_out; + if (seed) { + struct i40e_aqc_get_set_rss_key_data *seed_dw = + (struct i40e_aqc_get_set_rss_key_data *)seed; + ret = i40e_aq_set_rss_key(hw, vsi->id, seed_dw); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS key, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } } + if (lut) { + bool pf_lut = vsi->type == I40E_VSI_MAIN ? true : false; - if (vsi->type == I40E_VSI_MAIN) - pf_lut = true; - - ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, rss_lut, - vsi->rss_table_size); - if (ret) - dev_info(&pf->pdev->dev, - "Cannot set RSS lut, err %s aq_err %s\n", - i40e_stat_str(&pf->hw, ret), - i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status)); - -config_rss_aq_out: - kfree(rss_lut); - return ret; -} - -/** - * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used - * @vsi: VSI structure - **/ -static int i40e_vsi_config_rss(struct i40e_vsi *vsi) -{ - u8 seed[I40E_HKEY_ARRAY_SIZE]; - struct i40e_pf *pf = vsi->back; - u8 *lut; - int ret; - - if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) - return 0; - - lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); - if (!lut) - return -ENOMEM; - - i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); - netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); - vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); - ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); - kfree(lut); - + ret = i40e_aq_set_rss_lut(hw, vsi->id, pf_lut, lut, lut_size); + if (ret) { + dev_info(&pf->pdev->dev, + "Cannot set RSS lut, err %s aq_err %s\n", + i40e_stat_str(hw, ret), + i40e_aq_str(hw, hw->aq.asq_last_status)); + return ret; + } + } return ret; } @@ -8105,6 +8080,46 @@ static int i40e_get_rss_aq(struct i40e_vsi *vsi, const u8 *seed, return ret; } +/** + * i40e_vsi_config_rss - Prepare for VSI(VMDq) RSS if used + * @vsi: VSI structure + **/ +static int i40e_vsi_config_rss(struct i40e_vsi *vsi) +{ + u8 seed[I40E_HKEY_ARRAY_SIZE]; + struct i40e_pf *pf = vsi->back; + u8 *lut; + int ret; + + if (!(pf->flags & I40E_FLAG_RSS_AQ_CAPABLE)) + return 0; + + if (!vsi->rss_size) + vsi->rss_size = min_t(int, pf->alloc_rss_size, + vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; + + lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); + if (!lut) + return -ENOMEM; + /* Use the user configured hash keys and lookup table if there is one, + * otherwise use default + */ + if (vsi->rss_lut_user) + memcpy(lut, vsi->rss_lut_user, vsi->rss_table_size); + else + i40e_fill_rss_lut(pf, lut, vsi->rss_table_size, vsi->rss_size); + if (vsi->rss_hkey_user) + memcpy(seed, vsi->rss_hkey_user, I40E_HKEY_ARRAY_SIZE); + else + netdev_rss_key_fill((void *)seed, I40E_HKEY_ARRAY_SIZE); + ret = i40e_config_rss_aq(vsi, seed, lut, vsi->rss_table_size); + kfree(lut); + + return ret; +} + /** * i40e_config_rss_reg - Configure RSS keys and lut by writing registers * @vsi: Pointer to vsi structure @@ -8243,8 +8258,8 @@ int i40e_get_rss(struct i40e_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size) * @rss_table_size: Lookup table size * @rss_size: Range of queue number for hashing */ -static void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, - u16 rss_table_size, u16 rss_size) +void i40e_fill_rss_lut(struct i40e_pf *pf, u8 *lut, + u16 rss_table_size, u16 rss_size) { u16 i; @@ -8285,6 +8300,8 @@ static int i40e_pf_config_rss(struct i40e_pf *pf) if (!vsi->rss_size) vsi->rss_size = min_t(int, pf->alloc_rss_size, vsi->num_queue_pairs); + if (!vsi->rss_size) + return -EINVAL; lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) @@ -8609,7 +8626,6 @@ static int i40e_sw_init(struct i40e_pf *pf) I40E_FLAG_WB_ON_ITR_CAPABLE | I40E_FLAG_MULTIPLE_TCP_UDP_RSS_PCTYPE | I40E_FLAG_NO_PCI_LINK_CHECK | - I40E_FLAG_100M_SGMII_CAPABLE | I40E_FLAG_USE_SET_LLDP_MIB | I40E_FLAG_GENEVE_OFFLOAD_CAPABLE; } else if ((pf->hw.aq.api_maj_ver > 1) || @@ -8684,17 +8700,39 @@ bool i40e_set_ntuple(struct i40e_pf *pf, netdev_features_t features) /* reset fd counters */ pf->fd_add_err = pf->fd_atr_cnt = pf->fd_tcp_rule = 0; pf->fdir_pf_active_filters = 0; - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); /* if ATR was auto disabled it can be re-enabled. */ if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && - (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) + (pf->auto_disable_flags & I40E_FLAG_FD_ATR_ENABLED)) { pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; + if (I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "ATR re-enabled.\n"); + } } return need_reset; } +/** + * i40e_clear_rss_lut - clear the rx hash lookup table + * @vsi: the VSI being configured + **/ +static void i40e_clear_rss_lut(struct i40e_vsi *vsi) +{ + struct i40e_pf *pf = vsi->back; + struct i40e_hw *hw = &pf->hw; + u16 vf_id = vsi->vf_id; + u8 i; + + if (vsi->type == I40E_VSI_MAIN) { + for (i = 0; i <= I40E_PFQF_HLUT_MAX_INDEX; i++) + wr32(hw, I40E_PFQF_HLUT(i), 0); + } else if (vsi->type == I40E_VSI_SRIOV) { + for (i = 0; i <= I40E_VFQF_HLUT_MAX_INDEX; i++) + i40e_write_rx_ctl(hw, I40E_VFQF_HLUT1(i, vf_id), 0); + } else { + dev_err(&pf->pdev->dev, "Cannot set RSS LUT - invalid VSI type\n"); + } +} + /** * i40e_set_features - set the netdev feature flags * @netdev: ptr to the netdev being adjusted @@ -8708,6 +8746,12 @@ static int i40e_set_features(struct net_device *netdev, struct i40e_pf *pf = vsi->back; bool need_reset; + if (features & NETIF_F_RXHASH && !(netdev->features & NETIF_F_RXHASH)) + i40e_pf_config_rss(pf); + else if (!(features & NETIF_F_RXHASH) && + netdev->features & NETIF_F_RXHASH) + i40e_clear_rss_lut(vsi); + if (features & NETIF_F_HW_VLAN_CTAG_RX) i40e_vlan_stripping_enable(vsi); else @@ -11309,11 +11353,7 @@ static void i40e_remove(struct pci_dev *pdev) } /* shutdown the adminq */ - ret_code = i40e_shutdown_adminq(hw); - if (ret_code) - dev_warn(&pdev->dev, - "Failed to destroy the Admin Queue resources: %d\n", - ret_code); + i40e_shutdown_adminq(hw); /* destroy the locks only once, here */ mutex_destroy(&hw->aq.arq_mutex); @@ -11360,6 +11400,12 @@ static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s: error %d\n", __func__, error); + if (!pf) { + dev_info(&pdev->dev, + "Cannot recover - error happened during device probe\n"); + return PCI_ERS_RESULT_DISCONNECT; + } + /* shutdown all operations */ if (!test_bit(__I40E_SUSPENDED, &pf->state)) { rtnl_lock(); @@ -11582,7 +11628,8 @@ static int __init i40e_init_module(void) * it can't be any worse than using the system workqueue which * was already single threaded */ - i40e_wq = create_singlethread_workqueue(i40e_driver_name); + i40e_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, + i40e_driver_name); if (!i40e_wq) { pr_err("%s: Failed to create workqueue\n", i40e_driver_name); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40e/i40e_ptp.c b/drivers/net/ethernet/intel/i40e/i40e_ptp.c index ed39cbad24bd..f1feceab758a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_ptp.c +++ b/drivers/net/ethernet/intel/i40e/i40e_ptp.c @@ -669,7 +669,7 @@ void i40e_ptp_init(struct i40e_pf *pf) pf->ptp_clock = NULL; dev_err(&pf->pdev->dev, "%s: ptp_clock_register failed\n", __func__); - } else { + } else if (pf->ptp_clock) { struct timespec64 ts; u32 regval; diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index df7ecc9578c9..6287bf63c43c 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -40,6 +40,69 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, } #define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS) +/** + * i40e_fdir - Generate a Flow Director descriptor based on fdata + * @tx_ring: Tx ring to send buffer on + * @fdata: Flow director filter data + * @add: Indicate if we are adding a rule or deleting one + * + **/ +static void i40e_fdir(struct i40e_ring *tx_ring, + struct i40e_fdir_filter *fdata, bool add) +{ + struct i40e_filter_program_desc *fdir_desc; + struct i40e_pf *pf = tx_ring->vsi->back; + u32 flex_ptype, dtype_cmd; + u16 i; + + /* grab the next descriptor */ + i = tx_ring->next_to_use; + fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + flex_ptype = I40E_TXD_FLTR_QW0_QINDEX_MASK & + (fdata->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_FLEXOFF_MASK & + (fdata->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT); + + flex_ptype |= I40E_TXD_FLTR_QW0_PCTYPE_MASK & + (fdata->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT); + + /* Use LAN VSI Id if not programmed by user */ + flex_ptype |= I40E_TXD_FLTR_QW0_DEST_VSI_MASK & + ((u32)(fdata->dest_vsi ? : pf->vsi[pf->lan_vsi]->id) << + I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT); + + dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG; + + dtype_cmd |= add ? + I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT : + I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << + I40E_TXD_FLTR_QW1_PCMD_SHIFT; + + dtype_cmd |= I40E_TXD_FLTR_QW1_DEST_MASK & + (fdata->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT); + + dtype_cmd |= I40E_TXD_FLTR_QW1_FD_STATUS_MASK & + (fdata->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT); + + if (fdata->cnt_index) { + dtype_cmd |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; + dtype_cmd |= I40E_TXD_FLTR_QW1_CNTINDEX_MASK & + ((u32)fdata->cnt_index << + I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT); + } + + fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype); + fdir_desc->rsvd = cpu_to_le32(0); + fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd); + fdir_desc->fd_id = cpu_to_le32(fdata->fd_id); +} + #define I40E_FD_CLEAN_DELAY 10 /** * i40e_program_fdir_filter - Program a Flow Director filter @@ -48,14 +111,13 @@ static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size, * @pf: The PF pointer * @add: True for add/update, False for remove **/ -int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, - struct i40e_pf *pf, bool add) +static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, + u8 *raw_packet, struct i40e_pf *pf, + bool add) { - struct i40e_filter_program_desc *fdir_desc; struct i40e_tx_buffer *tx_buf, *first; struct i40e_tx_desc *tx_desc; struct i40e_ring *tx_ring; - unsigned int fpt, dcc; struct i40e_vsi *vsi; struct device *dev; dma_addr_t dma; @@ -92,56 +154,8 @@ int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data, u8 *raw_packet, /* grab the next descriptor */ i = tx_ring->next_to_use; - fdir_desc = I40E_TX_FDIRDESC(tx_ring, i); first = &tx_ring->tx_bi[i]; - memset(first, 0, sizeof(struct i40e_tx_buffer)); - - tx_ring->next_to_use = ((i + 1) < tx_ring->count) ? i + 1 : 0; - - fpt = (fdir_data->q_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) & - I40E_TXD_FLTR_QW0_QINDEX_MASK; - - fpt |= (fdir_data->flex_off << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT) & - I40E_TXD_FLTR_QW0_FLEXOFF_MASK; - - fpt |= (fdir_data->pctype << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) & - I40E_TXD_FLTR_QW0_PCTYPE_MASK; - - /* Use LAN VSI Id if not programmed by user */ - if (fdir_data->dest_vsi == 0) - fpt |= (pf->vsi[pf->lan_vsi]->id) << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT; - else - fpt |= ((u32)fdir_data->dest_vsi << - I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT) & - I40E_TXD_FLTR_QW0_DEST_VSI_MASK; - - dcc = I40E_TX_DESC_DTYPE_FILTER_PROG; - - if (add) - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - else - dcc |= I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE << - I40E_TXD_FLTR_QW1_PCMD_SHIFT; - - dcc |= (fdir_data->dest_ctl << I40E_TXD_FLTR_QW1_DEST_SHIFT) & - I40E_TXD_FLTR_QW1_DEST_MASK; - - dcc |= (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT) & - I40E_TXD_FLTR_QW1_FD_STATUS_MASK; - - if (fdir_data->cnt_index != 0) { - dcc |= I40E_TXD_FLTR_QW1_CNT_ENA_MASK; - dcc |= ((u32)fdir_data->cnt_index << - I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT) & - I40E_TXD_FLTR_QW1_CNTINDEX_MASK; - } - - fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(fpt); - fdir_desc->rsvd = cpu_to_le32(0); - fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dcc); - fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id); + i40e_fdir(tx_ring, fdir_data, add); /* Now program a dummy descriptor */ i = tx_ring->next_to_use; @@ -282,18 +296,18 @@ static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi, if (add) { pf->fd_tcp_rule++; - if (pf->flags & I40E_FLAG_FD_ATR_ENABLED) { - if (I40E_DEBUG_FD & pf->hw.debug_mask) - dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); - pf->flags &= ~I40E_FLAG_FD_ATR_ENABLED; - } + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) + dev_info(&pf->pdev->dev, "Forcing ATR off, sideband rules for TCP/IPv4 flow being applied\n"); + pf->auto_disable_flags |= I40E_FLAG_FD_ATR_ENABLED; } else { pf->fd_tcp_rule = (pf->fd_tcp_rule > 0) ? (pf->fd_tcp_rule - 1) : 0; if (pf->fd_tcp_rule == 0) { - pf->flags |= I40E_FLAG_FD_ATR_ENABLED; - if (I40E_DEBUG_FD & pf->hw.debug_mask) + if ((pf->flags & I40E_FLAG_FD_ATR_ENABLED) && + I40E_DEBUG_FD & pf->hw.debug_mask) dev_info(&pf->pdev->dev, "ATR re-enabled due to no sideband TCP/IPv4 rules\n"); + pf->auto_disable_flags &= ~I40E_FLAG_FD_ATR_ENABLED; } } @@ -532,7 +546,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -545,9 +562,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -584,8 +598,7 @@ void i40e_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -754,8 +767,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -1864,6 +1877,15 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_PFINT_DYN_CTLN +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->rx_rings[idx]->rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + return !!(vsi->tx_rings[idx]->tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1879,6 +1901,7 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, u32 rxval, txval; int vector; int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1887,18 +1910,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_rings[idx]->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_rings[idx]->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } @@ -2621,9 +2647,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size. */ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -2654,8 +2678,7 @@ bool __i40e_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); @@ -2787,9 +2810,7 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2814,13 +2835,11 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. */ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { @@ -2840,10 +2859,9 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_QW1_CMD_SHIFT); /* notify HW of packet */ - if (!tail_bump) + if (!tail_bump) { prefetchw(tx_desc + 1); - - if (tail_bump) { + } else { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -2852,7 +2870,6 @@ static inline void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, wmb(); writel(i, tx_ring->tail); } - return; dma_error: diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h index b78c810d1835..508840585645 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h @@ -463,4 +463,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h index c92a3bdee229..f861d3109d1a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h @@ -163,6 +163,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c index 6fcbf764f32b..54b8ee2583f1 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c @@ -502,8 +502,16 @@ static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_id, u32 qtx_ctl; int ret = 0; + if (!i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + ret = -ENOENT; + goto error_context; + } pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_id, vsi_queue_id); vsi = i40e_find_vsi_from_id(pf, vsi_id); + if (!vsi) { + ret = -ENOENT; + goto error_context; + } /* clear the context structure first */ memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq)); @@ -991,7 +999,10 @@ complete_reset: i40e_enable_vf_mappings(vf); set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states); clear_bit(I40E_VF_STAT_DISABLED, &vf->vf_states); - i40e_notify_client_of_vf_reset(pf, abs_vf_id); + /* Do not notify the client during VF init */ + if (vf->pf->num_alloc_vfs) + i40e_notify_client_of_vf_reset(pf, abs_vf_id); + vf->num_vlan = 0; } /* tell the VF the reset is done */ wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_VFACTIVE); @@ -1089,7 +1100,6 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) goto err_iov; } } - i40e_notify_client_of_vf_enable(pf, num_alloc_vfs); /* allocate memory */ vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL); if (!vfs) { @@ -1113,6 +1123,8 @@ int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs) } pf->num_alloc_vfs = num_alloc_vfs; + i40e_notify_client_of_vf_enable(pf, num_alloc_vfs); + err_alloc: if (ret) i40e_free_vfs(pf); @@ -1472,7 +1484,8 @@ static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf, vsi = i40e_find_vsi_from_id(pf, info->vsi_id); if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) || - !i40e_vc_isvalid_vsi_id(vf, info->vsi_id)) { + !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) || + !vsi) { aq_ret = I40E_ERR_PARAM; goto error_param; } @@ -2213,8 +2226,8 @@ static int i40e_vc_iwarp_qvmap_msg(struct i40e_vf *vf, u8 *msg, u16 msglen, error_param: /* send the response to the VF */ return i40e_vc_send_resp_to_vf(vf, - config ? I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP : - I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP, + config ? I40E_VIRTCHNL_OP_CONFIG_IWARP_IRQ_MAP : + I40E_VIRTCHNL_OP_RELEASE_IWARP_IRQ_MAP, aq_ret); } @@ -2314,6 +2327,7 @@ err: /* send the response back to the VF */ aq_ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_RSS_HENA_CAPS, aq_ret, (u8 *)vrh, len); + kfree(vrh); return aq_ret; } @@ -2742,11 +2756,12 @@ error_param: * @vf_id: VF identifier * @vlan_id: mac address * @qos: priority setting + * @vlan_proto: vlan protocol * * program VF vlan id and/or qos **/ -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos) +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + u16 vlan_id, u8 qos, __be16 vlan_proto) { u16 vlanprio = vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT); struct i40e_netdev_priv *np = netdev_priv(netdev); @@ -2769,6 +2784,12 @@ int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, goto error_pvid; } + if (vlan_proto != htons(ETH_P_8021Q)) { + dev_err(&pf->pdev->dev, "VF VLAN protocol is not supported\n"); + ret = -EPROTONOSUPPORT; + goto error_pvid; + } + vf = &(pf->vf[vf_id]); vsi = pf->vsi[vf->lan_vsi_idx]; if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) { @@ -2995,6 +3016,7 @@ int i40e_ndo_get_vf_config(struct net_device *netdev, else ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; ivi->spoofchk = vf->spoofchk; + ivi->trusted = vf->trusted; ret = 0; error_param: diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h index 875174141451..4012d069939a 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h @@ -129,8 +129,8 @@ void i40e_vc_notify_vf_reset(struct i40e_vf *vf); /* VF configuration related iplink handlers */ int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); -int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, - int vf_id, u16 vlan_id, u8 qos); +int i40e_ndo_set_vf_port_vlan(struct net_device *netdev, int vf_id, + u16 vlan_id, u8 qos, __be16 vlan_proto); int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, int max_tx_rate); int i40e_ndo_set_vf_trust(struct net_device *netdev, int vf_id, bool setting); diff --git a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h index 3114dcfa1724..40b0eafd0c71 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_adminq_cmd.h @@ -204,6 +204,9 @@ enum i40e_admin_queue_opc { i40e_aqc_opc_suspend_port_tx = 0x041B, i40e_aqc_opc_resume_port_tx = 0x041C, i40e_aqc_opc_configure_partition_bw = 0x041D, + /* hmc */ + i40e_aqc_opc_query_hmc_resource_profile = 0x0500, + i40e_aqc_opc_set_hmc_resource_profile = 0x0501, /* phy commands*/ i40e_aqc_opc_get_phy_abilities = 0x0600, @@ -447,13 +450,15 @@ I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration); /* Set ARP Proxy command / response (indirect 0x0104) */ struct i40e_aqc_arp_proxy_data { __le16 command_flags; -#define I40E_AQ_ARP_INIT_IPV4 0x0008 -#define I40E_AQ_ARP_UNSUP_CTL 0x0010 -#define I40E_AQ_ARP_ENA 0x0020 -#define I40E_AQ_ARP_ADD_IPV4 0x0040 -#define I40E_AQ_ARP_DEL_IPV4 0x0080 +#define I40E_AQ_ARP_INIT_IPV4 0x0800 +#define I40E_AQ_ARP_UNSUP_CTL 0x1000 +#define I40E_AQ_ARP_ENA 0x2000 +#define I40E_AQ_ARP_ADD_IPV4 0x4000 +#define I40E_AQ_ARP_DEL_IPV4 0x8000 __le16 table_id; - __le32 pfpm_proxyfc; + __le32 enabled_offloads; +#define I40E_AQ_ARP_DIRECTED_OFFLOAD_ENABLE 0x00000020 +#define I40E_AQ_ARP_OFFLOAD_ENABLE 0x00000800 __le32 ip_addr; u8 mac_addr[6]; u8 reserved[2]; @@ -468,17 +473,19 @@ struct i40e_aqc_ns_proxy_data { __le16 table_idx_ipv6_0; __le16 table_idx_ipv6_1; __le16 control; -#define I40E_AQ_NS_PROXY_ADD_0 0x0100 -#define I40E_AQ_NS_PROXY_DEL_0 0x0200 -#define I40E_AQ_NS_PROXY_ADD_1 0x0400 -#define I40E_AQ_NS_PROXY_DEL_1 0x0800 -#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x1000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x2000 -#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x4000 -#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x8000 -#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0001 -#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0002 -#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0004 +#define I40E_AQ_NS_PROXY_ADD_0 0x0001 +#define I40E_AQ_NS_PROXY_DEL_0 0x0002 +#define I40E_AQ_NS_PROXY_ADD_1 0x0004 +#define I40E_AQ_NS_PROXY_DEL_1 0x0008 +#define I40E_AQ_NS_PROXY_ADD_IPV6_0 0x0010 +#define I40E_AQ_NS_PROXY_DEL_IPV6_0 0x0020 +#define I40E_AQ_NS_PROXY_ADD_IPV6_1 0x0040 +#define I40E_AQ_NS_PROXY_DEL_IPV6_1 0x0080 +#define I40E_AQ_NS_PROXY_COMMAND_SEQ 0x0100 +#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL 0x0200 +#define I40E_AQ_NS_PROXY_INIT_MAC_TBL 0x0400 +#define I40E_AQ_NS_PROXY_OFFLOAD_ENABLE 0x0800 +#define I40E_AQ_NS_PROXY_DIRECTED_OFFLOAD_ENABLE 0x1000 u8 mac_addr_0[6]; u8 mac_addr_1[6]; u8 local_mac_addr[6]; @@ -1579,6 +1586,24 @@ struct i40e_aqc_configure_partition_bw_data { I40E_CHECK_STRUCT_LEN(0x22, i40e_aqc_configure_partition_bw_data); +/* Get and set the active HMC resource profile and status. + * (direct 0x0500) and (direct 0x0501) + */ +struct i40e_aq_get_set_hmc_resource_profile { + u8 pm_profile; + u8 pe_vf_enabled; + u8 reserved[14]; +}; + +I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile); + +enum i40e_aq_hmc_profile { + /* I40E_HMC_PROFILE_NO_CHANGE = 0, reserved */ + I40E_HMC_PROFILE_DEFAULT = 1, + I40E_HMC_PROFILE_FAVOR_VF = 2, + I40E_HMC_PROFILE_EQUAL = 3, +}; + /* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */ /* set in param0 for get phy abilities to report qualified modules */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_common.c b/drivers/net/ethernet/intel/i40evf/i40e_common.c index 4db0c0326185..7953c13451b9 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_common.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_common.c @@ -302,7 +302,6 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, void *buffer, u16 buf_len) { struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc; - u16 len = le16_to_cpu(aq_desc->datalen); u8 *buf = (u8 *)buffer; u16 i = 0; @@ -326,6 +325,8 @@ void i40evf_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc, le32_to_cpu(aq_desc->params.external.addr_low)); if ((buffer != NULL) && (aq_desc->datalen != 0)) { + u16 len = le16_to_cpu(aq_desc->datalen); + i40e_debug(hw, mask, "AQ CMD Buffer:\n"); if (buf_len < len) len = buf_len; diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index a579193b2c21..75f2a2cdd738 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -51,7 +51,10 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, struct i40e_tx_buffer *tx_buffer) { if (tx_buffer->skb) { - dev_kfree_skb_any(tx_buffer->skb); + if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) + kfree(tx_buffer->raw_buf); + else + dev_kfree_skb_any(tx_buffer->skb); if (dma_unmap_len(tx_buffer, len)) dma_unmap_single(ring->dev, dma_unmap_addr(tx_buffer, dma), @@ -64,9 +67,6 @@ static void i40e_unmap_and_free_tx_resource(struct i40e_ring *ring, DMA_TO_DEVICE); } - if (tx_buffer->tx_flags & I40E_TX_FLAGS_FD_SB) - kfree(tx_buffer->raw_buf); - tx_buffer->next_to_watch = NULL; tx_buffer->skb = NULL; dma_unmap_len_set(tx_buffer, len, 0); @@ -103,8 +103,7 @@ void i40evf_clean_tx_ring(struct i40e_ring *tx_ring) return; /* cleanup Tx queue statistics */ - netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)); + netdev_tx_reset_queue(txring_txq(tx_ring)); } /** @@ -273,8 +272,8 @@ static bool i40e_clean_tx_irq(struct i40e_vsi *vsi, tx_ring->arm_wb = true; } - netdev_tx_completed_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), + /* notify netdev of completed buffers */ + netdev_tx_completed_queue(txring_txq(tx_ring), total_packets, total_bytes); #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) @@ -1312,6 +1311,19 @@ static u32 i40e_buildreg_itr(const int type, const u16 itr) /* a small macro to shorten up some long lines */ #define INTREG I40E_VFINT_DYN_CTLN1 +static inline int get_rx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->rx_rings[idx].rx_itr_setting); +} + +static inline int get_tx_itr_enabled(struct i40e_vsi *vsi, int idx) +{ + struct i40evf_adapter *adapter = vsi->back; + + return !!(adapter->tx_rings[idx].tx_itr_setting); +} /** * i40e_update_enable_itr - Update itr and re-enable MSIX interrupt @@ -1326,6 +1338,8 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, bool rx = false, tx = false; u32 rxval, txval; int vector; + int idx = q_vector->v_idx; + int rx_itr_setting, tx_itr_setting; vector = (q_vector->v_idx + vsi->base_vector); @@ -1334,18 +1348,21 @@ static inline void i40e_update_enable_itr(struct i40e_vsi *vsi, */ rxval = txval = i40e_buildreg_itr(I40E_ITR_NONE, 0); + rx_itr_setting = get_rx_itr_enabled(vsi, idx); + tx_itr_setting = get_tx_itr_enabled(vsi, idx); + if (q_vector->itr_countdown > 0 || - (!ITR_IS_DYNAMIC(vsi->rx_itr_setting) && - !ITR_IS_DYNAMIC(vsi->tx_itr_setting))) { + (!ITR_IS_DYNAMIC(rx_itr_setting) && + !ITR_IS_DYNAMIC(tx_itr_setting))) { goto enable_int; } - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) { + if (ITR_IS_DYNAMIC(rx_itr_setting)) { rx = i40e_set_new_dynamic_itr(&q_vector->rx); rxval = i40e_buildreg_itr(I40E_RX_ITR, q_vector->rx.itr); } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) { + if (ITR_IS_DYNAMIC(tx_itr_setting)) { tx = i40e_set_new_dynamic_itr(&q_vector->tx); txval = i40e_buildreg_itr(I40E_TX_ITR, q_vector->tx.itr); } @@ -1832,9 +1849,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) return false; /* We need to walk through the list and validate that each group - * of 6 fragments totals at least gso_size. However we don't need - * to perform such validation on the last 6 since the last 6 cannot - * inherit any data from a descriptor after them. + * of 6 fragments totals at least gso_size. */ nr_frags -= I40E_MAX_BUFFER_TXD - 2; frag = &skb_shinfo(skb)->frags[0]; @@ -1865,8 +1880,7 @@ bool __i40evf_chk_linearize(struct sk_buff *skb) if (sum < 0) return true; - /* use pre-decrement to avoid processing last fragment */ - if (!--nr_frags) + if (!nr_frags--) break; sum -= skb_frag_size(stale++); @@ -2015,9 +2029,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, tx_ring->next_to_use = i; - netdev_tx_sent_queue(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index), - first->bytecount); + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); i40e_maybe_stop_tx(tx_ring, DESC_NEEDED); /* Algorithm to optimize tail and RS bit setting: @@ -2042,13 +2054,11 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, * trigger a force WB. */ if (skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index))) { + !netif_xmit_stopped(txring_txq(tx_ring))) { tx_ring->flags |= I40E_TXR_FLAGS_LAST_XMIT_MORE_SET; tail_bump = false; } else if (!skb->xmit_more && - !netif_xmit_stopped(netdev_get_tx_queue(tx_ring->netdev, - tx_ring->queue_index)) && + !netif_xmit_stopped(txring_txq(tx_ring)) && (!(tx_ring->flags & I40E_TXR_FLAGS_LAST_XMIT_MORE_SET)) && (tx_ring->packet_stride < WB_STRIDE) && (desc_count < WB_STRIDE)) { @@ -2068,10 +2078,9 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, I40E_TXD_QW1_CMD_SHIFT); /* notify HW of packet */ - if (!tail_bump) + if (!tail_bump) { prefetchw(tx_desc + 1); - - if (tail_bump) { + } else { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only * applicable for weak-ordered memory model archs, @@ -2080,7 +2089,6 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb, wmb(); writel(i, tx_ring->tail); } - return; dma_error: diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h index 0112277e5882..abcdecabbc56 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h @@ -287,6 +287,14 @@ struct i40e_ring { u8 dcb_tc; /* Traffic class of ring */ u8 __iomem *tail; + /* high bit set means dynamic, use accessors routines to read/write. + * hardware only supports 2us resolution for the ITR registers. + * these values always store the USER setting, and must be converted + * before programming to a register. + */ + u16 rx_itr_setting; + u16 tx_itr_setting; + u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 rx_buf_len; @@ -445,4 +453,13 @@ static inline bool i40e_rx_is_fcoe(u16 ptype) return (ptype >= I40E_RX_PTYPE_L2_FCOE_PAY3) && (ptype <= I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER); } + +/** + * txring_txq - Find the netdev Tx ring based on the i40e Tx ring + * @ring: Tx ring to find the netdev equivalent of + **/ +static inline struct netdev_queue *txring_txq(const struct i40e_ring *ring) +{ + return netdev_get_tx_queue(ring->netdev, ring->queue_index); +} #endif /* _I40E_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h index f04ce6cb70dc..bd691ad86673 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h +++ b/drivers/net/ethernet/intel/i40evf/i40e_virtchnl.h @@ -160,6 +160,7 @@ struct i40e_virtchnl_vsi_resource { #define I40E_VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 #define I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF 0X00080000 +#define I40E_VIRTCHNL_VF_OFFLOAD_ENCAP_CSUM 0X00100000 struct i40e_virtchnl_vf_resource { u16 num_vsis; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf.h b/drivers/net/ethernet/intel/i40evf/i40evf.h index 76ed97db28e2..c5fd724313c7 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf.h +++ b/drivers/net/ethernet/intel/i40evf/i40evf.h @@ -59,32 +59,25 @@ struct i40e_vsi { unsigned long state; int base_vector; u16 work_limit; - /* high bit set means dynamic, use accessor routines to read/write. - * hardware only supports 2us resolution for the ITR registers. - * these values always store the USER setting, and must be converted - * before programming to a register. - */ - u16 rx_itr_setting; - u16 tx_itr_setting; u16 qs_handle; }; /* How many Rx Buffers do we bundle into one write to the hardware ? */ #define I40EVF_RX_BUFFER_WRITE 16 /* Must be power of 2 */ -#define I40EVF_DEFAULT_TXD 512 -#define I40EVF_DEFAULT_RXD 512 -#define I40EVF_MAX_TXD 4096 -#define I40EVF_MIN_TXD 64 -#define I40EVF_MAX_RXD 4096 -#define I40EVF_MIN_RXD 64 -#define I40EVF_REQ_DESCRIPTOR_MULTIPLE 32 +#define I40EVF_DEFAULT_TXD 512 +#define I40EVF_DEFAULT_RXD 512 +#define I40EVF_MAX_TXD 4096 +#define I40EVF_MIN_TXD 64 +#define I40EVF_MAX_RXD 4096 +#define I40EVF_MIN_RXD 64 +#define I40EVF_REQ_DESCRIPTOR_MULTIPLE 32 /* Supported Rx Buffer Sizes */ -#define I40EVF_RXBUFFER_2048 2048 -#define I40EVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */ -#define I40EVF_MAX_AQ_BUF_SIZE 4096 -#define I40EVF_AQ_LEN 32 -#define I40EVF_AQ_MAX_ERR 20 /* times to try before resetting AQ */ +#define I40EVF_RXBUFFER_2048 2048 +#define I40EVF_MAX_RXBUFFER 16384 /* largest size for single descriptor */ +#define I40EVF_MAX_AQ_BUF_SIZE 4096 +#define I40EVF_AQ_LEN 32 +#define I40EVF_AQ_MAX_ERR 20 /* times to try before resetting AQ */ #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) @@ -111,7 +104,7 @@ struct i40e_q_vector { u8 num_ringpairs; /* total number of ring pairs in vector */ #define ITR_COUNTDOWN_START 100 u8 itr_countdown; /* when 0 or 1 update ITR */ - int v_idx; /* vector index in list */ + int v_idx; /* vector index in list */ char name[IFNAMSIZ + 9]; bool arm_wb_state; cpumask_var_t affinity_mask; @@ -129,11 +122,11 @@ struct i40e_q_vector { ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \ (R)->next_to_clean - (R)->next_to_use - 1) -#define I40EVF_RX_DESC_ADV(R, i) \ +#define I40EVF_RX_DESC_ADV(R, i) \ (&(((union i40e_adv_rx_desc *)((R).desc))[i])) -#define I40EVF_TX_DESC_ADV(R, i) \ +#define I40EVF_TX_DESC_ADV(R, i) \ (&(((union i40e_adv_tx_desc *)((R).desc))[i])) -#define I40EVF_TX_CTXTDESC_ADV(R, i) \ +#define I40EVF_TX_CTXTDESC_ADV(R, i) \ (&(((struct i40e_adv_tx_context_desc *)((R).desc))[i])) #define OTHER_VECTOR 1 @@ -204,22 +197,25 @@ struct i40evf_adapter { struct msix_entry *msix_entries; u32 flags; -#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0) -#define I40EVF_FLAG_IMIR_ENABLED BIT(5) -#define I40EVF_FLAG_MQ_CAPABLE BIT(6) -#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7) -#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8) -#define I40EVF_FLAG_RESET_PENDING BIT(9) -#define I40EVF_FLAG_RESET_NEEDED BIT(10) +#define I40EVF_FLAG_RX_CSUM_ENABLED BIT(0) +#define I40EVF_FLAG_IN_NETPOLL BIT(4) +#define I40EVF_FLAG_IMIR_ENABLED BIT(5) +#define I40EVF_FLAG_MQ_CAPABLE BIT(6) +#define I40EVF_FLAG_NEED_LINK_UPDATE BIT(7) +#define I40EVF_FLAG_PF_COMMS_FAILED BIT(8) +#define I40EVF_FLAG_RESET_PENDING BIT(9) +#define I40EVF_FLAG_RESET_NEEDED BIT(10) #define I40EVF_FLAG_WB_ON_ITR_CAPABLE BIT(11) #define I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE BIT(12) #define I40EVF_FLAG_ADDR_SET_BY_PF BIT(13) +#define I40EVF_FLAG_SERVICE_CLIENT_REQUESTED BIT(14) #define I40EVF_FLAG_PROMISC_ON BIT(15) #define I40EVF_FLAG_ALLMULTI_ON BIT(16) /* duplicates for common code */ -#define I40E_FLAG_FDIR_ATR_ENABLED 0 -#define I40E_FLAG_DCB_ENABLED 0 -#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED +#define I40E_FLAG_FDIR_ATR_ENABLED 0 +#define I40E_FLAG_DCB_ENABLED 0 +#define I40E_FLAG_IN_NETPOLL I40EVF_FLAG_IN_NETPOLL +#define I40E_FLAG_RX_CSUM_ENABLED I40EVF_FLAG_RX_CSUM_ENABLED #define I40E_FLAG_WB_ON_ITR_CAPABLE I40EVF_FLAG_WB_ON_ITR_CAPABLE #define I40E_FLAG_OUTER_UDP_CSUM_CAPABLE I40EVF_FLAG_OUTER_UDP_CSUM_CAPABLE /* flags for admin queue service task */ @@ -233,7 +229,7 @@ struct i40evf_adapter { #define I40EVF_FLAG_AQ_CONFIGURE_QUEUES BIT(6) #define I40EVF_FLAG_AQ_MAP_VECTORS BIT(7) #define I40EVF_FLAG_AQ_HANDLE_RESET BIT(8) -#define I40EVF_FLAG_AQ_CONFIGURE_RSS BIT(9) /* direct AQ config */ +#define I40EVF_FLAG_AQ_CONFIGURE_RSS BIT(9) /* direct AQ config */ #define I40EVF_FLAG_AQ_GET_CONFIG BIT(10) /* Newer style, RSS done by the PF so we can ignore hardware vagaries. */ #define I40EVF_FLAG_AQ_GET_HENA BIT(11) @@ -258,6 +254,7 @@ struct i40evf_adapter { struct work_struct watchdog_task; bool netdev_registered; bool link_up; + enum i40e_aq_link_speed link_speed; enum i40e_virtchnl_ops current_op; #define CLIENT_ENABLED(_a) ((_a)->vf_res ? \ (_a)->vf_res->vf_offload_flags & \ diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c index c9c202f6c521..a9940154eead 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_ethtool.c @@ -74,13 +74,33 @@ static const struct i40evf_stats i40evf_gstrings_stats[] = { static int i40evf_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) { - /* In the future the VF will be able to query the PF for - * some information - for now use a dummy value - */ + struct i40evf_adapter *adapter = netdev_priv(netdev); + ecmd->supported = 0; ecmd->autoneg = AUTONEG_DISABLE; ecmd->transceiver = XCVR_DUMMY1; ecmd->port = PORT_NONE; + /* Set speed and duplex */ + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + ethtool_cmd_speed_set(ecmd, SPEED_40000); + break; + case I40E_LINK_SPEED_20GB: + ethtool_cmd_speed_set(ecmd, SPEED_20000); + break; + case I40E_LINK_SPEED_10GB: + ethtool_cmd_speed_set(ecmd, SPEED_10000); + break; + case I40E_LINK_SPEED_1GB: + ethtool_cmd_speed_set(ecmd, SPEED_1000); + break; + case I40E_LINK_SPEED_100MB: + ethtool_cmd_speed_set(ecmd, SPEED_100); + break; + default: + break; + } + ecmd->duplex = DUPLEX_FULL; return 0; } @@ -275,6 +295,50 @@ static int i40evf_set_ringparam(struct net_device *netdev, return 0; } +/** + * __i40evf_get_coalesce - get per-queue coalesce settings + * @netdev: the netdev to check + * @ec: ethtool coalesce data structure + * @queue: which queue to pick + * + * Gets the per-queue settings for coalescence. Specifically Rx and Tx usecs + * are per queue. If queue is <0 then we default to queue 0 as the + * representative value. + **/ +static int __i40evf_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40evf_adapter *adapter = netdev_priv(netdev); + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_ring *rx_ring, *tx_ring; + + ec->tx_max_coalesced_frames = vsi->work_limit; + ec->rx_max_coalesced_frames = vsi->work_limit; + + /* Rx and Tx usecs per queue value. If user doesn't specify the + * queue, return queue 0's value to represent. + */ + if (queue < 0) + queue = 0; + else if (queue >= adapter->num_active_queues) + return -EINVAL; + + rx_ring = &adapter->rx_rings[queue]; + tx_ring = &adapter->tx_rings[queue]; + + if (ITR_IS_DYNAMIC(rx_ring->rx_itr_setting)) + ec->use_adaptive_rx_coalesce = 1; + + if (ITR_IS_DYNAMIC(tx_ring->tx_itr_setting)) + ec->use_adaptive_tx_coalesce = 1; + + ec->rx_coalesce_usecs = rx_ring->rx_itr_setting & ~I40E_ITR_DYNAMIC; + ec->tx_coalesce_usecs = tx_ring->tx_itr_setting & ~I40E_ITR_DYNAMIC; + + return 0; +} + /** * i40evf_get_coalesce - Get interrupt coalescing settings * @netdev: network interface device structure @@ -282,25 +346,124 @@ static int i40evf_set_ringparam(struct net_device *netdev, * * Returns current coalescing settings. This is referred to elsewhere in the * driver as Interrupt Throttle Rate, as this is how the hardware describes - * this functionality. + * this functionality. Note that if per-queue settings have been modified this + * only represents the settings of queue 0. **/ static int i40evf_get_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) +{ + return __i40evf_get_coalesce(netdev, ec, -1); +} + +/** + * i40evf_get_per_queue_coalesce - get coalesce values for specific queue + * @netdev: netdev to read + * @ec: coalesce settings from ethtool + * @queue: the queue to read + * + * Read specific queue's coalesce settings. + **/ +static int i40evf_get_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_get_coalesce(netdev, ec, queue); +} + +/** + * i40evf_set_itr_per_queue - set ITR values for specific queue + * @vsi: the VSI to set values for + * @ec: coalesce settings from ethtool + * @queue: the queue to modify + * + * Change the ITR settings for a specific queue. + **/ +static void i40evf_set_itr_per_queue(struct i40evf_adapter *adapter, + struct ethtool_coalesce *ec, + int queue) +{ + struct i40e_vsi *vsi = &adapter->vsi; + struct i40e_hw *hw = &adapter->hw; + struct i40e_q_vector *q_vector; + u16 vector; + + adapter->rx_rings[queue].rx_itr_setting = ec->rx_coalesce_usecs; + adapter->tx_rings[queue].tx_itr_setting = ec->tx_coalesce_usecs; + + if (ec->use_adaptive_rx_coalesce) + adapter->rx_rings[queue].rx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->rx_rings[queue].rx_itr_setting &= ~I40E_ITR_DYNAMIC; + + if (ec->use_adaptive_tx_coalesce) + adapter->tx_rings[queue].tx_itr_setting |= I40E_ITR_DYNAMIC; + else + adapter->tx_rings[queue].tx_itr_setting &= ~I40E_ITR_DYNAMIC; + + q_vector = adapter->rx_rings[queue].q_vector; + q_vector->rx.itr = ITR_TO_REG(adapter->rx_rings[queue].rx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, vector - 1), q_vector->rx.itr); + + q_vector = adapter->tx_rings[queue].q_vector; + q_vector->tx.itr = ITR_TO_REG(adapter->tx_rings[queue].tx_itr_setting); + vector = vsi->base_vector + q_vector->v_idx; + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, vector - 1), q_vector->tx.itr); + + i40e_flush(hw); +} + +/** + * __i40evf_set_coalesce - set coalesce settings for particular queue + * @netdev: the netdev to change + * @ec: ethtool coalesce settings + * @queue: the queue to change + * + * Sets the coalesce settings for a particular queue. + **/ +static int __i40evf_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec, + int queue) { struct i40evf_adapter *adapter = netdev_priv(netdev); struct i40e_vsi *vsi = &adapter->vsi; + int i; - ec->tx_max_coalesced_frames = vsi->work_limit; - ec->rx_max_coalesced_frames = vsi->work_limit; + if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) + vsi->work_limit = ec->tx_max_coalesced_frames_irq; - if (ITR_IS_DYNAMIC(vsi->rx_itr_setting)) - ec->use_adaptive_rx_coalesce = 1; + if (ec->rx_coalesce_usecs == 0) { + if (ec->use_adaptive_rx_coalesce) + netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n"); + } else if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, rx-usecs range is 0-8160\n"); + return -EINVAL; + } - if (ITR_IS_DYNAMIC(vsi->tx_itr_setting)) - ec->use_adaptive_tx_coalesce = 1; + else + if (ec->tx_coalesce_usecs == 0) { + if (ec->use_adaptive_tx_coalesce) + netif_info(adapter, drv, netdev, "tx-usecs=0, need to disable adaptive-tx for a complete disable\n"); + } else if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) || + (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1))) { + netif_info(adapter, drv, netdev, "Invalid value, tx-usecs range is 0-8160\n"); + return -EINVAL; + } - ec->rx_coalesce_usecs = vsi->rx_itr_setting & ~I40E_ITR_DYNAMIC; - ec->tx_coalesce_usecs = vsi->tx_itr_setting & ~I40E_ITR_DYNAMIC; + /* Rx and Tx usecs has per queue value. If user doesn't specify the + * queue, apply to all queues. + */ + if (queue < 0) { + for (i = 0; i < adapter->num_active_queues; i++) + i40evf_set_itr_per_queue(adapter, ec, i); + } else if (queue < adapter->num_active_queues) { + i40evf_set_itr_per_queue(adapter, ec, queue); + } else { + netif_info(adapter, drv, netdev, "Invalid queue value, queue range is 0 - %d\n", + adapter->num_active_queues - 1); + return -EINVAL; + } return 0; } @@ -310,56 +473,27 @@ static int i40evf_get_coalesce(struct net_device *netdev, * @netdev: network interface device structure * @ec: ethtool coalesce structure * - * Change current coalescing settings. + * Change current coalescing settings for every queue. **/ static int i40evf_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec) { - struct i40evf_adapter *adapter = netdev_priv(netdev); - struct i40e_hw *hw = &adapter->hw; - struct i40e_vsi *vsi = &adapter->vsi; - struct i40e_q_vector *q_vector; - int i; + return __i40evf_set_coalesce(netdev, ec, -1); +} - if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq) - vsi->work_limit = ec->tx_max_coalesced_frames_irq; - - if ((ec->rx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->rx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->rx_itr_setting = ec->rx_coalesce_usecs; - - else - return -EINVAL; - - if ((ec->tx_coalesce_usecs >= (I40E_MIN_ITR << 1)) && - (ec->tx_coalesce_usecs <= (I40E_MAX_ITR << 1))) - vsi->tx_itr_setting = ec->tx_coalesce_usecs; - else if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); - else - return -EINVAL; - - if (ec->use_adaptive_rx_coalesce) - vsi->rx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->rx_itr_setting &= ~I40E_ITR_DYNAMIC; - - if (ec->use_adaptive_tx_coalesce) - vsi->tx_itr_setting |= I40E_ITR_DYNAMIC; - else - vsi->tx_itr_setting &= ~I40E_ITR_DYNAMIC; - - for (i = 0; i < adapter->num_msix_vectors - NONQ_VECS; i++) { - q_vector = &adapter->q_vectors[i]; - q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting); - wr32(hw, I40E_VFINT_ITRN1(0, i), q_vector->rx.itr); - q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting); - wr32(hw, I40E_VFINT_ITRN1(1, i), q_vector->tx.itr); - i40e_flush(hw); - } - - return 0; +/** + * i40evf_set_per_queue_coalesce - set specific queue's coalesce settings + * @netdev: the netdev to change + * @ec: ethtool's coalesce settings + * @queue: the queue to modify + * + * Modifies a specific queue's coalesce settings. + */ +static int i40evf_set_per_queue_coalesce(struct net_device *netdev, + u32 queue, + struct ethtool_coalesce *ec) +{ + return __i40evf_set_coalesce(netdev, ec, queue); } /** @@ -513,6 +647,8 @@ static const struct ethtool_ops i40evf_ethtool_ops = { .set_msglevel = i40evf_set_msglevel, .get_coalesce = i40evf_get_coalesce, .set_coalesce = i40evf_set_coalesce, + .get_per_queue_coalesce = i40evf_get_per_queue_coalesce, + .set_per_queue_coalesce = i40evf_set_per_queue_coalesce, .get_rxnfc = i40evf_get_rxnfc, .get_rxfh_indir_size = i40evf_get_rxfh_indir_size, .get_rxfh = i40evf_get_rxfh, diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_main.c b/drivers/net/ethernet/intel/i40evf/i40evf_main.c index 600fb9c4a7f0..14372810fc27 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_main.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_main.c @@ -38,7 +38,7 @@ static const char i40evf_driver_string[] = #define DRV_VERSION_MAJOR 1 #define DRV_VERSION_MINOR 6 -#define DRV_VERSION_BUILD 11 +#define DRV_VERSION_BUILD 16 #define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \ __stringify(DRV_VERSION_MINOR) "." \ __stringify(DRV_VERSION_BUILD) \ @@ -370,6 +370,7 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *rx_ring = &adapter->rx_rings[r_idx]; + struct i40e_hw *hw = &adapter->hw; rx_ring->q_vector = q_vector; rx_ring->next = q_vector->rx.ring; @@ -377,7 +378,10 @@ i40evf_map_vector_to_rxq(struct i40evf_adapter *adapter, int v_idx, int r_idx) q_vector->rx.ring = rx_ring; q_vector->rx.count++; q_vector->rx.latency_range = I40E_LOW_LATENCY; + q_vector->rx.itr = ITR_TO_REG(rx_ring->rx_itr_setting); + q_vector->ring_mask |= BIT(r_idx); q_vector->itr_countdown = ITR_COUNTDOWN_START; + wr32(hw, I40E_VFINT_ITRN1(I40E_RX_ITR, v_idx - 1), q_vector->rx.itr); } /** @@ -391,6 +395,7 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) { struct i40e_q_vector *q_vector = &adapter->q_vectors[v_idx]; struct i40e_ring *tx_ring = &adapter->tx_rings[t_idx]; + struct i40e_hw *hw = &adapter->hw; tx_ring->q_vector = q_vector; tx_ring->next = q_vector->tx.ring; @@ -398,9 +403,10 @@ i40evf_map_vector_to_txq(struct i40evf_adapter *adapter, int v_idx, int t_idx) q_vector->tx.ring = tx_ring; q_vector->tx.count++; q_vector->tx.latency_range = I40E_LOW_LATENCY; + q_vector->tx.itr = ITR_TO_REG(tx_ring->tx_itr_setting); q_vector->itr_countdown = ITR_COUNTDOWN_START; q_vector->num_ringpairs++; - q_vector->ring_mask |= BIT(t_idx); + wr32(hw, I40E_VFINT_ITRN1(I40E_TX_ITR, v_idx - 1), q_vector->tx.itr); } /** @@ -1007,7 +1013,7 @@ static void i40evf_configure(struct i40evf_adapter *adapter) * i40evf_up_complete - Finish the last steps of bringing up a connection * @adapter: board private structure **/ -static int i40evf_up_complete(struct i40evf_adapter *adapter) +static void i40evf_up_complete(struct i40evf_adapter *adapter) { adapter->state = __I40EVF_RUNNING; clear_bit(__I40E_DOWN, &adapter->vsi.state); @@ -1016,7 +1022,6 @@ static int i40evf_up_complete(struct i40evf_adapter *adapter) adapter->aq_required |= I40EVF_FLAG_AQ_ENABLE_QUEUES; mod_timer_pending(&adapter->watchdog_timer, jiffies + 1); - return 0; } /** @@ -1037,6 +1042,7 @@ void i40evf_down(struct i40evf_adapter *adapter) netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); @@ -1154,6 +1160,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) tx_ring->netdev = adapter->netdev; tx_ring->dev = &adapter->pdev->dev; tx_ring->count = adapter->tx_desc_count; + tx_ring->tx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_TX_DEF); if (adapter->flags & I40E_FLAG_WB_ON_ITR_CAPABLE) tx_ring->flags |= I40E_TXR_FLAGS_WB_ON_ITR; @@ -1162,6 +1169,7 @@ static int i40evf_alloc_queues(struct i40evf_adapter *adapter) rx_ring->netdev = adapter->netdev; rx_ring->dev = &adapter->pdev->dev; rx_ring->count = adapter->rx_desc_count; + rx_ring->rx_itr_setting = (I40E_ITR_DYNAMIC | I40E_ITR_RX_DEF); } return 0; @@ -1420,7 +1428,9 @@ int i40evf_init_interrupt_scheme(struct i40evf_adapter *adapter) { int err; + rtnl_lock(); err = i40evf_set_interrupt_capability(adapter); + rtnl_unlock(); if (err) { dev_err(&adapter->pdev->dev, "Unable to setup interrupt capabilities\n"); @@ -1729,6 +1739,7 @@ static void i40evf_reset_task(struct work_struct *work) set_bit(__I40E_DOWN, &adapter->vsi.state); netif_carrier_off(netdev); netif_tx_disable(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); i40evf_irq_disable(adapter); i40evf_free_traffic_irqs(adapter); @@ -1767,6 +1778,7 @@ continue_reset: if (netif_running(adapter->netdev)) { netif_carrier_off(netdev); netif_tx_stop_all_queues(netdev); + adapter->link_up = false; i40evf_napi_disable_all(adapter); } i40evf_irq_disable(adapter); @@ -1781,8 +1793,7 @@ continue_reset: i40evf_free_all_tx_resources(adapter); /* kill and reinit the admin queue */ - if (i40evf_shutdown_adminq(hw)) - dev_warn(&adapter->pdev->dev, "Failed to shut down adminq\n"); + i40evf_shutdown_adminq(hw); adapter->current_op = I40E_VIRTCHNL_OP_UNKNOWN; err = i40evf_init_adminq(hw); if (err) @@ -1802,6 +1813,8 @@ continue_reset: } adapter->aq_required |= I40EVF_FLAG_AQ_ADD_MAC_FILTER; adapter->aq_required |= I40EVF_FLAG_AQ_ADD_VLAN_FILTER; + /* Open RDMA Client again */ + adapter->aq_required |= I40EVF_FLAG_SERVICE_CLIENT_REQUESTED; clear_bit(__I40EVF_IN_CRITICAL_TASK, &adapter->crit_section); i40evf_misc_irq_enable(adapter); @@ -1820,9 +1833,7 @@ continue_reset: i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto reset_err; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true); } else { @@ -2052,9 +2063,7 @@ static int i40evf_open(struct net_device *netdev) i40evf_add_filter(adapter, adapter->hw.mac.addr); i40evf_configure(adapter); - err = i40evf_up_complete(adapter); - if (err) - goto err_req_irq; + i40evf_up_complete(adapter); i40evf_irq_enable(adapter, true); @@ -2268,10 +2277,6 @@ int i40evf_process_config(struct i40evf_adapter *adapter) adapter->vsi.back = adapter; adapter->vsi.base_vector = 1; adapter->vsi.work_limit = I40E_DEFAULT_IRQ_WORK; - adapter->vsi.rx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_RX_DEF)); - adapter->vsi.tx_itr_setting = (I40E_ITR_DYNAMIC | - ITR_REG_TO_USEC(I40E_ITR_TX_DEF)); vsi->netdev = adapter->netdev; vsi->qs_handle = adapter->vsi_res->qset_handle; if (vfres->vf_offload_flags & I40E_VIRTCHNL_VF_OFFLOAD_RSS_PF) { @@ -2453,6 +2458,7 @@ static void i40evf_init_task(struct work_struct *work) goto err_sw_init; netif_carrier_off(netdev); + adapter->link_up = false; if (!adapter->netdev_registered) { err = register_netdev(netdev); @@ -2831,7 +2837,8 @@ static int __init i40evf_init_module(void) pr_info("%s\n", i40evf_copyright); - i40evf_wq = create_singlethread_workqueue(i40evf_driver_name); + i40evf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, + i40evf_driver_name); if (!i40evf_wq) { pr_err("%s: Failed to create workqueue\n", i40evf_driver_name); return -ENOMEM; diff --git a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c index d76c221d4c8a..ddf478d6322b 100644 --- a/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c +++ b/drivers/net/ethernet/intel/i40evf/i40evf_virtchnl.c @@ -816,6 +816,45 @@ void i40evf_set_rss_lut(struct i40evf_adapter *adapter) kfree(vrl); } +/** + * i40evf_print_link_message - print link up or down + * @adapter: adapter structure + * + * Log a message telling the world of our wonderous link status + */ +static void i40evf_print_link_message(struct i40evf_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + char *speed = "Unknown "; + + if (!adapter->link_up) { + netdev_info(netdev, "NIC Link is Down\n"); + return; + } + + switch (adapter->link_speed) { + case I40E_LINK_SPEED_40GB: + speed = "40 G"; + break; + case I40E_LINK_SPEED_20GB: + speed = "20 G"; + break; + case I40E_LINK_SPEED_10GB: + speed = "10 G"; + break; + case I40E_LINK_SPEED_1GB: + speed = "1000 M"; + break; + case I40E_LINK_SPEED_100MB: + speed = "100 M"; + break; + default: + break; + } + + netdev_info(netdev, "NIC Link is Up %sbps Full Duplex\n", speed); +} + /** * i40evf_request_reset * @adapter: adapter structure @@ -853,16 +892,20 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, (struct i40e_virtchnl_pf_event *)msg; switch (vpe->event) { case I40E_VIRTCHNL_EVENT_LINK_CHANGE: - adapter->link_up = - vpe->event_data.link_event.link_status; - if (adapter->link_up && !netif_carrier_ok(netdev)) { - dev_info(&adapter->pdev->dev, "NIC Link is Up\n"); - netif_carrier_on(netdev); - netif_tx_wake_all_queues(netdev); - } else if (!adapter->link_up) { - dev_info(&adapter->pdev->dev, "NIC Link is Down\n"); - netif_carrier_off(netdev); - netif_tx_stop_all_queues(netdev); + adapter->link_speed = + vpe->event_data.link_event.link_speed; + if (adapter->link_up != + vpe->event_data.link_event.link_status) { + adapter->link_up = + vpe->event_data.link_event.link_status; + if (adapter->link_up) { + netif_tx_start_all_queues(netdev); + netif_carrier_on(netdev); + } else { + netif_tx_stop_all_queues(netdev); + netif_carrier_off(netdev); + } + i40evf_print_link_message(adapter); } break; case I40E_VIRTCHNL_EVENT_RESET_IMPENDING: @@ -937,8 +980,6 @@ void i40evf_virtchnl_completion(struct i40evf_adapter *adapter, case I40E_VIRTCHNL_OP_ENABLE_QUEUES: /* enable transmits */ i40evf_irq_enable(adapter, true); - netif_tx_start_all_queues(adapter->netdev); - netif_carrier_on(adapter->netdev); break; case I40E_VIRTCHNL_OP_DISABLE_QUEUES: i40evf_free_all_tx_resources(adapter); diff --git a/drivers/net/ethernet/intel/igb/e1000_82575.h b/drivers/net/ethernet/intel/igb/e1000_82575.h index 199ff98209cf..acf06051e111 100644 --- a/drivers/net/ethernet/intel/igb/e1000_82575.h +++ b/drivers/net/ethernet/intel/igb/e1000_82575.h @@ -188,6 +188,11 @@ struct e1000_adv_tx_context_desc { /* ETQF register bit definitions */ #define E1000_ETQF_FILTER_ENABLE BIT(26) #define E1000_ETQF_1588 BIT(30) +#define E1000_ETQF_IMM_INT BIT(29) +#define E1000_ETQF_QUEUE_ENABLE BIT(31) +#define E1000_ETQF_QUEUE_SHIFT 16 +#define E1000_ETQF_QUEUE_MASK 0x00070000 +#define E1000_ETQF_ETYPE_MASK 0x0000FFFF /* FTQF register bit definitions */ #define E1000_FTQF_VF_BP 0x00008000 diff --git a/drivers/net/ethernet/intel/igb/e1000_defines.h b/drivers/net/ethernet/intel/igb/e1000_defines.h index 2997c443c5dc..2688180a7acd 100644 --- a/drivers/net/ethernet/intel/igb/e1000_defines.h +++ b/drivers/net/ethernet/intel/igb/e1000_defines.h @@ -1024,4 +1024,8 @@ #define E1000_RTTBCNRC_RF_INT_MASK \ (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) +#define E1000_VLAPQF_QUEUE_SEL(_n, q_idx) (q_idx << ((_n) * 4)) +#define E1000_VLAPQF_P_VALID(_n) (0x1 << (3 + (_n) * 4)) +#define E1000_VLAPQF_QUEUE_MASK 0x03 + #endif diff --git a/drivers/net/ethernet/intel/igb/e1000_regs.h b/drivers/net/ethernet/intel/igb/e1000_regs.h index 21d9d02885cb..d84afdd83e53 100644 --- a/drivers/net/ethernet/intel/igb/e1000_regs.h +++ b/drivers/net/ethernet/intel/igb/e1000_regs.h @@ -309,6 +309,7 @@ (0x054E0 + ((_i - 16) * 8))) #define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ (0x054E4 + ((_i - 16) * 8))) +#define E1000_VLAPQF 0x055B0 /* VLAN Priority Queue Filter VLAPQF */ #define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) #define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) #define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 5387b3a96489..d11093dce1b9 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -350,11 +350,49 @@ struct hwmon_buff { }; #endif +/* The number of L2 ether-type filter registers, Index 3 is reserved + * for PTP 1588 timestamp + */ +#define MAX_ETYPE_FILTER (4 - 1) +/* ETQF filter list: one static filter per filter consumer. This is + * to avoid filter collisions later. Add new filters here!! + * + * Current filters: Filter 3 + */ +#define IGB_ETQF_FILTER_1588 3 + #define IGB_N_EXTTS 2 #define IGB_N_PEROUT 2 #define IGB_N_SDP 4 #define IGB_RETA_SIZE 128 +enum igb_filter_match_flags { + IGB_FILTER_FLAG_ETHER_TYPE = 0x1, + IGB_FILTER_FLAG_VLAN_TCI = 0x2, +}; + +#define IGB_MAX_RXNFC_FILTERS 16 + +/* RX network flow classification data structure */ +struct igb_nfc_input { + /* Byte layout in order, all values with MSB first: + * match_flags - 1 byte + * etype - 2 bytes + * vlan_tci - 2 bytes + */ + u8 match_flags; + __be16 etype; + __be16 vlan_tci; +}; + +struct igb_nfc_filter { + struct hlist_node nfc_node; + struct igb_nfc_input filter; + u16 etype_reg_index; + u16 sw_idx; + u16 action; +}; + /* board specific private data structure */ struct igb_adapter { unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; @@ -451,6 +489,7 @@ struct igb_adapter { struct timecounter tc; u32 tx_hwtstamp_timeouts; u32 rx_hwtstamp_cleared; + bool pps_sys_wrap_on; struct ptp_pin_desc sdp_config[IGB_N_SDP]; struct { @@ -473,6 +512,13 @@ struct igb_adapter { int copper_tries; struct e1000_info ei; u16 eee_advert; + + /* RX network flow classification support */ + struct hlist_head nfc_filter_list; + unsigned int nfc_filter_count; + /* lock for RX network flow classification filter */ + spinlock_t nfc_lock; + bool etype_bitmap[MAX_ETYPE_FILTER]; }; /* flags controlling PTP/1588 function */ @@ -599,4 +645,9 @@ static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring) return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); } +int igb_add_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input); +int igb_erase_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input); + #endif /* _IGB_H_ */ diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c index 64e91c575a39..737b664d004c 100644 --- a/drivers/net/ethernet/intel/igb/igb_ethtool.c +++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c @@ -2431,6 +2431,63 @@ static int igb_get_ts_info(struct net_device *dev, } } +#define ETHER_TYPE_FULL_MASK ((__force __be16)~0) +static int igb_get_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = &cmd->fs; + struct igb_nfc_filter *rule = NULL; + + /* report total rule count */ + cmd->data = IGB_MAX_RXNFC_FILTERS; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (fsp->location <= rule->sw_idx) + break; + } + + if (!rule || fsp->location != rule->sw_idx) + return -EINVAL; + + if (rule->filter.match_flags) { + fsp->flow_type = ETHER_FLOW; + fsp->ring_cookie = rule->action; + if (rule->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { + fsp->h_u.ether_spec.h_proto = rule->filter.etype; + fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK; + } + if (rule->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) { + fsp->flow_type |= FLOW_EXT; + fsp->h_ext.vlan_tci = rule->filter.vlan_tci; + fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK); + } + return 0; + } + return -EINVAL; +} + +static int igb_get_ethtool_nfc_all(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct igb_nfc_filter *rule; + int cnt = 0; + + /* report total rule count */ + cmd->data = IGB_MAX_RXNFC_FILTERS; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + rule_locs[cnt] = rule->sw_idx; + cnt++; + } + + cmd->rule_cnt = cnt; + + return 0; +} + static int igb_get_rss_hash_opts(struct igb_adapter *adapter, struct ethtool_rxnfc *cmd) { @@ -2484,6 +2541,16 @@ static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, cmd->data = adapter->num_rx_queues; ret = 0; break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = adapter->nfc_filter_count; + ret = 0; + break; + case ETHTOOL_GRXCLSRULE: + ret = igb_get_ethtool_nfc_entry(adapter, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + ret = igb_get_ethtool_nfc_all(adapter, cmd, rule_locs); + break; case ETHTOOL_GRXFH: ret = igb_get_rss_hash_opts(adapter, cmd); break; @@ -2598,6 +2665,279 @@ static int igb_set_rss_hash_opt(struct igb_adapter *adapter, return 0; } +static int igb_rxnfc_write_etype_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input) +{ + struct e1000_hw *hw = &adapter->hw; + u8 i; + u32 etqf; + u16 etype; + + /* find an empty etype filter register */ + for (i = 0; i < MAX_ETYPE_FILTER; ++i) { + if (!adapter->etype_bitmap[i]) + break; + } + if (i == MAX_ETYPE_FILTER) { + dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n"); + return -EINVAL; + } + + adapter->etype_bitmap[i] = true; + + etqf = rd32(E1000_ETQF(i)); + etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK); + + etqf |= E1000_ETQF_FILTER_ENABLE; + etqf &= ~E1000_ETQF_ETYPE_MASK; + etqf |= (etype & E1000_ETQF_ETYPE_MASK); + + etqf &= ~E1000_ETQF_QUEUE_MASK; + etqf |= ((input->action << E1000_ETQF_QUEUE_SHIFT) + & E1000_ETQF_QUEUE_MASK); + etqf |= E1000_ETQF_QUEUE_ENABLE; + + wr32(E1000_ETQF(i), etqf); + + input->etype_reg_index = i; + + return 0; +} + +static int igb_rxnfc_write_vlan_prio_filter(struct igb_adapter *adapter, + struct igb_nfc_filter *input) +{ + struct e1000_hw *hw = &adapter->hw; + u8 vlan_priority; + u16 queue_index; + u32 vlapqf; + + vlapqf = rd32(E1000_VLAPQF); + vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) + >> VLAN_PRIO_SHIFT; + queue_index = (vlapqf >> (vlan_priority * 4)) & E1000_VLAPQF_QUEUE_MASK; + + /* check whether this vlan prio is already set */ + if ((vlapqf & E1000_VLAPQF_P_VALID(vlan_priority)) && + (queue_index != input->action)) { + dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n"); + return -EEXIST; + } + + vlapqf |= E1000_VLAPQF_P_VALID(vlan_priority); + vlapqf |= E1000_VLAPQF_QUEUE_SEL(vlan_priority, input->action); + + wr32(E1000_VLAPQF, vlapqf); + + return 0; +} + +int igb_add_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) +{ + int err = -EINVAL; + + if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) { + err = igb_rxnfc_write_etype_filter(adapter, input); + if (err) + return err; + } + + if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) + err = igb_rxnfc_write_vlan_prio_filter(adapter, input); + + return err; +} + +static void igb_clear_etype_filter_regs(struct igb_adapter *adapter, + u16 reg_index) +{ + struct e1000_hw *hw = &adapter->hw; + u32 etqf = rd32(E1000_ETQF(reg_index)); + + etqf &= ~E1000_ETQF_QUEUE_ENABLE; + etqf &= ~E1000_ETQF_QUEUE_MASK; + etqf &= ~E1000_ETQF_FILTER_ENABLE; + + wr32(E1000_ETQF(reg_index), etqf); + + adapter->etype_bitmap[reg_index] = false; +} + +static void igb_clear_vlan_prio_filter(struct igb_adapter *adapter, + u16 vlan_tci) +{ + struct e1000_hw *hw = &adapter->hw; + u8 vlan_priority; + u32 vlapqf; + + vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + + vlapqf = rd32(E1000_VLAPQF); + vlapqf &= ~E1000_VLAPQF_P_VALID(vlan_priority); + vlapqf &= ~E1000_VLAPQF_QUEUE_SEL(vlan_priority, + E1000_VLAPQF_QUEUE_MASK); + + wr32(E1000_VLAPQF, vlapqf); +} + +int igb_erase_filter(struct igb_adapter *adapter, struct igb_nfc_filter *input) +{ + if (input->filter.match_flags & IGB_FILTER_FLAG_ETHER_TYPE) + igb_clear_etype_filter_regs(adapter, + input->etype_reg_index); + + if (input->filter.match_flags & IGB_FILTER_FLAG_VLAN_TCI) + igb_clear_vlan_prio_filter(adapter, + ntohs(input->filter.vlan_tci)); + + return 0; +} + +static int igb_update_ethtool_nfc_entry(struct igb_adapter *adapter, + struct igb_nfc_filter *input, + u16 sw_idx) +{ + struct igb_nfc_filter *rule, *parent; + int err = -EINVAL; + + parent = NULL; + rule = NULL; + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + /* hash found, or no matching entry */ + if (rule->sw_idx >= sw_idx) + break; + parent = rule; + } + + /* if there is an old rule occupying our place remove it */ + if (rule && (rule->sw_idx == sw_idx)) { + if (!input) + err = igb_erase_filter(adapter, rule); + + hlist_del(&rule->nfc_node); + kfree(rule); + adapter->nfc_filter_count--; + } + + /* If no input this was a delete, err should be 0 if a rule was + * successfully found and removed from the list else -EINVAL + */ + if (!input) + return err; + + /* initialize node */ + INIT_HLIST_NODE(&input->nfc_node); + + /* add filter to the list */ + if (parent) + hlist_add_behind(&parent->nfc_node, &input->nfc_node); + else + hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list); + + /* update counts */ + adapter->nfc_filter_count++; + + return 0; +} + +static int igb_add_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct net_device *netdev = adapter->netdev; + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct igb_nfc_filter *input, *rule; + int err = 0; + + if (!(netdev->hw_features & NETIF_F_NTUPLE)) + return -EOPNOTSUPP; + + /* Don't allow programming if the action is a queue greater than + * the number of online Rx queues. + */ + if ((fsp->ring_cookie == RX_CLS_FLOW_DISC) || + (fsp->ring_cookie >= adapter->num_rx_queues)) { + dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n"); + return -EINVAL; + } + + /* Don't allow indexes to exist outside of available space */ + if (fsp->location >= IGB_MAX_RXNFC_FILTERS) { + dev_err(&adapter->pdev->dev, "Location out of range\n"); + return -EINVAL; + } + + if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) + return -EINVAL; + + if (fsp->m_u.ether_spec.h_proto != ETHER_TYPE_FULL_MASK && + fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) + return -EINVAL; + + input = kzalloc(sizeof(*input), GFP_KERNEL); + if (!input) + return -ENOMEM; + + if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) { + input->filter.etype = fsp->h_u.ether_spec.h_proto; + input->filter.match_flags = IGB_FILTER_FLAG_ETHER_TYPE; + } + + if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) { + if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) { + err = -EINVAL; + goto err_out; + } + input->filter.vlan_tci = fsp->h_ext.vlan_tci; + input->filter.match_flags |= IGB_FILTER_FLAG_VLAN_TCI; + } + + input->action = fsp->ring_cookie; + input->sw_idx = fsp->location; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) { + if (!memcmp(&input->filter, &rule->filter, + sizeof(input->filter))) { + err = -EEXIST; + dev_err(&adapter->pdev->dev, + "ethtool: this filter is already set\n"); + goto err_out_w_lock; + } + } + + err = igb_add_filter(adapter, input); + if (err) + goto err_out_w_lock; + + igb_update_ethtool_nfc_entry(adapter, input, input->sw_idx); + + spin_unlock(&adapter->nfc_lock); + return 0; + +err_out_w_lock: + spin_unlock(&adapter->nfc_lock); +err_out: + kfree(input); + return err; +} + +static int igb_del_ethtool_nfc_entry(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + int err; + + spin_lock(&adapter->nfc_lock); + err = igb_update_ethtool_nfc_entry(adapter, NULL, fsp->location); + spin_unlock(&adapter->nfc_lock); + + return err; +} + static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) { struct igb_adapter *adapter = netdev_priv(dev); @@ -2607,6 +2947,11 @@ static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) case ETHTOOL_SRXFH: ret = igb_set_rss_hash_opt(adapter, cmd); break; + case ETHTOOL_SRXCLSRLINS: + ret = igb_add_ethtool_nfc_entry(adapter, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = igb_del_ethtool_nfc_entry(adapter, cmd); default: break; } diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 942a89fb0090..edc9a6ac5169 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -58,7 +58,7 @@ #include "igb.h" #define MAJ 5 -#define MIN 3 +#define MIN 4 #define BUILD 0 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ __stringify(BUILD) "-k" @@ -169,13 +169,15 @@ static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); static void igb_restore_vf_multicasts(struct igb_adapter *adapter); static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac); static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos); + int vf, u16 vlan, u8 qos, __be16 vlan_proto); static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, struct ifla_vf_info *ivi); static void igb_check_vf_rate_limit(struct igb_adapter *); +static void igb_nfc_filter_exit(struct igb_adapter *adapter); +static void igb_nfc_filter_restore(struct igb_adapter *adapter); #ifdef CONFIG_PCI_IOV static int igb_vf_configure(struct igb_adapter *adapter, int vf); @@ -1611,6 +1613,7 @@ static void igb_configure(struct igb_adapter *adapter) igb_setup_mrqc(adapter); igb_setup_rctl(adapter); + igb_nfc_filter_restore(adapter); igb_configure_tx(adapter); igb_configure_rx(adapter); @@ -2059,6 +2062,21 @@ static int igb_set_features(struct net_device *netdev, if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE))) return 0; + if (!(features & NETIF_F_NTUPLE)) { + struct hlist_node *node2; + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + hlist_for_each_entry_safe(rule, node2, + &adapter->nfc_filter_list, nfc_node) { + igb_erase_filter(adapter, rule); + hlist_del(&rule->nfc_node); + kfree(rule); + } + spin_unlock(&adapter->nfc_lock); + adapter->nfc_filter_count = 0; + } + netdev->features = features; if (netif_running(netdev)) @@ -3053,6 +3071,7 @@ static int igb_sw_init(struct igb_adapter *adapter) VLAN_HLEN; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; + spin_lock_init(&adapter->nfc_lock); spin_lock_init(&adapter->stats64_lock); #ifdef CONFIG_PCI_IOV switch (hw->mac.type) { @@ -3240,6 +3259,8 @@ static int __igb_close(struct net_device *netdev, bool suspending) igb_down(adapter); igb_free_irq(adapter); + igb_nfc_filter_exit(adapter); + igb_free_all_tx_resources(adapter); igb_free_all_rx_resources(adapter); @@ -6201,14 +6222,17 @@ static int igb_disable_port_vlan(struct igb_adapter *adapter, int vf) return 0; } -static int igb_ndo_set_vf_vlan(struct net_device *netdev, - int vf, u16 vlan, u8 qos) +static int igb_ndo_set_vf_vlan(struct net_device *netdev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) { struct igb_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return (vlan || qos) ? igb_enable_port_vlan(adapter, vf, vlan, qos) : igb_disable_port_vlan(adapter, vf); } @@ -8306,4 +8330,28 @@ int igb_reinit_queues(struct igb_adapter *adapter) return err; } + +static void igb_nfc_filter_exit(struct igb_adapter *adapter) +{ + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igb_erase_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} + +static void igb_nfc_filter_restore(struct igb_adapter *adapter) +{ + struct igb_nfc_filter *rule; + + spin_lock(&adapter->nfc_lock); + + hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) + igb_add_filter(adapter, rule); + + spin_unlock(&adapter->nfc_lock); +} /* igb_main.c */ diff --git a/drivers/net/ethernet/intel/igb/igb_ptp.c b/drivers/net/ethernet/intel/igb/igb_ptp.c index 336c103ae374..a7895c4cbcc3 100644 --- a/drivers/net/ethernet/intel/igb/igb_ptp.c +++ b/drivers/net/ethernet/intel/igb/igb_ptp.c @@ -591,6 +591,7 @@ static int igb_ptp_feature_enable_i210(struct ptp_clock_info *ptp, tsim |= TSINTR_SYS_WRAP; else tsim &= ~TSINTR_SYS_WRAP; + igb->pps_sys_wrap_on = !!on; wr32(E1000_TSIM, tsim); spin_unlock_irqrestore(&igb->tmreg_lock, flags); return 0; @@ -998,12 +999,12 @@ static int igb_ptp_set_timestamp_mode(struct igb_adapter *adapter, /* define ethertype filter for timestamped packets */ if (is_l2) - wr32(E1000_ETQF(3), + wr32(E1000_ETQF(IGB_ETQF_FILTER_1588), (E1000_ETQF_FILTER_ENABLE | /* enable filter */ E1000_ETQF_1588 | /* enable timestamping */ ETH_P_1588)); /* 1588 eth protocol type */ else - wr32(E1000_ETQF(3), 0); + wr32(E1000_ETQF(IGB_ETQF_FILTER_1588), 0); /* L4 Queue Filter[3]: filter by destination port and protocol */ if (is_l4) { @@ -1159,7 +1160,7 @@ void igb_ptp_init(struct igb_adapter *adapter) if (IS_ERR(adapter->ptp_clock)) { adapter->ptp_clock = NULL; dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); - } else { + } else if (adapter->ptp_clock) { dev_info(&adapter->pdev->dev, "added PHC on %s\n", adapter->netdev->name); adapter->ptp_flags |= IGB_PTP_ENABLED; @@ -1235,7 +1236,9 @@ void igb_ptp_reset(struct igb_adapter *adapter) case e1000_i211: wr32(E1000_TSAUXC, 0x0); wr32(E1000_TSSDP, 0x0); - wr32(E1000_TSIM, TSYNC_INTERRUPTS); + wr32(E1000_TSIM, + TSYNC_INTERRUPTS | + (adapter->pps_sys_wrap_on ? TSINTR_SYS_WRAP : 0)); wr32(E1000_IMS, E1000_IMS_TS); break; default: diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index b0778ba65083..12bb877df860 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -47,7 +47,7 @@ #include "igbvf.h" -#define DRV_VERSION "2.0.2-k" +#define DRV_VERSION "2.4.0-k" char igbvf_driver_name[] = "igbvf"; const char igbvf_driver_version[] = DRV_VERSION; static const char igbvf_driver_string[] = diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index 9475ff9055aa..b06e32d0d22a 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -45,10 +45,10 @@ #include "ixgbe_type.h" #include "ixgbe_common.h" #include "ixgbe_dcb.h" -#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#if IS_ENABLED(CONFIG_FCOE) #define IXGBE_FCOE #include "ixgbe_fcoe.h" -#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */ +#endif /* IS_ENABLED(CONFIG_FCOE) */ #ifdef CONFIG_IXGBE_DCA #include #endif @@ -645,6 +645,7 @@ struct ixgbe_adapter { #define IXGBE_FLAG_RX_HWTSTAMP_ENABLED BIT(25) #define IXGBE_FLAG_RX_HWTSTAMP_IN_REGISTER BIT(26) #define IXGBE_FLAG_DCB_CAPABLE BIT(27) +#define IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE BIT(28) u32 flags2; #define IXGBE_FLAG2_RSC_CAPABLE BIT(0) @@ -653,13 +654,12 @@ struct ixgbe_adapter { #define IXGBE_FLAG2_TEMP_SENSOR_EVENT BIT(3) #define IXGBE_FLAG2_SEARCH_FOR_SFP BIT(4) #define IXGBE_FLAG2_SFP_NEEDS_RESET BIT(5) -#define IXGBE_FLAG2_RESET_REQUESTED BIT(6) #define IXGBE_FLAG2_FDIR_REQUIRES_REINIT BIT(7) #define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP BIT(8) #define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP BIT(9) #define IXGBE_FLAG2_PTP_PPS_ENABLED BIT(10) #define IXGBE_FLAG2_PHY_INTERRUPT BIT(11) -#define IXGBE_FLAG2_VXLAN_REREG_NEEDED BIT(12) +#define IXGBE_FLAG2_UDP_TUN_REREG_NEEDED BIT(12) #define IXGBE_FLAG2_VLAN_PROMISC BIT(13) /* Tx fast path data */ @@ -673,6 +673,7 @@ struct ixgbe_adapter { /* Port number used to identify VXLAN traffic */ __be16 vxlan_port; + __be16 geneve_port; /* TX */ struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; @@ -840,6 +841,7 @@ enum ixgbe_state_t { __IXGBE_IN_SFP_INIT, __IXGBE_PTP_RUNNING, __IXGBE_PTP_TX_IN_PROGRESS, + __IXGBE_RESET_REQUESTED, }; struct ixgbe_cb { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c index c47b605e8651..77d3039283f6 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c @@ -99,6 +99,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550T: case IXGBE_DEV_ID_X550T1: case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_10G_T: supported = true; break; default: diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index 0d7209eb5abf..f49f80380aa5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -193,7 +193,9 @@ static int ixgbe_get_settings(struct net_device *netdev, if (supported_link & IXGBE_LINK_SPEED_10GB_FULL) ecmd->supported |= ixgbe_get_supported_10gtypes(hw); if (supported_link & IXGBE_LINK_SPEED_1GB_FULL) - ecmd->supported |= SUPPORTED_1000baseT_Full; + ecmd->supported |= (ixgbe_isbackplane(hw->phy.media_type)) ? + SUPPORTED_1000baseKX_Full : + SUPPORTED_1000baseT_Full; if (supported_link & IXGBE_LINK_SPEED_100_FULL) ecmd->supported |= ixgbe_isbackplane(hw->phy.media_type) ? SUPPORTED_1000baseKX_Full : @@ -311,6 +313,25 @@ static int ixgbe_get_settings(struct net_device *netdev, break; } + /* Indicate pause support */ + ecmd->supported |= SUPPORTED_Pause; + + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + ecmd->advertising |= ADVERTISED_Pause; + break; + case ixgbe_fc_rx_pause: + ecmd->advertising |= ADVERTISED_Pause | + ADVERTISED_Asym_Pause; + break; + case ixgbe_fc_tx_pause: + ecmd->advertising |= ADVERTISED_Asym_Pause; + break; + default: + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + } + if (netif_carrier_ok(netdev)) { switch (adapter->link_speed) { case IXGBE_LINK_SPEED_10GB_FULL: @@ -2926,9 +2947,13 @@ static u32 ixgbe_rss_indir_size(struct net_device *netdev) static void ixgbe_get_reta(struct ixgbe_adapter *adapter, u32 *indir) { int i, reta_size = ixgbe_rss_indir_tbl_entries(adapter); + u16 rss_m = adapter->ring_feature[RING_F_RSS].mask; + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + rss_m = adapter->ring_feature[RING_F_RSS].indices - 1; for (i = 0; i < reta_size; i++) - indir[i] = adapter->rss_indir_tbl[i]; + indir[i] = adapter->rss_indir_tbl[i] & rss_m; } static int ixgbe_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, @@ -3039,8 +3064,8 @@ static unsigned int ixgbe_max_channels(struct ixgbe_adapter *adapter) /* We only support one q_vector without MSI-X */ max_combined = 1; } else if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { - /* SR-IOV currently only allows one queue on the PF */ - max_combined = 1; + /* Limit value based on the queue mask */ + max_combined = adapter->ring_feature[RING_F_RSS].mask + 1; } else if (tcs > 1) { /* For DCB report channels per traffic class */ if (adapter->hw.mac.type == ixgbe_mac_82598EB) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index bcdc88444ceb..15ab337fd7ad 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -515,15 +515,16 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter) vmdq_i = min_t(u16, IXGBE_MAX_VMDQ_INDICES, vmdq_i); /* 64 pool mode with 2 queues per pool */ - if ((vmdq_i > 32) || (rss_i < 4) || (vmdq_i > 16 && pools)) { + if ((vmdq_i > 32) || (vmdq_i > 16 && pools)) { vmdq_m = IXGBE_82599_VMDQ_2Q_MASK; rss_m = IXGBE_RSS_2Q_MASK; rss_i = min_t(u16, rss_i, 2); - /* 32 pool mode with 4 queues per pool */ + /* 32 pool mode with up to 4 queues per pool */ } else { vmdq_m = IXGBE_82599_VMDQ_4Q_MASK; rss_m = IXGBE_RSS_4Q_MASK; - rss_i = 4; + /* We can support 4, 2, or 1 queues */ + rss_i = (rss_i > 3) ? 4 : (rss_i > 1) ? 2 : 1; } #ifdef IXGBE_FCOE diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index b4f03748adc0..a244d9a67264 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -137,6 +137,7 @@ static const struct pci_device_id ixgbe_pci_tbl[] = { {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N), board_x550em_a }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII), board_x550em_a }, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SGMII_L), board_x550em_a }, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_10G_T), board_x550em_a}, {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X550EM_A_SFP), board_x550em_a }, /* required last entry */ {0, } @@ -1103,7 +1104,7 @@ static void ixgbe_tx_timeout_reset(struct ixgbe_adapter *adapter) /* Do the reset outside of interrupt context */ if (!test_bit(__IXGBE_DOWN, &adapter->state)) { - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); e_warn(drv, "initiating reset due to tx timeout\n"); ixgbe_service_event_schedule(adapter); } @@ -1495,7 +1496,6 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, struct sk_buff *skb) { __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; - __le16 hdr_info = rx_desc->wb.lower.lo_dword.hs_rss.hdr_info; bool encap_pkt = false; skb_checksum_none_assert(skb); @@ -1504,8 +1504,8 @@ static inline void ixgbe_rx_checksum(struct ixgbe_ring *ring, if (!(ring->netdev->features & NETIF_F_RXCSUM)) return; - if ((pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) && - (hdr_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_TUNNEL >> 16))) { + /* check for VXLAN and Geneve packets */ + if (pkt_info & cpu_to_le16(IXGBE_RXDADV_PKTTYPE_VXLAN)) { encap_pkt = true; skb->encapsulation = 1; } @@ -2777,7 +2777,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data) } if (eicr & IXGBE_EICR_ECC) { e_info(link, "Received ECC Err, initiating reset\n"); - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); ixgbe_service_event_schedule(adapter); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } @@ -3007,7 +3007,7 @@ static irqreturn_t ixgbe_intr(int irq, void *data) case ixgbe_mac_x550em_a: if (eicr & IXGBE_EICR_ECC) { e_info(link, "Received ECC Err, initiating reset\n"); - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); ixgbe_service_event_schedule(adapter); IXGBE_WRITE_REG(hw, IXGBE_EICR, IXGBE_EICR_ECC); } @@ -3224,7 +3224,7 @@ void ixgbe_configure_tx_ring(struct ixgbe_adapter *adapter, txdctl = IXGBE_READ_REG(hw, IXGBE_TXDCTL(reg_idx)); } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); if (!wait_loop) - e_err(drv, "Could not enable Tx Queue %d\n", reg_idx); + hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx); } static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) @@ -3248,7 +3248,8 @@ static void ixgbe_setup_mtqc(struct ixgbe_adapter *adapter) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_8TC_8TQ; else if (tcs > 1) mtqc |= IXGBE_MTQC_RT_ENA | IXGBE_MTQC_4TC_4TQ; - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mtqc |= IXGBE_MTQC_32VF; else mtqc |= IXGBE_MTQC_64VF; @@ -3475,12 +3476,12 @@ static void ixgbe_setup_reta(struct ixgbe_adapter *adapter) u32 reta_entries = ixgbe_rss_indir_tbl_entries(adapter); u16 rss_i = adapter->ring_feature[RING_F_RSS].indices; - /* Program table for at least 2 queues w/ SR-IOV so that VFs can + /* Program table for at least 4 queues w/ SR-IOV so that VFs can * make full use of any rings they may have. We will use the * PSRTYPE register to control how many rings we use within the PF. */ - if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 2)) - rss_i = 2; + if ((adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) && (rss_i < 4)) + rss_i = 4; /* Fill out hash function seeds */ for (i = 0; i < 10; i++) @@ -3544,7 +3545,8 @@ static void ixgbe_setup_mrqc(struct ixgbe_adapter *adapter) mrqc = IXGBE_MRQC_VMDQRT8TCEN; /* 8 TCs */ else if (tcs > 1) mrqc = IXGBE_MRQC_VMDQRT4TCEN; /* 4 TCs */ - else if (adapter->ring_feature[RING_F_RSS].indices == 4) + else if (adapter->ring_feature[RING_F_VMDQ].mask == + IXGBE_82599_VMDQ_4Q_MASK) mrqc = IXGBE_MRQC_VMDQRSS32EN; else mrqc = IXGBE_MRQC_VMDQRSS64EN; @@ -3922,6 +3924,9 @@ static void ixgbe_configure_rx(struct ixgbe_adapter *adapter) rfctl &= ~IXGBE_RFCTL_RSC_DIS; if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) rfctl |= IXGBE_RFCTL_RSC_DIS; + + /* disable NFS filtering */ + rfctl |= (IXGBE_RFCTL_NFSW_DIS | IXGBE_RFCTL_NFSR_DIS); IXGBE_WRITE_REG(hw, IXGBE_RFCTL, rfctl); /* Program registers for the distribution of queues */ @@ -4102,23 +4107,20 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: - /* legacy case, we can just disable VLAN filtering */ + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ + vlnctrl |= IXGBE_VLNCTRL_VFE; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + } else { vlnctrl &= ~IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); return; } + /* Nothing to do for 82598 */ + if (hw->mac.type == ixgbe_mac_82598EB) + return; + /* We are already in VLAN promisc, nothing to do */ if (adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC) return; @@ -4126,10 +4128,6 @@ static void ixgbe_vlan_promisc_enable(struct ixgbe_adapter *adapter) /* Set flag so we don't redo unnecessary work */ adapter->flags2 |= IXGBE_FLAG2_VLAN_PROMISC; - /* For VMDq and SR-IOV we must leave VLAN filtering enabled */ - vlnctrl |= IXGBE_VLNCTRL_VFE; - IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - /* Add PF to all active pools */ for (i = IXGBE_VLVF_ENTRIES; --i;) { u32 reg_offset = IXGBE_VLVFB(i * 2 + VMDQ_P(0) / 32); @@ -4201,19 +4199,9 @@ static void ixgbe_vlan_promisc_disable(struct ixgbe_adapter *adapter) vlnctrl |= IXGBE_VLNCTRL_VFE; IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); - switch (hw->mac.type) { - case ixgbe_mac_82599EB: - case ixgbe_mac_X540: - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - default: - if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) - break; - /* fall through */ - case ixgbe_mac_82598EB: + if (!(adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) || + hw->mac.type == ixgbe_mac_82598EB) return; - } /* We are not in VLAN promisc, nothing to do */ if (!(adapter->flags2 & IXGBE_FLAG2_VLAN_PROMISC)) @@ -4586,18 +4574,23 @@ static void ixgbe_napi_disable_all(struct ixgbe_adapter *adapter) } } -static void ixgbe_clear_vxlan_port(struct ixgbe_adapter *adapter) +static void ixgbe_clear_udp_tunnel_port(struct ixgbe_adapter *adapter, u32 mask) { - switch (adapter->hw.mac.type) { - case ixgbe_mac_X550: - case ixgbe_mac_X550EM_x: - case ixgbe_mac_x550em_a: - IXGBE_WRITE_REG(&adapter->hw, IXGBE_VXLANCTRL, 0); + struct ixgbe_hw *hw = &adapter->hw; + u32 vxlanctrl; + + if (!(adapter->flags & (IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE | + IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE))) + return; + + vxlanctrl = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) && ~mask; + IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, vxlanctrl); + + if (mask & IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK) adapter->vxlan_port = 0; - break; - default: - break; - } + + if (mask & IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK) + adapter->geneve_port = 0; } #ifdef CONFIG_IXGBE_DCB @@ -5500,8 +5493,8 @@ void ixgbe_down(struct ixgbe_adapter *adapter) ixgbe_napi_disable_all(adapter); - adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | - IXGBE_FLAG2_RESET_REQUESTED); + clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state); + adapter->flags2 &= ~IXGBE_FLAG2_FDIR_REQUIRES_REINIT; adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; del_timer_sync(&adapter->service_timer); @@ -5711,8 +5704,10 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter) if (fwsm & IXGBE_FWSM_TS_ENABLED) adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; break; - case ixgbe_mac_X550EM_x: case ixgbe_mac_x550em_a: + adapter->flags |= IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE; + /* fall through */ + case ixgbe_mac_X550EM_x: #ifdef CONFIG_IXGBE_DCB adapter->flags &= ~IXGBE_FLAG_DCB_CAPABLE; #endif @@ -6144,7 +6139,7 @@ int ixgbe_open(struct net_device *netdev) ixgbe_up_complete(adapter); - ixgbe_clear_vxlan_port(adapter); + ixgbe_clear_udp_tunnel_port(adapter, IXGBE_VXLANCTRL_ALL_UDPPORT_MASK); udp_tunnel_get_rx_info(netdev); return 0; @@ -6921,7 +6916,7 @@ static void ixgbe_watchdog_flush_tx(struct ixgbe_adapter *adapter) * (Do the reset outside of interrupt context). */ e_warn(drv, "initiating reset to clear Tx work after link loss\n"); - adapter->flags2 |= IXGBE_FLAG2_RESET_REQUESTED; + set_bit(__IXGBE_RESET_REQUESTED, &adapter->state); } } } @@ -7187,11 +7182,9 @@ static void ixgbe_phy_interrupt_subtask(struct ixgbe_adapter *adapter) static void ixgbe_reset_subtask(struct ixgbe_adapter *adapter) { - if (!(adapter->flags2 & IXGBE_FLAG2_RESET_REQUESTED)) + if (!test_and_clear_bit(__IXGBE_RESET_REQUESTED, &adapter->state)) return; - adapter->flags2 &= ~IXGBE_FLAG2_RESET_REQUESTED; - /* If we're already down, removing or resetting, just bail */ if (test_bit(__IXGBE_DOWN, &adapter->state) || test_bit(__IXGBE_REMOVING, &adapter->state) || @@ -7225,9 +7218,9 @@ static void ixgbe_service_task(struct work_struct *work) ixgbe_service_event_complete(adapter); return; } - if (adapter->flags2 & IXGBE_FLAG2_VXLAN_REREG_NEEDED) { + if (adapter->flags2 & IXGBE_FLAG2_UDP_TUN_REREG_NEEDED) { rtnl_lock(); - adapter->flags2 &= ~IXGBE_FLAG2_VXLAN_REREG_NEEDED; + adapter->flags2 &= ~IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; udp_tunnel_get_rx_info(adapter->netdev); rtnl_unlock(); } @@ -7667,6 +7660,10 @@ static void ixgbe_atr(struct ixgbe_ring *ring, if (adapter->vxlan_port && udp_hdr(skb)->dest == adapter->vxlan_port) hdr.network = skb_inner_network_header(skb); + + if (adapter->geneve_port && + udp_hdr(skb)->dest == adapter->geneve_port) + hdr.network = skb_inner_network_header(skb); } /* Currently only IPv4/IPv6 with TCP is supported */ @@ -8802,10 +8799,23 @@ static int ixgbe_set_features(struct net_device *netdev, netdev->features = features; if ((adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) { - if (features & NETIF_F_RXCSUM) - adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED; - else - ixgbe_clear_vxlan_port(adapter); + if (features & NETIF_F_RXCSUM) { + adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; + } else { + u32 port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK; + + ixgbe_clear_udp_tunnel_port(adapter, port_mask); + } + } + + if ((adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) { + if (features & NETIF_F_RXCSUM) { + adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; + } else { + u32 port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK; + + ixgbe_clear_udp_tunnel_port(adapter, port_mask); + } } if (need_reset) @@ -8818,67 +8828,115 @@ static int ixgbe_set_features(struct net_device *netdev, } /** - * ixgbe_add_vxlan_port - Get notifications about VXLAN ports that come up + * ixgbe_add_udp_tunnel_port - Get notifications about adding UDP tunnel ports * @dev: The port's netdev * @ti: Tunnel endpoint information **/ -static void ixgbe_add_vxlan_port(struct net_device *dev, - struct udp_tunnel_info *ti) +static void ixgbe_add_udp_tunnel_port(struct net_device *dev, + struct udp_tunnel_info *ti) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct ixgbe_hw *hw = &adapter->hw; __be16 port = ti->port; - - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) - return; + u32 port_shift = 0; + u32 reg; if (ti->sa_family != AF_INET) return; - if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) - return; + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) + return; - if (adapter->vxlan_port == port) - return; + if (adapter->vxlan_port == port) + return; - if (adapter->vxlan_port) { - netdev_info(dev, - "Hit Max num of VXLAN ports, not adding port %d\n", - ntohs(port)); + if (adapter->vxlan_port) { + netdev_info(dev, + "VXLAN port %d set, not adding port %d\n", + ntohs(adapter->vxlan_port), + ntohs(port)); + return; + } + + adapter->vxlan_port = port; + break; + case UDP_TUNNEL_TYPE_GENEVE: + if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) + return; + + if (adapter->geneve_port == port) + return; + + if (adapter->geneve_port) { + netdev_info(dev, + "GENEVE port %d set, not adding port %d\n", + ntohs(adapter->geneve_port), + ntohs(port)); + return; + } + + port_shift = IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT; + adapter->geneve_port = port; + break; + default: return; } - adapter->vxlan_port = port; - IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, ntohs(port)); + reg = IXGBE_READ_REG(hw, IXGBE_VXLANCTRL) | ntohs(port) << port_shift; + IXGBE_WRITE_REG(hw, IXGBE_VXLANCTRL, reg); } /** - * ixgbe_del_vxlan_port - Get notifications about VXLAN ports that go away + * ixgbe_del_udp_tunnel_port - Get notifications about removing UDP tunnel ports * @dev: The port's netdev * @ti: Tunnel endpoint information **/ -static void ixgbe_del_vxlan_port(struct net_device *dev, - struct udp_tunnel_info *ti) +static void ixgbe_del_udp_tunnel_port(struct net_device *dev, + struct udp_tunnel_info *ti) { struct ixgbe_adapter *adapter = netdev_priv(dev); + u32 port_mask; - if (ti->type != UDP_TUNNEL_TYPE_VXLAN) + if (ti->type != UDP_TUNNEL_TYPE_VXLAN && + ti->type != UDP_TUNNEL_TYPE_GENEVE) return; if (ti->sa_family != AF_INET) return; - if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) - return; + switch (ti->type) { + case UDP_TUNNEL_TYPE_VXLAN: + if (!(adapter->flags & IXGBE_FLAG_VXLAN_OFFLOAD_CAPABLE)) + return; - if (adapter->vxlan_port != ti->port) { - netdev_info(dev, "Port %d was not found, not deleting\n", - ntohs(ti->port)); + if (adapter->vxlan_port != ti->port) { + netdev_info(dev, "VXLAN port %d not found\n", + ntohs(ti->port)); + return; + } + + port_mask = IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK; + break; + case UDP_TUNNEL_TYPE_GENEVE: + if (!(adapter->flags & IXGBE_FLAG_GENEVE_OFFLOAD_CAPABLE)) + return; + + if (adapter->geneve_port != ti->port) { + netdev_info(dev, "GENEVE port %d not found\n", + ntohs(ti->port)); + return; + } + + port_mask = IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK; + break; + default: return; } - ixgbe_clear_vxlan_port(adapter); - adapter->flags2 |= IXGBE_FLAG2_VXLAN_REREG_NEEDED; + ixgbe_clear_udp_tunnel_port(adapter, port_mask); + adapter->flags2 |= IXGBE_FLAG2_UDP_TUN_REREG_NEEDED; } static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], @@ -9192,8 +9250,8 @@ static const struct net_device_ops ixgbe_netdev_ops = { .ndo_bridge_getlink = ixgbe_ndo_bridge_getlink, .ndo_dfwd_add_station = ixgbe_fwd_add, .ndo_dfwd_del_station = ixgbe_fwd_del, - .ndo_udp_tunnel_add = ixgbe_add_vxlan_port, - .ndo_udp_tunnel_del = ixgbe_del_vxlan_port, + .ndo_udp_tunnel_add = ixgbe_add_udp_tunnel_port, + .ndo_udp_tunnel_del = ixgbe_del_udp_tunnel_port, .ndo_features_check = ixgbe_features_check, }; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c index db0731e05401..021ab9b89c71 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_phy.c @@ -346,8 +346,8 @@ s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) return 0; } } - /* clear value if nothing found */ - hw->phy.mdio.prtad = 0; + /* indicate no PHY found */ + hw->phy.mdio.prtad = MDIO_PRTAD_NONE; return IXGBE_ERR_PHY_ADDR_INVALID; } return 0; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c index e5431bfe3339..a92277683a64 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c @@ -1254,7 +1254,7 @@ static long ixgbe_ptp_create_clock(struct ixgbe_adapter *adapter) adapter->ptp_clock = NULL; e_dev_err("ptp_clock_register failed\n"); return err; - } else + } else if (adapter->ptp_clock) e_dev_info("registered PHC device on %s\n", netdev->name); /* set default timestamp mode to disabled here. We do this in diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c index 8618599dfd6f..7e5d9850e4b2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c @@ -329,13 +329,15 @@ static int ixgbe_pci_sriov_enable(struct pci_dev *dev, int num_vfs) for (i = 0; i < adapter->num_vfs; i++) ixgbe_vf_configuration(dev, (i | 0x10000000)); + /* reset before enabling SRIOV to avoid mailbox issues */ + ixgbe_sriov_reinit(adapter); + err = pci_enable_sriov(dev, num_vfs); if (err) { e_dev_warn("Failed to enable PCI sriov: %d\n", err); return err; } ixgbe_get_vfs(adapter); - ixgbe_sriov_reinit(adapter); return num_vfs; #else @@ -1354,13 +1356,16 @@ static int ixgbe_disable_port_vlan(struct ixgbe_adapter *adapter, int vf) return err; } -int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos) +int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, + u8 qos, __be16 vlan_proto) { int err = 0; struct ixgbe_adapter *adapter = netdev_priv(netdev); if ((vf >= adapter->num_vfs) || (vlan > 4095) || (qos > 7)) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; if (vlan || qos) { /* Check if there is already a port VLAN set, if so * we have to delete the old one first before we diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h index 47e65e2f886a..0c7977d27b71 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.h @@ -43,7 +43,7 @@ void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int ixgbe_link_mbps(struct ixgbe_adapter *adapter); int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, int max_tx_rate); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h index 1248a9936f7a..31d82e3abac8 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_type.h @@ -90,6 +90,7 @@ #define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 #define IXGBE_DEV_ID_X550EM_A_SGMII 0x15C6 #define IXGBE_DEV_ID_X550EM_A_SGMII_L 0x15C7 +#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 #define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE /* VF Device IDs */ @@ -487,6 +488,13 @@ struct ixgbe_thermal_sensor_data { #define IXGBE_FHFT_EXT(_n) (0x09800 + ((_n) * 0x100)) /* Ext Flexible Host * Filter Table */ +/* masks for accessing VXLAN and GENEVE UDP ports */ +#define IXGBE_VXLANCTRL_VXLAN_UDPPORT_MASK 0x0000ffff /* VXLAN port */ +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_MASK 0xffff0000 /* GENEVE port */ +#define IXGBE_VXLANCTRL_ALL_UDPPORT_MASK 0xffffffff /* GENEVE/VXLAN */ + +#define IXGBE_VXLANCTRL_GENEVE_UDPPORT_SHIFT 16 + #define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4 #define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2 @@ -1823,6 +1831,9 @@ enum { #define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i) #define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i) #define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i) +#define IXGBE_X557_LED_MANUAL_SET_MASK BIT(8) +#define IXGBE_X557_MAX_LED_INDEX 3 +#define IXGBE_X557_LED_PROVISIONING 0xC430 /* LED modes */ #define IXGBE_LED_LINK_UP 0x0 diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c index 4716ca499e67..7e6b9267ca9d 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x550.c @@ -295,6 +295,12 @@ static s32 ixgbe_identify_phy_x550em(struct ixgbe_hw *hw) case IXGBE_DEV_ID_X550EM_A_KR_L: hw->phy.type = ixgbe_phy_x550em_kr; break; + case IXGBE_DEV_ID_X550EM_A_10G_T: + if (hw->bus.lan_id) + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY1_SM; + else + hw->phy.phy_semaphore_mask = IXGBE_GSSR_PHY0_SM; + /* Fallthrough */ case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_10G_T: return ixgbe_identify_phy_generic(hw); @@ -1453,7 +1459,7 @@ ixgbe_setup_mac_link_sfp_x550a(struct ixgbe_hw *hw, ixgbe_link_speed speed, /* Configure internal PHY for KR/KX. */ ixgbe_setup_kr_speed_x550em(hw, speed); - if (!hw->phy.mdio.prtad || hw->phy.mdio.prtad == 0xFFFF) + if (hw->phy.mdio.prtad == MDIO_PRTAD_NONE) return IXGBE_ERR_PHY_ADDR_INVALID; /* Get external PHY device id */ @@ -2114,6 +2120,50 @@ static s32 ixgbe_reset_phy_t_X550em(struct ixgbe_hw *hw) return ixgbe_enable_lasi_ext_t_x550em(hw); } +/** + * ixgbe_led_on_t_x550em - Turns on the software controllable LEDs. + * @hw: pointer to hardware structure + * @led_idx: led number to turn on + **/ +static s32 ixgbe_led_on_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +{ + u16 phy_data; + + if (led_idx >= IXGBE_X557_MAX_LED_INDEX) + return IXGBE_ERR_PARAM; + + /* To turn on the LED, set mode to ON. */ + hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + phy_data |= IXGBE_X557_LED_MANUAL_SET_MASK; + hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + + return 0; +} + +/** + * ixgbe_led_off_t_x550em - Turns off the software controllable LEDs. + * @hw: pointer to hardware structure + * @led_idx: led number to turn off + **/ +static s32 ixgbe_led_off_t_x550em(struct ixgbe_hw *hw, u32 led_idx) +{ + u16 phy_data; + + if (led_idx >= IXGBE_X557_MAX_LED_INDEX) + return IXGBE_ERR_PARAM; + + /* To turn on the LED, set mode to ON. */ + hw->phy.ops.read_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, &phy_data); + phy_data &= ~IXGBE_X557_LED_MANUAL_SET_MASK; + hw->phy.ops.write_reg(hw, IXGBE_X557_LED_PROVISIONING + led_idx, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, phy_data); + + return 0; +} + /** ixgbe_get_lcd_x550em - Determine lowest common denominator * @hw: pointer to hardware structure * @lcd_speed: pointer to lowest common link speed @@ -2344,18 +2394,12 @@ static void ixgbe_read_mng_if_sel_x550em(struct ixgbe_hw *hw) /* If X552 (X550EM_a) and MDIO is connected to external PHY, then set * PHY address. This register field was has only been used for X552. */ - if (!hw->phy.nw_mng_if_sel) { - if (hw->mac.type == ixgbe_mac_x550em_a) { - struct ixgbe_adapter *adapter = hw->back; - - e_warn(drv, "nw_mng_if_sel not set\n"); - } - return; + if (hw->mac.type == ixgbe_mac_x550em_a && + hw->phy.nw_mng_if_sel & IXGBE_NW_MNG_IF_SEL_MDIO_ACT) { + hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel & + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> + IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; } - - hw->phy.mdio.prtad = (hw->phy.nw_mng_if_sel & - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD) >> - IXGBE_NW_MNG_IF_SEL_MDIO_PHY_ADD_SHIFT; } /** ixgbe_init_phy_ops_X550em - PHY/SFP specific init @@ -2456,6 +2500,7 @@ static enum ixgbe_media_type ixgbe_get_media_type_X550em(struct ixgbe_hw *hw) break; case IXGBE_DEV_ID_X550EM_X_1G_T: case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_10G_T: media_type = ixgbe_media_type_copper; break; default: @@ -2514,6 +2559,9 @@ static void ixgbe_set_mdio_speed(struct ixgbe_hw *hw) switch (hw->device_id) { case IXGBE_DEV_ID_X550EM_X_10G_T: + case IXGBE_DEV_ID_X550EM_A_SGMII: + case IXGBE_DEV_ID_X550EM_A_SGMII_L: + case IXGBE_DEV_ID_X550EM_A_10G_T: case IXGBE_DEV_ID_X550EM_A_SFP: /* Config MDIO clock speed before the first MDIO PHY access */ hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); @@ -2853,8 +2901,6 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, .write_analog_reg8 = NULL, \ .set_rxpba = &ixgbe_set_rxpba_generic, \ .check_link = &ixgbe_check_mac_link_generic, \ - .led_on = &ixgbe_led_on_generic, \ - .led_off = &ixgbe_led_off_generic, \ .blink_led_start = &ixgbe_blink_led_start_X540, \ .blink_led_stop = &ixgbe_blink_led_stop_X540, \ .set_rar = &ixgbe_set_rar_generic, \ @@ -2886,6 +2932,8 @@ static s32 ixgbe_write_phy_reg_x550a(struct ixgbe_hw *hw, u32 reg_addr, static const struct ixgbe_mac_operations mac_ops_X550 = { X550_COMMON_MAC + .led_on = ixgbe_led_on_generic, + .led_off = ixgbe_led_off_generic, .reset_hw = &ixgbe_reset_hw_X540, .get_media_type = &ixgbe_get_media_type_X540, .get_san_mac_addr = &ixgbe_get_san_mac_addr_generic, @@ -2904,6 +2952,8 @@ static const struct ixgbe_mac_operations mac_ops_X550 = { static const struct ixgbe_mac_operations mac_ops_X550EM_x = { X550_COMMON_MAC + .led_on = ixgbe_led_on_t_x550em, + .led_off = ixgbe_led_off_t_x550em, .reset_hw = &ixgbe_reset_hw_X550em, .get_media_type = &ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, @@ -2922,6 +2972,8 @@ static const struct ixgbe_mac_operations mac_ops_X550EM_x = { static struct ixgbe_mac_operations mac_ops_x550em_a = { X550_COMMON_MAC + .led_on = ixgbe_led_on_t_x550em, + .led_off = ixgbe_led_off_t_x550em, .reset_hw = ixgbe_reset_hw_X550em, .get_media_type = ixgbe_get_media_type_X550em, .get_san_mac_addr = NULL, @@ -2997,6 +3049,8 @@ static const struct ixgbe_phy_operations phy_ops_x550em_a = { .identify = &ixgbe_identify_phy_x550em, .read_reg = &ixgbe_read_phy_reg_x550a, .write_reg = &ixgbe_write_phy_reg_x550a, + .read_reg_mdi = &ixgbe_read_phy_reg_mdi, + .write_reg_mdi = &ixgbe_write_phy_reg_mdi, }; static const u32 ixgbe_mvals_X550[IXGBE_MVALS_IDX_LIMIT] = { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index be52f597688b..5639fbe294d0 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -502,12 +502,9 @@ extern void ixgbevf_write_eitr(struct ixgbevf_q_vector *q_vector); void ixgbe_napi_add_all(struct ixgbevf_adapter *adapter); void ixgbe_napi_del_all(struct ixgbevf_adapter *adapter); -#ifdef DEBUG -char *ixgbevf_get_hw_dev_name(struct ixgbe_hw *hw); -#define hw_dbg(hw, format, arg...) \ - printk(KERN_DEBUG "%s: " format, ixgbevf_get_hw_dev_name(hw), ##arg) -#else -#define hw_dbg(hw, format, arg...) do {} while (0) -#endif +#define ixgbevf_hw_to_netdev(hw) \ + (((struct ixgbevf_adapter *)(hw)->back)->netdev) +#define hw_dbg(hw, format, arg...) \ + netdev_dbg(ixgbevf_hw_to_netdev(hw), format, ## arg) #endif /* _IXGBEVF_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index d9d6616f02a4..7eaac3234049 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -1612,7 +1612,7 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, txdctl = IXGBE_READ_REG(hw, IXGBE_VFTXDCTL(reg_idx)); } while (--wait_loop && !(txdctl & IXGBE_TXDCTL_ENABLE)); if (!wait_loop) - pr_err("Could not enable Tx Queue %d\n", reg_idx); + hw_dbg(hw, "Could not enable Tx Queue %d\n", reg_idx); } /** @@ -1810,8 +1810,10 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) if (hw->mac.type >= ixgbe_mac_X550_vf) ixgbevf_setup_vfmrqc(adapter); + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, netdev->mtu + ETH_HLEN + ETH_FCS_LEN); + spin_unlock_bh(&adapter->mbx_lock); if (ret) dev_err(&adapter->pdev->dev, "Failed to set MTU at %d\n", netdev->mtu); @@ -2993,6 +2995,7 @@ static void ixgbevf_free_all_tx_resources(struct ixgbevf_adapter *adapter) **/ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) { + struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); int size; size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; @@ -3757,8 +3760,10 @@ static int ixgbevf_change_mtu(struct net_device *netdev, int new_mtu) if ((new_mtu < 68) || (max_frame > max_possible_frame)) return -EINVAL; + spin_lock_bh(&adapter->mbx_lock); /* notify the PF of our intent to use this size of frame */ ret = hw->mac.ops.set_rlpml(hw, max_frame); + spin_unlock_bh(&adapter->mbx_lock); if (ret) return -EINVAL; diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index a52f70ec42b6..d46ba1dabcb7 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -284,7 +284,8 @@ static s32 ixgbevf_set_uc_addr_vf(struct ixgbe_hw *hw, u32 index, u8 *addr) if (addr) ether_addr_copy(msg_addr, addr); - ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 3); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (!ret_val) { msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -441,7 +442,8 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr, msgbuf[0] = IXGBE_VF_SET_MAC_ADDR; ether_addr_copy(msg_addr, addr); - ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); msgbuf[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -551,7 +553,8 @@ static s32 ixgbevf_update_xcast_mode(struct ixgbe_hw *hw, int xcast_mode) msgbuf[0] = IXGBE_VF_UPDATE_XCAST_MODE; msgbuf[1] = xcast_mode; - err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (err) return err; @@ -588,7 +591,8 @@ static s32 ixgbevf_set_vfta_vf(struct ixgbe_hw *hw, u32 vlan, u32 vind, /* Setting the 8 bit field MSG INFO to TRUE indicates "add" */ msgbuf[0] |= vlan_on << IXGBE_VT_MSGINFO_SHIFT; - err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + err = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (err) goto mbx_err; @@ -791,7 +795,8 @@ static s32 ixgbevf_set_rlpml_vf(struct ixgbe_hw *hw, u16 max_size) msgbuf[0] = IXGBE_VF_SET_LPE; msgbuf[1] = max_size; - ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, 2); + ret_val = ixgbevf_write_msg_read_ack(hw, msgbuf, msgbuf, + sizeof(msgbuf) / sizeof(u32)); if (ret_val) return ret_val; if ((msgbuf[0] & IXGBE_VF_SET_LPE) && @@ -837,7 +842,8 @@ static int ixgbevf_negotiate_api_version_vf(struct ixgbe_hw *hw, int api) msg[1] = api; msg[2] = 0; - err = ixgbevf_write_msg_read_ack(hw, msg, msg, 3); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, + sizeof(msg) / sizeof(u32)); if (!err) { msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; @@ -887,7 +893,8 @@ int ixgbevf_get_queues(struct ixgbe_hw *hw, unsigned int *num_tcs, msg[0] = IXGBE_VF_GET_QUEUE; msg[1] = msg[2] = msg[3] = msg[4] = 0; - err = ixgbevf_write_msg_read_ack(hw, msg, msg, 5); + err = ixgbevf_write_msg_read_ack(hw, msg, msg, + sizeof(msg) / sizeof(u32)); if (!err) { msg[0] &= ~IXGBE_VT_MSGTYPE_CTS; diff --git a/drivers/net/ethernet/marvell/mvmdio.c b/drivers/net/ethernet/marvell/mvmdio.c index 8982c882af1b..a0d1b084ecec 100644 --- a/drivers/net/ethernet/marvell/mvmdio.c +++ b/drivers/net/ethernet/marvell/mvmdio.c @@ -211,8 +211,7 @@ static int orion_mdio_probe(struct platform_device *pdev) dev->regs = devm_ioremap(&pdev->dev, r->start, resource_size(r)); if (!dev->regs) { dev_err(&pdev->dev, "Unable to remap SMI register\n"); - ret = -ENODEV; - goto out_mdio; + return -ENODEV; } init_waitqueue_head(&dev->smi_busy_wait); diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c index b74548728fb5..5cb07c2017bf 100644 --- a/drivers/net/ethernet/marvell/mvneta.c +++ b/drivers/net/ethernet/marvell/mvneta.c @@ -400,7 +400,6 @@ struct mvneta_port { u16 rx_ring_size; struct mii_bus *mii_bus; - struct phy_device *phy_dev; phy_interface_t phy_interface; struct device_node *phy_node; unsigned int link; @@ -637,8 +636,9 @@ static void mvneta_mib_counters_clear(struct mvneta_port *pp) } /* Get System Network Statistics */ -struct rtnl_link_stats64 *mvneta_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static struct rtnl_link_stats64 * +mvneta_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mvneta_port *pp = netdev_priv(dev); unsigned int start; @@ -2653,6 +2653,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget) u32 cause_rx_tx; int rx_queue; struct mvneta_port *pp = netdev_priv(napi->dev); + struct net_device *ndev = pp->dev; struct mvneta_pcpu_port *port = this_cpu_ptr(pp->ports); if (!netif_running(pp->dev)) { @@ -2670,7 +2671,7 @@ static int mvneta_poll(struct napi_struct *napi, int budget) (MVNETA_CAUSE_PHY_STATUS_CHANGE | MVNETA_CAUSE_LINK_CHANGE | MVNETA_CAUSE_PSC_SYNC_CHANGE))) { - mvneta_fixed_link_update(pp, pp->phy_dev); + mvneta_fixed_link_update(pp, ndev->phydev); } } @@ -2965,6 +2966,7 @@ static int mvneta_setup_txqs(struct mvneta_port *pp) static void mvneta_start_dev(struct mvneta_port *pp) { int cpu; + struct net_device *ndev = pp->dev; mvneta_max_rx_size_set(pp, pp->pkt_size); mvneta_txq_max_tx_size_set(pp, pp->pkt_size); @@ -2987,15 +2989,16 @@ static void mvneta_start_dev(struct mvneta_port *pp) MVNETA_CAUSE_LINK_CHANGE | MVNETA_CAUSE_PSC_SYNC_CHANGE); - phy_start(pp->phy_dev); + phy_start(ndev->phydev); netif_tx_start_all_queues(pp->dev); } static void mvneta_stop_dev(struct mvneta_port *pp) { unsigned int cpu; + struct net_device *ndev = pp->dev; - phy_stop(pp->phy_dev); + phy_stop(ndev->phydev); for_each_online_cpu(cpu) { struct mvneta_pcpu_port *port = per_cpu_ptr(pp->ports, cpu); @@ -3168,7 +3171,7 @@ static int mvneta_set_mac_addr(struct net_device *dev, void *addr) static void mvneta_adjust_link(struct net_device *ndev) { struct mvneta_port *pp = netdev_priv(ndev); - struct phy_device *phydev = pp->phy_dev; + struct phy_device *phydev = ndev->phydev; int status_change = 0; if (phydev->link) { @@ -3246,7 +3249,6 @@ static int mvneta_mdio_probe(struct mvneta_port *pp) phy_dev->supported &= PHY_GBIT_FEATURES; phy_dev->advertising = phy_dev->supported; - pp->phy_dev = phy_dev; pp->link = 0; pp->duplex = 0; pp->speed = 0; @@ -3256,8 +3258,9 @@ static int mvneta_mdio_probe(struct mvneta_port *pp) static void mvneta_mdio_remove(struct mvneta_port *pp) { - phy_disconnect(pp->phy_dev); - pp->phy_dev = NULL; + struct net_device *ndev = pp->dev; + + phy_disconnect(ndev->phydev); } /* Electing a CPU must be done in an atomic way: it should be done @@ -3515,42 +3518,31 @@ static int mvneta_stop(struct net_device *dev) static int mvneta_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mvneta_port *pp = netdev_priv(dev); - - if (!pp->phy_dev) + if (!dev->phydev) return -ENOTSUPP; - return phy_mii_ioctl(pp->phy_dev, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); } /* Ethtool methods */ -/* Get settings (phy address, speed) for ethtools */ -int mvneta_ethtool_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +/* Set link ksettings (phy address, speed) for ethtools */ +static int +mvneta_ethtool_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { - struct mvneta_port *pp = netdev_priv(dev); - - if (!pp->phy_dev) - return -ENODEV; - - return phy_ethtool_gset(pp->phy_dev, cmd); -} - -/* Set settings (phy address, speed) for ethtools */ -int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct mvneta_port *pp = netdev_priv(dev); - struct phy_device *phydev = pp->phy_dev; + struct mvneta_port *pp = netdev_priv(ndev); + struct phy_device *phydev = ndev->phydev; if (!phydev) return -ENODEV; - if ((cmd->autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { + if ((cmd->base.autoneg == AUTONEG_ENABLE) != pp->use_inband_status) { u32 val; - mvneta_set_autoneg(pp, cmd->autoneg == AUTONEG_ENABLE); + mvneta_set_autoneg(pp, cmd->base.autoneg == AUTONEG_ENABLE); - if (cmd->autoneg == AUTONEG_DISABLE) { + if (cmd->base.autoneg == AUTONEG_DISABLE) { val = mvreg_read(pp, MVNETA_GMAC_AUTONEG_CONFIG); val &= ~(MVNETA_GMAC_CONFIG_MII_SPEED | MVNETA_GMAC_CONFIG_GMII_SPEED | @@ -3567,17 +3559,17 @@ int mvneta_ethtool_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) mvreg_write(pp, MVNETA_GMAC_AUTONEG_CONFIG, val); } - pp->use_inband_status = (cmd->autoneg == AUTONEG_ENABLE); + pp->use_inband_status = (cmd->base.autoneg == AUTONEG_ENABLE); netdev_info(pp->dev, "autoneg status set to %i\n", pp->use_inband_status); - if (netif_running(dev)) { + if (netif_running(ndev)) { mvneta_port_down(pp); mvneta_port_up(pp); } } - return phy_ethtool_sset(pp->phy_dev, cmd); + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } /* Set interrupt coalescing for ethtools */ @@ -3841,8 +3833,6 @@ static const struct net_device_ops mvneta_netdev_ops = { const struct ethtool_ops mvneta_eth_tool_ops = { .get_link = ethtool_op_get_link, - .get_settings = mvneta_ethtool_get_settings, - .set_settings = mvneta_ethtool_set_settings, .set_coalesce = mvneta_ethtool_set_coalesce, .get_coalesce = mvneta_ethtool_get_coalesce, .get_drvinfo = mvneta_ethtool_get_drvinfo, @@ -3855,6 +3845,8 @@ const struct ethtool_ops mvneta_eth_tool_ops = { .get_rxnfc = mvneta_ethtool_get_rxnfc, .get_rxfh = mvneta_ethtool_get_rxfh, .set_rxfh = mvneta_ethtool_set_rxfh, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = mvneta_ethtool_set_link_ksettings, }; /* Initialize hw */ diff --git a/drivers/net/ethernet/marvell/mvneta_bm.h b/drivers/net/ethernet/marvell/mvneta_bm.h index e74fd44a92f7..a32de432800c 100644 --- a/drivers/net/ethernet/marvell/mvneta_bm.h +++ b/drivers/net/ethernet/marvell/mvneta_bm.h @@ -133,7 +133,7 @@ struct mvneta_bm_pool { void *mvneta_frag_alloc(unsigned int frag_size); void mvneta_frag_free(unsigned int frag_size, void *data); -#if defined(CONFIG_MVNETA_BM) || defined(CONFIG_MVNETA_BM_MODULE) +#if IS_ENABLED(CONFIG_MVNETA_BM) void mvneta_bm_pool_destroy(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, u8 port_map); void mvneta_bm_bufs_free(struct mvneta_bm *priv, struct mvneta_bm_pool *bm_pool, diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 467138b423d3..f05ea56dcff2 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -3070,7 +3070,7 @@ static int sky2_poll(struct napi_struct *napi, int work_limit) goto done; } - napi_complete(napi); + napi_complete_done(napi, work_done); sky2_read32(hw, B0_Y2_SP_LISR); done: diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3743af8f1ded..ad4ab979507b 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -51,7 +52,7 @@ static const struct mtk_ethtool_stats { }; static const char * const mtk_clks_source_name[] = { - "ethif", "esw", "gp1", "gp2" + "ethif", "esw", "gp1", "gp2", "trgpll" }; void mtk_w32(struct mtk_eth *eth, u32 val, unsigned reg) @@ -134,6 +135,33 @@ static int mtk_mdio_read(struct mii_bus *bus, int phy_addr, int phy_reg) return _mtk_mdio_read(eth, phy_addr, phy_reg); } +static void mtk_gmac0_rgmii_adjust(struct mtk_eth *eth, int speed) +{ + u32 val; + int ret; + + val = (speed == SPEED_1000) ? + INTF_MODE_RGMII_1000 : INTF_MODE_RGMII_10_100; + mtk_w32(eth, val, INTF_MODE); + + regmap_update_bits(eth->ethsys, ETHSYS_CLKCFG0, + ETHSYS_TRGMII_CLK_SEL362_5, + ETHSYS_TRGMII_CLK_SEL362_5); + + val = (speed == SPEED_1000) ? 250000000 : 500000000; + ret = clk_set_rate(eth->clks[MTK_CLK_TRGPLL], val); + if (ret) + dev_err(eth->dev, "Failed to set trgmii pll: %d\n", ret); + + val = (speed == SPEED_1000) ? + RCK_CTRL_RGMII_1000 : RCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_RCK_CTRL); + + val = (speed == SPEED_1000) ? + TCK_CTRL_RGMII_1000 : TCK_CTRL_RGMII_10_100; + mtk_w32(eth, val, TRGMII_TCK_CTRL); +} + static void mtk_phy_link_adjust(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -144,7 +172,10 @@ static void mtk_phy_link_adjust(struct net_device *dev) MAC_MCR_RX_EN | MAC_MCR_BACKOFF_EN | MAC_MCR_BACKPR_EN; - switch (mac->phy_dev->speed) { + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return; + + switch (dev->phydev->speed) { case SPEED_1000: mcr |= MAC_MCR_SPEED_1000; break; @@ -153,20 +184,23 @@ static void mtk_phy_link_adjust(struct net_device *dev) break; }; - if (mac->phy_dev->link) + if (mac->id == 0 && !mac->trgmii) + mtk_gmac0_rgmii_adjust(mac->hw, dev->phydev->speed); + + if (dev->phydev->link) mcr |= MAC_MCR_FORCE_LINK; - if (mac->phy_dev->duplex) { + if (dev->phydev->duplex) { mcr |= MAC_MCR_FORCE_DPX; - if (mac->phy_dev->pause) + if (dev->phydev->pause) rmt_adv = LPA_PAUSE_CAP; - if (mac->phy_dev->asym_pause) + if (dev->phydev->asym_pause) rmt_adv |= LPA_PAUSE_ASYM; - if (mac->phy_dev->advertising & ADVERTISED_Pause) + if (dev->phydev->advertising & ADVERTISED_Pause) lcl_adv |= ADVERTISE_PAUSE_CAP; - if (mac->phy_dev->advertising & ADVERTISED_Asym_Pause) + if (dev->phydev->advertising & ADVERTISED_Asym_Pause) lcl_adv |= ADVERTISE_PAUSE_ASYM; flowctrl = mii_resolve_flowctrl_fdx(lcl_adv, rmt_adv); @@ -183,7 +217,7 @@ static void mtk_phy_link_adjust(struct net_device *dev) mtk_w32(mac->hw, mcr, MTK_MAC_MCR(mac->id)); - if (mac->phy_dev->link) + if (dev->phydev->link) netif_carrier_on(dev); else netif_carrier_off(dev); @@ -192,17 +226,9 @@ static void mtk_phy_link_adjust(struct net_device *dev) static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, struct device_node *phy_node) { - const __be32 *_addr = NULL; struct phy_device *phydev; - int phy_mode, addr; + int phy_mode; - _addr = of_get_property(phy_node, "reg", NULL); - - if (!_addr || (be32_to_cpu(*_addr) >= 0x20)) { - pr_err("%s: invalid phy address\n", phy_node->name); - return -EINVAL; - } - addr = be32_to_cpu(*_addr); phy_mode = of_get_phy_mode(phy_node); if (phy_mode < 0) { dev_err(eth->dev, "incorrect phy-mode %d\n", phy_mode); @@ -221,17 +247,17 @@ static int mtk_phy_connect_node(struct mtk_eth *eth, struct mtk_mac *mac, mac->id, phydev_name(phydev), phydev->phy_id, phydev->drv->name); - mac->phy_dev = phydev; - return 0; } -static int mtk_phy_connect(struct mtk_mac *mac) +static int mtk_phy_connect(struct net_device *dev) { - struct mtk_eth *eth = mac->hw; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth; struct device_node *np; - u32 val, ge_mode; + u32 val; + eth = mac->hw; np = of_parse_phandle(mac->of_node, "phy-handle", 0); if (!np && of_phy_is_fixed_link(mac->of_node)) if (!of_phy_register_fixed_link(mac->of_node)) @@ -240,22 +266,24 @@ static int mtk_phy_connect(struct mtk_mac *mac) return -ENODEV; switch (of_get_phy_mode(np)) { + case PHY_INTERFACE_MODE_TRGMII: + mac->trgmii = true; case PHY_INTERFACE_MODE_RGMII_TXID: case PHY_INTERFACE_MODE_RGMII_RXID: case PHY_INTERFACE_MODE_RGMII_ID: case PHY_INTERFACE_MODE_RGMII: - ge_mode = 0; + mac->ge_mode = 0; break; case PHY_INTERFACE_MODE_MII: - ge_mode = 1; + mac->ge_mode = 1; break; case PHY_INTERFACE_MODE_REVMII: - ge_mode = 2; + mac->ge_mode = 2; break; case PHY_INTERFACE_MODE_RMII: if (!mac->id) goto err_phy; - ge_mode = 3; + mac->ge_mode = 3; break; default: goto err_phy; @@ -264,23 +292,26 @@ static int mtk_phy_connect(struct mtk_mac *mac) /* put the gmac into the right mode */ regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, mac->id); - val |= SYSCFG0_GE_MODE(ge_mode, mac->id); + val |= SYSCFG0_GE_MODE(mac->ge_mode, mac->id); regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); - mtk_phy_connect_node(eth, mac, np); - mac->phy_dev->autoneg = AUTONEG_ENABLE; - mac->phy_dev->speed = 0; - mac->phy_dev->duplex = 0; + /* couple phydev to net_device */ + if (mtk_phy_connect_node(eth, mac, np)) + goto err_phy; + + dev->phydev->autoneg = AUTONEG_ENABLE; + dev->phydev->speed = 0; + dev->phydev->duplex = 0; if (of_phy_is_fixed_link(mac->of_node)) - mac->phy_dev->supported |= + dev->phydev->supported |= SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | + dev->phydev->supported &= PHY_GBIT_FEATURES | SUPPORTED_Pause | SUPPORTED_Asym_Pause; - mac->phy_dev->advertising = mac->phy_dev->supported | + dev->phydev->advertising = dev->phydev->supported | ADVERTISED_Autoneg; - phy_start_aneg(mac->phy_dev); + phy_start_aneg(dev->phydev); of_node_put(np); @@ -288,7 +319,7 @@ static int mtk_phy_connect(struct mtk_mac *mac) err_phy: of_node_put(np); - dev_err(eth->dev, "invalid phy_mode\n"); + dev_err(eth->dev, "%s: invalid phy\n", __func__); return -EINVAL; } @@ -336,25 +367,27 @@ static void mtk_mdio_cleanup(struct mtk_eth *eth) mdiobus_unregister(eth->mii_bus); } -static inline void mtk_irq_disable(struct mtk_eth *eth, u32 mask) +static inline void mtk_irq_disable(struct mtk_eth *eth, + unsigned reg, u32 mask) { unsigned long flags; u32 val; spin_lock_irqsave(ð->irq_lock, flags); - val = mtk_r32(eth, MTK_QDMA_INT_MASK); - mtk_w32(eth, val & ~mask, MTK_QDMA_INT_MASK); + val = mtk_r32(eth, reg); + mtk_w32(eth, val & ~mask, reg); spin_unlock_irqrestore(ð->irq_lock, flags); } -static inline void mtk_irq_enable(struct mtk_eth *eth, u32 mask) +static inline void mtk_irq_enable(struct mtk_eth *eth, + unsigned reg, u32 mask) { unsigned long flags; u32 val; spin_lock_irqsave(ð->irq_lock, flags); - val = mtk_r32(eth, MTK_QDMA_INT_MASK); - mtk_w32(eth, val | mask, MTK_QDMA_INT_MASK); + val = mtk_r32(eth, reg); + mtk_w32(eth, val | mask, reg); spin_unlock_irqrestore(ð->irq_lock, flags); } @@ -363,18 +396,20 @@ static int mtk_set_mac_address(struct net_device *dev, void *p) int ret = eth_mac_addr(dev, p); struct mtk_mac *mac = netdev_priv(dev); const char *macaddr = dev->dev_addr; - unsigned long flags; if (ret) return ret; - spin_lock_irqsave(&mac->hw->page_lock, flags); + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + + spin_lock_bh(&mac->hw->page_lock); mtk_w32(mac->hw, (macaddr[0] << 8) | macaddr[1], MTK_GDMA_MAC_ADRH(mac->id)); mtk_w32(mac->hw, (macaddr[2] << 24) | (macaddr[3] << 16) | (macaddr[4] << 8) | macaddr[5], MTK_GDMA_MAC_ADRL(mac->id)); - spin_unlock_irqrestore(&mac->hw->page_lock, flags); + spin_unlock_bh(&mac->hw->page_lock); return 0; } @@ -759,7 +794,6 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) struct mtk_eth *eth = mac->hw; struct mtk_tx_ring *ring = ð->tx_ring; struct net_device_stats *stats = &dev->stats; - unsigned long flags; bool gso = false; int tx_num; @@ -767,14 +801,17 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) * however we have 2 queues running on the same ring so we need to lock * the ring access */ - spin_lock_irqsave(ð->page_lock, flags); + spin_lock(ð->page_lock); + + if (unlikely(test_bit(MTK_RESETTING, ð->state))) + goto drop; tx_num = mtk_cal_txd_req(skb); if (unlikely(atomic_read(&ring->free_count) <= tx_num)) { mtk_stop_queue(eth); netif_err(eth, tx_queued, dev, "Tx Ring full when queue awake!\n"); - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock(ð->page_lock); return NETDEV_TX_BUSY; } @@ -799,22 +836,62 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) mtk_stop_queue(eth); - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock(ð->page_lock); return NETDEV_TX_OK; drop: - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock(ð->page_lock); stats->tx_dropped++; dev_kfree_skb(skb); return NETDEV_TX_OK; } +static struct mtk_rx_ring *mtk_get_rx_ring(struct mtk_eth *eth) +{ + int i; + struct mtk_rx_ring *ring; + int idx; + + if (!eth->hwlro) + return ð->rx_ring[0]; + + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = ð->rx_ring[i]; + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); + if (ring->dma[idx].rxd2 & RX_DMA_DONE) { + ring->calc_idx_update = true; + return ring; + } + } + + return NULL; +} + +static void mtk_update_rx_cpu_idx(struct mtk_eth *eth) +{ + struct mtk_rx_ring *ring; + int i; + + if (!eth->hwlro) { + ring = ð->rx_ring[0]; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } else { + for (i = 0; i < MTK_MAX_RX_RING_NUM; i++) { + ring = ð->rx_ring[i]; + if (ring->calc_idx_update) { + ring->calc_idx_update = false; + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + } + } + } +} + static int mtk_poll_rx(struct napi_struct *napi, int budget, struct mtk_eth *eth) { - struct mtk_rx_ring *ring = ð->rx_ring; - int idx = ring->calc_idx; + struct mtk_rx_ring *ring; + int idx; struct sk_buff *skb; u8 *data, *new_data; struct mtk_rx_dma *rxd, trxd; @@ -826,7 +903,11 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, dma_addr_t dma_addr; int mac = 0; - idx = NEXT_RX_DESP_IDX(idx); + ring = mtk_get_rx_ring(eth); + if (unlikely(!ring)) + goto rx_done; + + idx = NEXT_RX_DESP_IDX(ring->calc_idx, ring->dma_size); rxd = &ring->dma[idx]; data = ring->data[idx]; @@ -841,6 +922,9 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, netdev = eth->netdev[mac]; + if (unlikely(test_bit(MTK_RESETTING, ð->state))) + goto release_desc; + /* alloc new buffer */ new_data = napi_alloc_frag(ring->frag_size); if (unlikely(!new_data)) { @@ -890,17 +974,19 @@ release_desc: rxd->rxd2 = RX_DMA_PLEN0(ring->buf_size); ring->calc_idx = idx; + + done++; + } + +rx_done: + if (done) { /* make sure that all changes to the dma ring are flushed before * we continue */ wmb(); - mtk_w32(eth, ring->calc_idx, MTK_QRX_CRX_IDX0); - done++; + mtk_update_rx_cpu_idx(eth); } - if (done < budget) - mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS); - return done; } @@ -1009,7 +1095,7 @@ static int mtk_napi_tx(struct napi_struct *napi, int budget) return budget; napi_complete(napi); - mtk_irq_enable(eth, MTK_TX_DONE_INT); + mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); return tx_done; } @@ -1019,30 +1105,33 @@ static int mtk_napi_rx(struct napi_struct *napi, int budget) struct mtk_eth *eth = container_of(napi, struct mtk_eth, rx_napi); u32 status, mask; int rx_done = 0; + int remain_budget = budget; mtk_handle_status_irq(eth); - mtk_w32(eth, MTK_RX_DONE_INT, MTK_QMTK_INT_STATUS); - rx_done = mtk_poll_rx(napi, budget, eth); + +poll_again: + mtk_w32(eth, MTK_RX_DONE_INT, MTK_PDMA_INT_STATUS); + rx_done = mtk_poll_rx(napi, remain_budget, eth); if (unlikely(netif_msg_intr(eth))) { - status = mtk_r32(eth, MTK_QMTK_INT_STATUS); - mask = mtk_r32(eth, MTK_QDMA_INT_MASK); + status = mtk_r32(eth, MTK_PDMA_INT_STATUS); + mask = mtk_r32(eth, MTK_PDMA_INT_MASK); dev_info(eth->dev, "done rx %d, intr 0x%08x/0x%x\n", rx_done, status, mask); } - - if (rx_done == budget) - return budget; - - status = mtk_r32(eth, MTK_QMTK_INT_STATUS); - if (status & MTK_RX_DONE_INT) + if (rx_done == remain_budget) return budget; + status = mtk_r32(eth, MTK_PDMA_INT_STATUS); + if (status & MTK_RX_DONE_INT) { + remain_budget -= rx_done; + goto poll_again; + } napi_complete(napi); - mtk_irq_enable(eth, MTK_RX_DONE_INT); + mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); - return rx_done; + return rx_done + budget - remain_budget; } static int mtk_tx_alloc(struct mtk_eth *eth) @@ -1089,6 +1178,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) mtk_w32(eth, ring->phys + ((MTK_DMA_SIZE - 1) * sz), MTK_QTX_DRX_PTR); + mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0)); return 0; @@ -1117,32 +1207,41 @@ static void mtk_tx_clean(struct mtk_eth *eth) } } -static int mtk_rx_alloc(struct mtk_eth *eth) +static int mtk_rx_alloc(struct mtk_eth *eth, int ring_no, int rx_flag) { - struct mtk_rx_ring *ring = ð->rx_ring; + struct mtk_rx_ring *ring = ð->rx_ring[ring_no]; + int rx_data_len, rx_dma_size; int i; - ring->frag_size = mtk_max_frag_size(ETH_DATA_LEN); + if (rx_flag == MTK_RX_FLAGS_HWLRO) { + rx_data_len = MTK_MAX_LRO_RX_LENGTH; + rx_dma_size = MTK_HW_LRO_DMA_SIZE; + } else { + rx_data_len = ETH_DATA_LEN; + rx_dma_size = MTK_DMA_SIZE; + } + + ring->frag_size = mtk_max_frag_size(rx_data_len); ring->buf_size = mtk_max_buf_size(ring->frag_size); - ring->data = kcalloc(MTK_DMA_SIZE, sizeof(*ring->data), + ring->data = kcalloc(rx_dma_size, sizeof(*ring->data), GFP_KERNEL); if (!ring->data) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { ring->data[i] = netdev_alloc_frag(ring->frag_size); if (!ring->data[i]) return -ENOMEM; } ring->dma = dma_alloc_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + rx_dma_size * sizeof(*ring->dma), &ring->phys, GFP_ATOMIC | __GFP_ZERO); if (!ring->dma) return -ENOMEM; - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < rx_dma_size; i++) { dma_addr_t dma_addr = dma_map_single(eth->dev, ring->data[i] + NET_SKB_PAD, ring->buf_size, @@ -1153,28 +1252,30 @@ static int mtk_rx_alloc(struct mtk_eth *eth) ring->dma[i].rxd2 = RX_DMA_PLEN0(ring->buf_size); } - ring->calc_idx = MTK_DMA_SIZE - 1; + ring->dma_size = rx_dma_size; + ring->calc_idx_update = false; + ring->calc_idx = rx_dma_size - 1; + ring->crx_idx_reg = MTK_PRX_CRX_IDX_CFG(ring_no); /* make sure that all changes to the dma ring are flushed before we * continue */ wmb(); - mtk_w32(eth, eth->rx_ring.phys, MTK_QRX_BASE_PTR0); - mtk_w32(eth, MTK_DMA_SIZE, MTK_QRX_MAX_CNT0); - mtk_w32(eth, eth->rx_ring.calc_idx, MTK_QRX_CRX_IDX0); - mtk_w32(eth, MTK_PST_DRX_IDX0, MTK_QDMA_RST_IDX); - mtk_w32(eth, (QDMA_RES_THRES << 8) | QDMA_RES_THRES, MTK_QTX_CFG(0)); + mtk_w32(eth, ring->phys, MTK_PRX_BASE_PTR_CFG(ring_no)); + mtk_w32(eth, rx_dma_size, MTK_PRX_MAX_CNT_CFG(ring_no)); + mtk_w32(eth, ring->calc_idx, ring->crx_idx_reg); + mtk_w32(eth, MTK_PST_DRX_IDX_CFG(ring_no), MTK_PDMA_RST_IDX); return 0; } -static void mtk_rx_clean(struct mtk_eth *eth) +static void mtk_rx_clean(struct mtk_eth *eth, int ring_no) { - struct mtk_rx_ring *ring = ð->rx_ring; + struct mtk_rx_ring *ring = ð->rx_ring[ring_no]; int i; if (ring->data && ring->dma) { - for (i = 0; i < MTK_DMA_SIZE; i++) { + for (i = 0; i < ring->dma_size; i++) { if (!ring->data[i]) continue; if (!ring->dma[i].rxd1) @@ -1191,13 +1292,275 @@ static void mtk_rx_clean(struct mtk_eth *eth) if (ring->dma) { dma_free_coherent(eth->dev, - MTK_DMA_SIZE * sizeof(*ring->dma), + ring->dma_size * sizeof(*ring->dma), ring->dma, ring->phys); ring->dma = NULL; } } +static int mtk_hwlro_rx_init(struct mtk_eth *eth) +{ + int i; + u32 ring_ctrl_dw1 = 0, ring_ctrl_dw2 = 0, ring_ctrl_dw3 = 0; + u32 lro_ctrl_dw0 = 0, lro_ctrl_dw3 = 0; + + /* set LRO rings to auto-learn modes */ + ring_ctrl_dw2 |= MTK_RING_AUTO_LERAN_MODE; + + /* validate LRO ring */ + ring_ctrl_dw2 |= MTK_RING_VLD; + + /* set AGE timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_AGE_TIME_H; + ring_ctrl_dw1 |= MTK_RING_AGE_TIME_L; + + /* set max AGG timer (unit: 20us) */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_TIME; + + /* set max LRO AGG count */ + ring_ctrl_dw2 |= MTK_RING_MAX_AGG_CNT_L; + ring_ctrl_dw3 |= MTK_RING_MAX_AGG_CNT_H; + + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + mtk_w32(eth, ring_ctrl_dw1, MTK_LRO_CTRL_DW1_CFG(i)); + mtk_w32(eth, ring_ctrl_dw2, MTK_LRO_CTRL_DW2_CFG(i)); + mtk_w32(eth, ring_ctrl_dw3, MTK_LRO_CTRL_DW3_CFG(i)); + } + + /* IPv4 checksum update enable */ + lro_ctrl_dw0 |= MTK_L3_CKS_UPD_EN; + + /* switch priority comparison to packet count mode */ + lro_ctrl_dw0 |= MTK_LRO_ALT_PKT_CNT_MODE; + + /* bandwidth threshold setting */ + mtk_w32(eth, MTK_HW_LRO_BW_THRE, MTK_PDMA_LRO_CTRL_DW2); + + /* auto-learn score delta setting */ + mtk_w32(eth, MTK_HW_LRO_REPLACE_DELTA, MTK_PDMA_LRO_ALT_SCORE_DELTA); + + /* set refresh timer for altering flows to 1 sec. (unit: 20us) */ + mtk_w32(eth, (MTK_HW_LRO_TIMER_UNIT << 16) | MTK_HW_LRO_REFRESH_TIME, + MTK_PDMA_LRO_ALT_REFRESH_TIMER); + + /* set HW LRO mode & the max aggregation count for rx packets */ + lro_ctrl_dw3 |= MTK_ADMA_MODE | (MTK_HW_LRO_MAX_AGG_CNT & 0xff); + + /* the minimal remaining room of SDL0 in RXD for lro aggregation */ + lro_ctrl_dw3 |= MTK_LRO_MIN_RXD_SDL; + + /* enable HW LRO */ + lro_ctrl_dw0 |= MTK_LRO_EN; + + mtk_w32(eth, lro_ctrl_dw3, MTK_PDMA_LRO_CTRL_DW3); + mtk_w32(eth, lro_ctrl_dw0, MTK_PDMA_LRO_CTRL_DW0); + + return 0; +} + +static void mtk_hwlro_rx_uninit(struct mtk_eth *eth) +{ + int i; + u32 val; + + /* relinquish lro rings, flush aggregated packets */ + mtk_w32(eth, MTK_LRO_RING_RELINQUISH_REQ, MTK_PDMA_LRO_CTRL_DW0); + + /* wait for relinquishments done */ + for (i = 0; i < 10; i++) { + val = mtk_r32(eth, MTK_PDMA_LRO_CTRL_DW0); + if (val & MTK_LRO_RING_RELINQUISH_DONE) { + msleep(20); + continue; + } + break; + } + + /* invalidate lro rings */ + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + mtk_w32(eth, 0, MTK_LRO_CTRL_DW2_CFG(i)); + + /* disable HW LRO */ + mtk_w32(eth, 0, MTK_PDMA_LRO_CTRL_DW0); +} + +static void mtk_hwlro_val_ipaddr(struct mtk_eth *eth, int idx, __be32 ip) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, ip, MTK_LRO_DIP_DW0_CFG(idx)); + + /* validate the IP setting */ + mtk_w32(eth, (reg_val | MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); +} + +static void mtk_hwlro_inval_ipaddr(struct mtk_eth *eth, int idx) +{ + u32 reg_val; + + reg_val = mtk_r32(eth, MTK_LRO_CTRL_DW2_CFG(idx)); + + /* invalidate the IP setting */ + mtk_w32(eth, (reg_val & ~MTK_RING_MYIP_VLD), MTK_LRO_CTRL_DW2_CFG(idx)); + + mtk_w32(eth, 0, MTK_LRO_DIP_DW0_CFG(idx)); +} + +static int mtk_hwlro_get_ip_cnt(struct mtk_mac *mac) +{ + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) + cnt++; + } + + return cnt; +} + +static int mtk_hwlro_add_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if ((fsp->flow_type != TCP_V4_FLOW) || + (!fsp->h_u.tcp_ip4_spec.ip4dst) || + (fsp->location > 1)) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = htonl(fsp->h_u.tcp_ip4_spec.ip4dst); + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_val_ipaddr(eth, hwlro_idx, mac->hwlro_ip[fsp->location]); + + return 0; +} + +static int mtk_hwlro_del_ipaddr(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int hwlro_idx; + + if (fsp->location > 1) + return -EINVAL; + + mac->hwlro_ip[fsp->location] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + fsp->location; + + mac->hwlro_ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + + return 0; +} + +static void mtk_hwlro_netdev_disable(struct net_device *dev) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct mtk_eth *eth = mac->hw; + int i, hwlro_idx; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + mac->hwlro_ip[i] = 0; + hwlro_idx = (mac->id * MTK_MAX_LRO_IP_CNT) + i; + + mtk_hwlro_inval_ipaddr(eth, hwlro_idx); + } + + mac->hwlro_ip_cnt = 0; +} + +static int mtk_hwlro_get_fdir_entry(struct net_device *dev, + struct ethtool_rxnfc *cmd) +{ + struct mtk_mac *mac = netdev_priv(dev); + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + + /* only tcp dst ipv4 is meaningful, others are meaningless */ + fsp->flow_type = TCP_V4_FLOW; + fsp->h_u.tcp_ip4_spec.ip4dst = ntohl(mac->hwlro_ip[fsp->location]); + fsp->m_u.tcp_ip4_spec.ip4dst = 0; + + fsp->h_u.tcp_ip4_spec.ip4src = 0; + fsp->m_u.tcp_ip4_spec.ip4src = 0xffffffff; + fsp->h_u.tcp_ip4_spec.psrc = 0; + fsp->m_u.tcp_ip4_spec.psrc = 0xffff; + fsp->h_u.tcp_ip4_spec.pdst = 0; + fsp->m_u.tcp_ip4_spec.pdst = 0xffff; + fsp->h_u.tcp_ip4_spec.tos = 0; + fsp->m_u.tcp_ip4_spec.tos = 0xff; + + return 0; +} + +static int mtk_hwlro_get_fdir_all(struct net_device *dev, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct mtk_mac *mac = netdev_priv(dev); + int cnt = 0; + int i; + + for (i = 0; i < MTK_MAX_LRO_IP_CNT; i++) { + if (mac->hwlro_ip[i]) { + rule_locs[cnt] = i; + cnt++; + } + } + + cmd->rule_cnt = cnt; + + return 0; +} + +static netdev_features_t mtk_fix_features(struct net_device *dev, + netdev_features_t features) +{ + if (!(features & NETIF_F_LRO)) { + struct mtk_mac *mac = netdev_priv(dev); + int ip_cnt = mtk_hwlro_get_ip_cnt(mac); + + if (ip_cnt) { + netdev_info(dev, "RX flow is programmed, LRO should keep on\n"); + + features |= NETIF_F_LRO; + } + } + + return features; +} + +static int mtk_set_features(struct net_device *dev, netdev_features_t features) +{ + int err = 0; + + if (!((dev->features ^ features) & NETIF_F_LRO)) + return 0; + + if (!(features & NETIF_F_LRO)) + mtk_hwlro_netdev_disable(dev); + + return err; +} + /* wait for DMA to finish whatever it is doing before we start using it again */ static int mtk_dma_busy_wait(struct mtk_eth *eth) { @@ -1218,6 +1581,7 @@ static int mtk_dma_busy_wait(struct mtk_eth *eth) static int mtk_dma_init(struct mtk_eth *eth) { int err; + u32 i; if (mtk_dma_busy_wait(eth)) return -EBUSY; @@ -1233,10 +1597,21 @@ static int mtk_dma_init(struct mtk_eth *eth) if (err) return err; - err = mtk_rx_alloc(eth); + err = mtk_rx_alloc(eth, 0, MTK_RX_FLAGS_NORMAL); if (err) return err; + if (eth->hwlro) { + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) { + err = mtk_rx_alloc(eth, i, MTK_RX_FLAGS_HWLRO); + if (err) + return err; + } + err = mtk_hwlro_rx_init(eth); + if (err) + return err; + } + /* Enable random early drop and set drop threshold automatically */ mtk_w32(eth, FC_THRES_DROP_MODE | FC_THRES_DROP_EN | FC_THRES_MIN, MTK_QDMA_FC_THRES); @@ -1261,7 +1636,14 @@ static void mtk_dma_free(struct mtk_eth *eth) eth->phy_scratch_ring = 0; } mtk_tx_clean(eth); - mtk_rx_clean(eth); + mtk_rx_clean(eth, 0); + + if (eth->hwlro) { + mtk_hwlro_rx_uninit(eth); + for (i = 1; i < MTK_MAX_RX_RING_NUM; i++) + mtk_rx_clean(eth, i); + } + kfree(eth->scratch_head); } @@ -1282,7 +1664,7 @@ static irqreturn_t mtk_handle_irq_rx(int irq, void *_eth) if (likely(napi_schedule_prep(ð->rx_napi))) { __napi_schedule(ð->rx_napi); - mtk_irq_disable(eth, MTK_RX_DONE_INT); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); } return IRQ_HANDLED; @@ -1294,7 +1676,7 @@ static irqreturn_t mtk_handle_irq_tx(int irq, void *_eth) if (likely(napi_schedule_prep(ð->tx_napi))) { __napi_schedule(ð->tx_napi); - mtk_irq_disable(eth, MTK_TX_DONE_INT); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); } return IRQ_HANDLED; @@ -1305,11 +1687,12 @@ static void mtk_poll_controller(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - u32 int_mask = MTK_TX_DONE_INT | MTK_RX_DONE_INT; - mtk_irq_disable(eth, int_mask); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); mtk_handle_irq_rx(eth->irq[2], dev); - mtk_irq_enable(eth, int_mask); + mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); } #endif @@ -1324,11 +1707,15 @@ static int mtk_start_dma(struct mtk_eth *eth) } mtk_w32(eth, - MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN | - MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS | - MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO, + MTK_TX_WB_DDONE | MTK_TX_DMA_EN | + MTK_DMA_SIZE_16DWORDS | MTK_NDP_CO_PRO, MTK_QDMA_GLO_CFG); + mtk_w32(eth, + MTK_RX_DMA_EN | MTK_RX_2B_OFFSET | + MTK_RX_BT_32DWORDS | MTK_MULTI_EN, + MTK_PDMA_GLO_CFG); + return 0; } @@ -1346,11 +1733,12 @@ static int mtk_open(struct net_device *dev) napi_enable(ð->tx_napi); napi_enable(ð->rx_napi); - mtk_irq_enable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); + mtk_irq_enable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_enable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); } atomic_inc(ð->dma_refcnt); - phy_start(mac->phy_dev); + phy_start(dev->phydev); netif_start_queue(dev); return 0; @@ -1358,16 +1746,15 @@ static int mtk_open(struct net_device *dev) static void mtk_stop_dma(struct mtk_eth *eth, u32 glo_cfg) { - unsigned long flags; u32 val; int i; /* stop the dma engine */ - spin_lock_irqsave(ð->page_lock, flags); + spin_lock_bh(ð->page_lock); val = mtk_r32(eth, glo_cfg); mtk_w32(eth, val & ~(MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN), glo_cfg); - spin_unlock_irqrestore(ð->page_lock, flags); + spin_unlock_bh(ð->page_lock); /* wait for dma stop */ for (i = 0; i < 10; i++) { @@ -1386,32 +1773,63 @@ static int mtk_stop(struct net_device *dev) struct mtk_eth *eth = mac->hw; netif_tx_disable(dev); - phy_stop(mac->phy_dev); + phy_stop(dev->phydev); /* only shutdown DMA if this is the last user */ if (!atomic_dec_and_test(ð->dma_refcnt)) return 0; - mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, MTK_TX_DONE_INT); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, MTK_RX_DONE_INT); napi_disable(ð->tx_napi); napi_disable(ð->rx_napi); mtk_stop_dma(eth, MTK_QDMA_GLO_CFG); + mtk_stop_dma(eth, MTK_PDMA_GLO_CFG); mtk_dma_free(eth); return 0; } -static int __init mtk_hw_init(struct mtk_eth *eth) +static void ethsys_reset(struct mtk_eth *eth, u32 reset_bits) { - int err, i; + regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL, + reset_bits, + reset_bits); - /* reset the frame engine */ - reset_control_assert(eth->rstc); - usleep_range(10, 20); - reset_control_deassert(eth->rstc); - usleep_range(10, 20); + usleep_range(1000, 1100); + regmap_update_bits(eth->ethsys, ETHSYS_RSTCTRL, + reset_bits, + ~reset_bits); + mdelay(10); +} + +static int mtk_hw_init(struct mtk_eth *eth) +{ + int i, val; + + if (test_and_set_bit(MTK_HW_INIT, ð->state)) + return 0; + + pm_runtime_enable(eth->dev); + pm_runtime_get_sync(eth->dev); + + clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); + clk_prepare_enable(eth->clks[MTK_CLK_ESW]); + clk_prepare_enable(eth->clks[MTK_CLK_GP1]); + clk_prepare_enable(eth->clks[MTK_CLK_GP2]); + ethsys_reset(eth, RSTCTRL_FE); + ethsys_reset(eth, RSTCTRL_PPE); + + regmap_read(eth->ethsys, ETHSYS_SYSCFG0, &val); + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->mac[i]) + continue; + val &= ~SYSCFG0_GE_MODE(SYSCFG0_GE_MASK, eth->mac[i]->id); + val |= SYSCFG0_GE_MODE(eth->mac[i]->ge_mode, eth->mac[i]->id); + } + regmap_write(eth->ethsys, ETHSYS_SYSCFG0, val); /* Set GE2 driving and slew rate */ regmap_write(eth->pctl, GPIO_DRV_SEL10, 0xa00); @@ -1431,22 +1849,11 @@ static int __init mtk_hw_init(struct mtk_eth *eth) /* Enable RX VLan Offloading */ mtk_w32(eth, 1, MTK_CDMP_EG_CTRL); - err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, - dev_name(eth->dev), eth); - if (err) - return err; - err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, - dev_name(eth->dev), eth); - if (err) - return err; - - err = mtk_mdio_init(eth); - if (err) - return err; - /* disable delay and normal interrupt */ mtk_w32(eth, 0, MTK_QDMA_DELAY_INT); - mtk_irq_disable(eth, ~0); + mtk_w32(eth, 0, MTK_PDMA_DELAY_INT); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); mtk_w32(eth, RST_GL_PSE, MTK_RST_GL); mtk_w32(eth, 0, MTK_RST_GL); @@ -1460,9 +1867,8 @@ static int __init mtk_hw_init(struct mtk_eth *eth) for (i = 0; i < 2; i++) { u32 val = mtk_r32(eth, MTK_GDMA_FWD_CFG(i)); - /* setup the forward port to send frame to QDMA */ + /* setup the forward port to send frame to PDMA */ val &= ~0xffff; - val |= 0x5555; /* Enable RX checksum */ val |= MTK_GDMA_ICS_EN | MTK_GDMA_TCS_EN | MTK_GDMA_UCS_EN; @@ -1474,6 +1880,22 @@ static int __init mtk_hw_init(struct mtk_eth *eth) return 0; } +static int mtk_hw_deinit(struct mtk_eth *eth) +{ + if (!test_and_clear_bit(MTK_HW_INIT, ð->state)) + return 0; + + clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); + clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); + clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); + clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); + + pm_runtime_put_sync(eth->dev); + pm_runtime_disable(eth->dev); + + return 0; +} + static int __init mtk_init(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); @@ -1492,7 +1914,7 @@ static int __init mtk_init(struct net_device *dev) dev->addr_assign_type = NET_ADDR_RANDOM; } - return mtk_phy_connect(mac); + return mtk_phy_connect(dev); } static void mtk_uninit(struct net_device *dev) @@ -1500,19 +1922,18 @@ static void mtk_uninit(struct net_device *dev) struct mtk_mac *mac = netdev_priv(dev); struct mtk_eth *eth = mac->hw; - phy_disconnect(mac->phy_dev); - mtk_irq_disable(eth, ~0); + phy_disconnect(dev->phydev); + mtk_irq_disable(eth, MTK_QDMA_INT_MASK, ~0); + mtk_irq_disable(eth, MTK_PDMA_INT_MASK, ~0); } static int mtk_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct mtk_mac *mac = netdev_priv(dev); - switch (cmd) { case SIOCGMIIPHY: case SIOCGMIIREG: case SIOCSMIIREG: - return phy_mii_ioctl(mac->phy_dev, ifr, cmd); + return phy_mii_ioctl(dev->phydev, ifr, cmd); default: break; } @@ -1528,6 +1949,12 @@ static void mtk_pending_work(struct work_struct *work) rtnl_lock(); + dev_dbg(eth->dev, "[%s][%d] reset\n", __func__, __LINE__); + + while (test_and_set_bit_lock(MTK_RESETTING, ð->state)) + cpu_relax(); + + dev_dbg(eth->dev, "[%s][%d] mtk_stop starts\n", __func__, __LINE__); /* stop all devices to make sure that dma is properly shut down */ for (i = 0; i < MTK_MAC_COUNT; i++) { if (!eth->netdev[i]) @@ -1535,6 +1962,27 @@ static void mtk_pending_work(struct work_struct *work) mtk_stop(eth->netdev[i]); __set_bit(i, &restart); } + dev_dbg(eth->dev, "[%s][%d] mtk_stop ends\n", __func__, __LINE__); + + /* restart underlying hardware such as power, clock, pin mux + * and the connected phy + */ + mtk_hw_deinit(eth); + + if (eth->dev->pins) + pinctrl_select_state(eth->dev->pins->p, + eth->dev->pins->default_state); + mtk_hw_init(eth); + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->mac[i] || + of_phy_is_fixed_link(eth->mac[i]->of_node)) + continue; + err = phy_init_hw(eth->netdev[i]->phydev); + if (err) + dev_err(eth->dev, "%s: PHY init failed.\n", + eth->netdev[i]->name); + } /* restart DMA and enable IRQs */ for (i = 0; i < MTK_MAC_COUNT; i++) { @@ -1547,51 +1995,69 @@ static void mtk_pending_work(struct work_struct *work) dev_close(eth->netdev[i]); } } + + dev_dbg(eth->dev, "[%s][%d] reset done\n", __func__, __LINE__); + + clear_bit_unlock(MTK_RESETTING, ð->state); + rtnl_unlock(); } -static int mtk_cleanup(struct mtk_eth *eth) +static int mtk_free_dev(struct mtk_eth *eth) { int i; for (i = 0; i < MTK_MAC_COUNT; i++) { if (!eth->netdev[i]) continue; - - unregister_netdev(eth->netdev[i]); free_netdev(eth->netdev[i]); } + + return 0; +} + +static int mtk_unreg_dev(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->netdev[i]) + continue; + unregister_netdev(eth->netdev[i]); + } + + return 0; +} + +static int mtk_cleanup(struct mtk_eth *eth) +{ + mtk_unreg_dev(eth); + mtk_free_dev(eth); cancel_work_sync(ð->pending_work); return 0; } -static int mtk_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int mtk_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); - int err; + struct mtk_mac *mac = netdev_priv(ndev); - err = phy_read_status(mac->phy_dev); - if (err) - return -ENODEV; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; - return phy_ethtool_gset(mac->phy_dev, cmd); + return phy_ethtool_ksettings_get(ndev->phydev, cmd); } -static int mtk_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static int mtk_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { - struct mtk_mac *mac = netdev_priv(dev); + struct mtk_mac *mac = netdev_priv(ndev); - if (cmd->phy_address != mac->phy_dev->mdio.addr) { - mac->phy_dev = mdiobus_get_phy(mac->hw->mii_bus, - cmd->phy_address); - if (!mac->phy_dev) - return -ENODEV; - } + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; - return phy_ethtool_sset(mac->phy_dev, cmd); + return phy_ethtool_ksettings_set(ndev->phydev, cmd); } static void mtk_get_drvinfo(struct net_device *dev, @@ -1622,7 +2088,10 @@ static int mtk_nway_reset(struct net_device *dev) { struct mtk_mac *mac = netdev_priv(dev); - return genphy_restart_aneg(mac->phy_dev); + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + + return genphy_restart_aneg(dev->phydev); } static u32 mtk_get_link(struct net_device *dev) @@ -1630,11 +2099,14 @@ static u32 mtk_get_link(struct net_device *dev) struct mtk_mac *mac = netdev_priv(dev); int err; - err = genphy_update_link(mac->phy_dev); + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return -EBUSY; + + err = genphy_update_link(dev->phydev); if (err) return ethtool_op_get_link(dev); - return mac->phy_dev->link; + return dev->phydev->link; } static void mtk_get_strings(struct net_device *dev, u32 stringset, u8 *data) @@ -1670,6 +2142,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev, unsigned int start; int i; + if (unlikely(test_bit(MTK_RESETTING, &mac->hw->state))) + return; + if (netif_running(dev) && netif_device_present(dev)) { if (spin_trylock(&hwstats->stats_lock)) { mtk_stats_update_mac(mac); @@ -1677,8 +2152,9 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } } + data_src = (u64 *)hwstats; + do { - data_src = (u64*)hwstats; data_dst = data; start = u64_stats_fetch_begin_irq(&hwstats->syncp); @@ -1687,9 +2163,65 @@ static void mtk_get_ethtool_stats(struct net_device *dev, } while (u64_stats_fetch_retry_irq(&hwstats->syncp, start)); } -static struct ethtool_ops mtk_ethtool_ops = { - .get_settings = mtk_get_settings, - .set_settings = mtk_set_settings, +static int mtk_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + if (dev->features & NETIF_F_LRO) { + cmd->data = MTK_MAX_RX_RING_NUM; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRLCNT: + if (dev->features & NETIF_F_LRO) { + struct mtk_mac *mac = netdev_priv(dev); + + cmd->rule_cnt = mac->hwlro_ip_cnt; + ret = 0; + } + break; + case ETHTOOL_GRXCLSRULE: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_entry(dev, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_get_fdir_all(dev, cmd, + rule_locs); + break; + default: + break; + } + + return ret; +} + +static int mtk_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_add_ipaddr(dev, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + if (dev->features & NETIF_F_LRO) + ret = mtk_hwlro_del_ipaddr(dev, cmd); + break; + default: + break; + } + + return ret; +} + +static const struct ethtool_ops mtk_ethtool_ops = { + .get_link_ksettings = mtk_get_link_ksettings, + .set_link_ksettings = mtk_set_link_ksettings, .get_drvinfo = mtk_get_drvinfo, .get_msglevel = mtk_get_msglevel, .set_msglevel = mtk_set_msglevel, @@ -1698,6 +2230,8 @@ static struct ethtool_ops mtk_ethtool_ops = { .get_strings = mtk_get_strings, .get_sset_count = mtk_get_sset_count, .get_ethtool_stats = mtk_get_ethtool_stats, + .get_rxnfc = mtk_get_rxnfc, + .set_rxnfc = mtk_set_rxnfc, }; static const struct net_device_ops mtk_netdev_ops = { @@ -1712,6 +2246,8 @@ static const struct net_device_ops mtk_netdev_ops = { .ndo_change_mtu = eth_change_mtu, .ndo_tx_timeout = mtk_tx_timeout, .ndo_get_stats64 = mtk_get_stats64, + .ndo_fix_features = mtk_fix_features, + .ndo_set_features = mtk_set_features, #ifdef CONFIG_NET_POLL_CONTROLLER .ndo_poll_controller = mtk_poll_controller, #endif @@ -1750,6 +2286,9 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw = eth; mac->of_node = np; + memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip)); + mac->hwlro_ip_cnt = 0; + mac->hw_stats = devm_kzalloc(eth->dev, sizeof(*mac->hw_stats), GFP_KERNEL); @@ -1766,21 +2305,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; eth->netdev[id]->base_addr = (unsigned long)eth->base; + + eth->netdev[id]->hw_features = MTK_HW_FEATURES; + if (eth->hwlro) + eth->netdev[id]->hw_features |= NETIF_F_LRO; + eth->netdev[id]->vlan_features = MTK_HW_FEATURES & ~(NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX); eth->netdev[id]->features |= MTK_HW_FEATURES; eth->netdev[id]->ethtool_ops = &mtk_ethtool_ops; - err = register_netdev(eth->netdev[id]); - if (err) { - dev_err(eth->dev, "error bringing up device\n"); - goto free_netdev; - } eth->netdev[id]->irq = eth->irq[0]; - netif_info(eth, probe, eth->netdev[id], - "mediatek frame engine at 0x%08lx, irq %d\n", - eth->netdev[id]->base_addr, eth->irq[0]); - return 0; free_netdev: @@ -1827,11 +2362,7 @@ static int mtk_probe(struct platform_device *pdev) return PTR_ERR(eth->pctl); } - eth->rstc = devm_reset_control_get(&pdev->dev, "eth"); - if (IS_ERR(eth->rstc)) { - dev_err(&pdev->dev, "no eth reset found\n"); - return PTR_ERR(eth->rstc); - } + eth->hwlro = of_property_read_bool(pdev->dev.of_node, "mediatek,hwlro"); for (i = 0; i < 3; i++) { eth->irq[i] = platform_get_irq(pdev, i); @@ -1850,11 +2381,6 @@ static int mtk_probe(struct platform_device *pdev) } } - clk_prepare_enable(eth->clks[MTK_CLK_ETHIF]); - clk_prepare_enable(eth->clks[MTK_CLK_ESW]); - clk_prepare_enable(eth->clks[MTK_CLK_GP1]); - clk_prepare_enable(eth->clks[MTK_CLK_GP2]); - eth->msg_enable = netif_msg_init(mtk_msg_level, MTK_DEFAULT_MSG_ENABLE); INIT_WORK(ð->pending_work, mtk_pending_work); @@ -1872,7 +2398,35 @@ static int mtk_probe(struct platform_device *pdev) err = mtk_add_mac(eth, mac_np); if (err) - goto err_free_dev; + goto err_deinit_hw; + } + + err = devm_request_irq(eth->dev, eth->irq[1], mtk_handle_irq_tx, 0, + dev_name(eth->dev), eth); + if (err) + goto err_free_dev; + + err = devm_request_irq(eth->dev, eth->irq[2], mtk_handle_irq_rx, 0, + dev_name(eth->dev), eth); + if (err) + goto err_free_dev; + + err = mtk_mdio_init(eth); + if (err) + goto err_free_dev; + + for (i = 0; i < MTK_MAX_DEVS; i++) { + if (!eth->netdev[i]) + continue; + + err = register_netdev(eth->netdev[i]); + if (err) { + dev_err(eth->dev, "error bringing up device\n"); + goto err_deinit_mdio; + } else + netif_info(eth, probe, eth->netdev[i], + "mediatek frame engine at 0x%08lx, irq %d\n", + eth->netdev[i]->base_addr, eth->irq[0]); } /* we run 2 devices on the same DMA ring so we need a dummy device @@ -1888,8 +2442,13 @@ static int mtk_probe(struct platform_device *pdev) return 0; +err_deinit_mdio: + mtk_mdio_cleanup(eth); err_free_dev: - mtk_cleanup(eth); + mtk_free_dev(eth); +err_deinit_hw: + mtk_hw_deinit(eth); + return err; } @@ -1905,16 +2464,12 @@ static int mtk_remove(struct platform_device *pdev) mtk_stop(eth->netdev[i]); } - clk_disable_unprepare(eth->clks[MTK_CLK_ETHIF]); - clk_disable_unprepare(eth->clks[MTK_CLK_ESW]); - clk_disable_unprepare(eth->clks[MTK_CLK_GP1]); - clk_disable_unprepare(eth->clks[MTK_CLK_GP2]); + mtk_hw_deinit(eth); netif_napi_del(ð->tx_napi); netif_napi_del(ð->rx_napi); mtk_cleanup(eth); mtk_mdio_cleanup(eth); - platform_set_drvdata(pdev, NULL); return 0; } diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index 6e1ade7a25c5..30031959d6de 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -39,7 +39,21 @@ NETIF_F_SG | NETIF_F_TSO | \ NETIF_F_TSO6 | \ NETIF_F_IPV6_CSUM) -#define NEXT_RX_DESP_IDX(X) (((X) + 1) & (MTK_DMA_SIZE - 1)) +#define NEXT_RX_DESP_IDX(X, Y) (((X) + 1) & ((Y) - 1)) + +#define MTK_MAX_RX_RING_NUM 4 +#define MTK_HW_LRO_DMA_SIZE 8 + +#define MTK_MAX_LRO_RX_LENGTH (4096 * 3) +#define MTK_MAX_LRO_IP_CNT 2 +#define MTK_HW_LRO_TIMER_UNIT 1 /* 20 us */ +#define MTK_HW_LRO_REFRESH_TIME 50000 /* 1 sec. */ +#define MTK_HW_LRO_AGG_TIME 10 /* 200us */ +#define MTK_HW_LRO_AGE_TIME 50 /* 1ms */ +#define MTK_HW_LRO_MAX_AGG_CNT 64 +#define MTK_HW_LRO_BW_THRE 3000 +#define MTK_HW_LRO_REPLACE_DELTA 1000 +#define MTK_HW_LRO_SDL_REMAIN_ROOM 1522 /* Frame Engine Global Reset Register */ #define MTK_RST_GL 0x04 @@ -50,6 +64,9 @@ #define MTK_GDM1_AF BIT(28) #define MTK_GDM2_AF BIT(29) +/* PDMA HW LRO Alter Flow Timer Register */ +#define MTK_PDMA_LRO_ALT_REFRESH_TIMER 0x1c + /* Frame Engine Interrupt Grouping Register */ #define MTK_FE_INT_GRP 0x20 @@ -68,10 +85,77 @@ /* Unicast Filter MAC Address Register - High */ #define MTK_GDMA_MAC_ADRH(x) (0x50C + (x * 0x1000)) +/* PDMA RX Base Pointer Register */ +#define MTK_PRX_BASE_PTR0 0x900 +#define MTK_PRX_BASE_PTR_CFG(x) (MTK_PRX_BASE_PTR0 + (x * 0x10)) + +/* PDMA RX Maximum Count Register */ +#define MTK_PRX_MAX_CNT0 0x904 +#define MTK_PRX_MAX_CNT_CFG(x) (MTK_PRX_MAX_CNT0 + (x * 0x10)) + +/* PDMA RX CPU Pointer Register */ +#define MTK_PRX_CRX_IDX0 0x908 +#define MTK_PRX_CRX_IDX_CFG(x) (MTK_PRX_CRX_IDX0 + (x * 0x10)) + +/* PDMA HW LRO Control Registers */ +#define MTK_PDMA_LRO_CTRL_DW0 0x980 +#define MTK_LRO_EN BIT(0) +#define MTK_L3_CKS_UPD_EN BIT(7) +#define MTK_LRO_ALT_PKT_CNT_MODE BIT(21) +#define MTK_LRO_RING_RELINQUISH_REQ (0x7 << 26) +#define MTK_LRO_RING_RELINQUISH_DONE (0x7 << 29) + +#define MTK_PDMA_LRO_CTRL_DW1 0x984 +#define MTK_PDMA_LRO_CTRL_DW2 0x988 +#define MTK_PDMA_LRO_CTRL_DW3 0x98c +#define MTK_ADMA_MODE BIT(15) +#define MTK_LRO_MIN_RXD_SDL (MTK_HW_LRO_SDL_REMAIN_ROOM << 16) + +/* PDMA Global Configuration Register */ +#define MTK_PDMA_GLO_CFG 0xa04 +#define MTK_MULTI_EN BIT(10) + +/* PDMA Reset Index Register */ +#define MTK_PDMA_RST_IDX 0xa08 +#define MTK_PST_DRX_IDX0 BIT(16) +#define MTK_PST_DRX_IDX_CFG(x) (MTK_PST_DRX_IDX0 << (x)) + +/* PDMA Delay Interrupt Register */ +#define MTK_PDMA_DELAY_INT 0xa0c + +/* PDMA Interrupt Status Register */ +#define MTK_PDMA_INT_STATUS 0xa20 + +/* PDMA Interrupt Mask Register */ +#define MTK_PDMA_INT_MASK 0xa28 + +/* PDMA HW LRO Alter Flow Delta Register */ +#define MTK_PDMA_LRO_ALT_SCORE_DELTA 0xa4c + /* PDMA Interrupt grouping registers */ #define MTK_PDMA_INT_GRP1 0xa50 #define MTK_PDMA_INT_GRP2 0xa54 +/* PDMA HW LRO IP Setting Registers */ +#define MTK_LRO_RX_RING0_DIP_DW0 0xb04 +#define MTK_LRO_DIP_DW0_CFG(x) (MTK_LRO_RX_RING0_DIP_DW0 + (x * 0x40)) +#define MTK_RING_MYIP_VLD BIT(9) + +/* PDMA HW LRO Ring Control Registers */ +#define MTK_LRO_RX_RING0_CTRL_DW1 0xb28 +#define MTK_LRO_RX_RING0_CTRL_DW2 0xb2c +#define MTK_LRO_RX_RING0_CTRL_DW3 0xb30 +#define MTK_LRO_CTRL_DW1_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW1 + (x * 0x40)) +#define MTK_LRO_CTRL_DW2_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW2 + (x * 0x40)) +#define MTK_LRO_CTRL_DW3_CFG(x) (MTK_LRO_RX_RING0_CTRL_DW3 + (x * 0x40)) +#define MTK_RING_AGE_TIME_L ((MTK_HW_LRO_AGE_TIME & 0x3ff) << 22) +#define MTK_RING_AGE_TIME_H ((MTK_HW_LRO_AGE_TIME >> 10) & 0x3f) +#define MTK_RING_AUTO_LERAN_MODE (3 << 6) +#define MTK_RING_VLD BIT(8) +#define MTK_RING_MAX_AGG_TIME ((MTK_HW_LRO_AGG_TIME & 0xffff) << 10) +#define MTK_RING_MAX_AGG_CNT_L ((MTK_HW_LRO_MAX_AGG_CNT & 0x3f) << 26) +#define MTK_RING_MAX_AGG_CNT_H ((MTK_HW_LRO_MAX_AGG_CNT >> 6) & 0x3) + /* QDMA TX Queue Configuration Registers */ #define MTK_QTX_CFG(x) (0x1800 + (x * 0x10)) #define QDMA_RES_THRES 4 @@ -106,7 +190,6 @@ /* QDMA Reset Index Register */ #define MTK_QDMA_RST_IDX 0x1A08 -#define MTK_PST_DRX_IDX0 BIT(16) /* QDMA Delay Interrupt Register */ #define MTK_QDMA_DELAY_INT 0x1A0C @@ -119,13 +202,16 @@ /* QDMA Interrupt Status Register */ #define MTK_QMTK_INT_STATUS 0x1A18 +#define MTK_RX_DONE_INT3 BIT(19) +#define MTK_RX_DONE_INT2 BIT(18) #define MTK_RX_DONE_INT1 BIT(17) #define MTK_RX_DONE_INT0 BIT(16) #define MTK_TX_DONE_INT3 BIT(3) #define MTK_TX_DONE_INT2 BIT(2) #define MTK_TX_DONE_INT1 BIT(1) #define MTK_TX_DONE_INT0 BIT(0) -#define MTK_RX_DONE_INT (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1) +#define MTK_RX_DONE_INT (MTK_RX_DONE_INT0 | MTK_RX_DONE_INT1 | \ + MTK_RX_DONE_INT2 | MTK_RX_DONE_INT3) #define MTK_TX_DONE_INT (MTK_TX_DONE_INT0 | MTK_TX_DONE_INT1 | \ MTK_TX_DONE_INT2 | MTK_TX_DONE_INT3) @@ -227,6 +313,30 @@ MAC_MCR_FORCE_TX_FC | MAC_MCR_SPEED_1000 | \ MAC_MCR_FORCE_DPX | MAC_MCR_FORCE_LINK) +/* TRGMII RXC control register */ +#define TRGMII_RCK_CTRL 0x10300 +#define DQSI0(x) ((x << 0) & GENMASK(6, 0)) +#define DQSI1(x) ((x << 8) & GENMASK(14, 8)) +#define RXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define RXC_DQSISEL BIT(30) +#define RCK_CTRL_RGMII_1000 (RXC_DQSISEL | RXCTL_DMWTLAT(2) | DQSI1(16)) +#define RCK_CTRL_RGMII_10_100 RXCTL_DMWTLAT(2) + +/* TRGMII RXC control register */ +#define TRGMII_TCK_CTRL 0x10340 +#define TXCTL_DMWTLAT(x) ((x << 16) & GENMASK(18, 16)) +#define TXC_INV BIT(30) +#define TCK_CTRL_RGMII_1000 TXCTL_DMWTLAT(2) +#define TCK_CTRL_RGMII_10_100 (TXC_INV | TXCTL_DMWTLAT(2)) + +/* TRGMII Interface mode register */ +#define INTF_MODE 0x10390 +#define TRGMII_INTF_DIS BIT(0) +#define TRGMII_MODE BIT(1) +#define TRGMII_CENTRAL_ALIGNED BIT(2) +#define INTF_MODE_RGMII_1000 (TRGMII_MODE | TRGMII_CENTRAL_ALIGNED) +#define INTF_MODE_RGMII_10_100 0 + /* GPIO port control registers for GMAC 2*/ #define GPIO_OD33_CTRL8 0x4c0 #define GPIO_BIAS_CTRL 0xed0 @@ -237,6 +347,15 @@ #define SYSCFG0_GE_MASK 0x3 #define SYSCFG0_GE_MODE(x, y) (x << (12 + (y * 2))) +/* ethernet subsystem clock register */ +#define ETHSYS_CLKCFG0 0x2c +#define ETHSYS_TRGMII_CLK_SEL362_5 BIT(11) + +/* ethernet reset control register */ +#define ETHSYS_RSTCTRL 0x34 +#define RSTCTRL_FE BIT(6) +#define RSTCTRL_PPE BIT(31) + struct mtk_rx_dma { unsigned int rxd1; unsigned int rxd2; @@ -298,9 +417,15 @@ enum mtk_clks_map { MTK_CLK_ESW, MTK_CLK_GP1, MTK_CLK_GP2, + MTK_CLK_TRGPLL, MTK_CLK_MAX }; +enum mtk_dev_state { + MTK_HW_INIT, + MTK_RESETTING +}; + /* struct mtk_tx_buf - This struct holds the pointers to the memory pointed at * by the TX descriptor s * @skb: The SKB pointer of the packet being sent @@ -338,6 +463,12 @@ struct mtk_tx_ring { atomic_t free_count; }; +/* PDMA rx ring mode */ +enum mtk_rx_flags { + MTK_RX_FLAGS_NORMAL = 0, + MTK_RX_FLAGS_HWLRO, +}; + /* struct mtk_rx_ring - This struct holds info describing a RX ring * @dma: The descriptor ring * @data: The memory pointed at by the ring @@ -352,7 +483,10 @@ struct mtk_rx_ring { dma_addr_t phys; u16 frag_size; u16 buf_size; + u16 dma_size; + bool calc_idx_update; u16 calc_idx; + u32 crx_idx_reg; }; /* currently no SoC has more than 2 macs */ @@ -384,12 +518,12 @@ struct mtk_rx_ring { * @clks: clock array for all clocks required * @mii_bus: If there is a bus we need to create an instance for it * @pending_work: The workqueue used to reset the dma ring + * @state Initialization and runtime state of the device. */ struct mtk_eth { struct device *dev; void __iomem *base; - struct reset_control *rstc; spinlock_t page_lock; spinlock_t irq_lock; struct net_device dummy_dev; @@ -400,9 +534,10 @@ struct mtk_eth { unsigned long sysclk; struct regmap *ethsys; struct regmap *pctl; + bool hwlro; atomic_t dma_refcnt; struct mtk_tx_ring tx_ring; - struct mtk_rx_ring rx_ring; + struct mtk_rx_ring rx_ring[MTK_MAX_RX_RING_NUM]; struct napi_struct tx_napi; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; @@ -412,22 +547,28 @@ struct mtk_eth { struct mii_bus *mii_bus; struct work_struct pending_work; + unsigned long state; }; /* struct mtk_mac - the structure that holds the info about the MACs of the * SoC * @id: The number of the MAC + * @ge_mode: Interface mode kept for setup restoring * @of_node: Our devicetree node * @hw: Backpointer to our main datastruture * @hw_stats: Packet statistics counter - * @phy_dev: The attached PHY if available + * @trgmii Indicate if the MAC uses TRGMII connected to internal + switch */ struct mtk_mac { int id; + int ge_mode; struct device_node *of_node; struct mtk_eth *hw; struct mtk_hw_stats *hw_stats; - struct phy_device *phy_dev; + __be32 hwlro_ip[MTK_MAX_LRO_IP_CNT]; + int hwlro_ip_cnt; + bool trgmii; }; /* the struct describing the SoC. these are declared in the soc_xyz.c files */ diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index f04a423ff79d..b1cef7a0f7ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -785,17 +785,23 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + int ret; + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) return mlx4_internal_err_ret_value(dev, op, op_modifier); + down_read(&mlx4_priv(dev)->cmd.switch_sem); if (mlx4_priv(dev)->cmd.use_events) - return mlx4_cmd_wait(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_wait(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); else - return mlx4_cmd_poll(dev, in_param, out_param, - out_is_imm, in_modifier, - op_modifier, op, timeout); + ret = mlx4_cmd_poll(dev, in_param, out_param, + out_is_imm, in_modifier, + op_modifier, op, timeout); + + up_read(&mlx4_priv(dev)->cmd.switch_sem); + return ret; } return mlx4_slave_cmd(dev, in_param, out_param, out_is_imm, in_modifier, op_modifier, op, timeout); @@ -1845,6 +1851,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, if (vp_oper->state.default_vlan == vp_admin->default_vlan && vp_oper->state.default_qos == vp_admin->default_qos && + vp_oper->state.vlan_proto == vp_admin->vlan_proto && vp_oper->state.link_state == vp_admin->link_state && vp_oper->state.qos_vport == vp_admin->qos_vport) return 0; @@ -1903,6 +1910,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, vp_oper->state.default_vlan = vp_admin->default_vlan; vp_oper->state.default_qos = vp_admin->default_qos; + vp_oper->state.vlan_proto = vp_admin->vlan_proto; vp_oper->state.link_state = vp_admin->link_state; vp_oper->state.qos_vport = vp_admin->qos_vport; @@ -1916,6 +1924,7 @@ static int mlx4_master_immediate_activate_vlan_qos(struct mlx4_priv *priv, work->qos_vport = vp_oper->state.qos_vport; work->vlan_id = vp_oper->state.default_vlan; work->vlan_ix = vp_oper->vlan_idx; + work->vlan_proto = vp_oper->state.vlan_proto; work->priv = priv; INIT_WORK(&work->work, mlx4_vf_immed_vlan_work_handler); queue_work(priv->mfunc.master.comm_wq, &work->work); @@ -1986,6 +1995,8 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) int port, err; struct mlx4_vport_state *vp_admin; struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; struct mlx4_active_ports actv_ports = mlx4_get_active_ports( &priv->dev, slave); int min_port = find_first_bit(actv_ports.ports, @@ -2000,12 +2011,26 @@ static int mlx4_master_activate_admin_state(struct mlx4_priv *priv, int slave) priv->mfunc.master.vf_admin[slave].enable_smi[port]; vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; - vp_oper->state = *vp_admin; + if (vp_admin->vlan_proto != htons(ETH_P_8021AD) || + slave_state->vst_qinq_supported) { + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + } + vp_oper->state.link_state = vp_admin->link_state; + vp_oper->state.mac = vp_admin->mac; + vp_oper->state.spoofchk = vp_admin->spoofchk; + vp_oper->state.tx_rate = vp_admin->tx_rate; + vp_oper->state.qos_vport = vp_admin->qos_vport; + vp_oper->state.guid = vp_admin->guid; + if (MLX4_VGT != vp_admin->default_vlan) { err = __mlx4_register_vlan(&priv->dev, port, vp_admin->default_vlan, &(vp_oper->vlan_idx)); if (err) { vp_oper->vlan_idx = NO_INDX; + vp_oper->state.default_vlan = MLX4_VGT; + vp_oper->state.vlan_proto = htons(ETH_P_8021Q); mlx4_warn(&priv->dev, "No vlan resources slave %d, port %d\n", slave, port); @@ -2086,6 +2111,7 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, mlx4_warn(dev, "Received reset from slave:%d\n", slave); slave_state[slave].active = false; slave_state[slave].old_vlan_api = false; + slave_state[slave].vst_qinq_supported = false; mlx4_master_deactivate_admin_state(priv, slave); for (i = 0; i < MLX4_EVENT_TYPES_NUM; ++i) { slave_state[slave].event_eq[i].eqn = -1; @@ -2353,6 +2379,7 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) vf_oper = &priv->mfunc.master.vf_oper[i]; s_state = &priv->mfunc.master.slave_state[i]; s_state->last_cmd = MLX4_COMM_CMD_RESET; + s_state->vst_qinq_supported = false; mutex_init(&priv->mfunc.master.gen_eqe_mutex[i]); for (j = 0; j < MLX4_EVENT_TYPES_NUM; ++j) s_state->event_eq[j].eqn = -1; @@ -2382,6 +2409,8 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) admin_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; oper_vport->qos_vport = MLX4_VPP_DEFAULT_VPORT; + admin_vport->vlan_proto = htons(ETH_P_8021Q); + oper_vport->vlan_proto = htons(ETH_P_8021Q); vf_oper->vport[port].vlan_idx = NO_INDX; vf_oper->vport[port].mac_idx = NO_INDX; mlx4_set_random_admin_guid(dev, i, port); @@ -2454,6 +2483,7 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { + init_rwsem(&priv->cmd.switch_sem); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2583,6 +2613,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; @@ -2606,6 +2637,7 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; + up_write(&priv->cmd.switch_sem); return err; } @@ -2618,6 +2650,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; for (i = 0; i < priv->cmd.max_cmds; ++i) @@ -2626,6 +2659,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) kfree(priv->cmd.context); up(&priv->cmd.poll_sem); + up_write(&priv->cmd.switch_sem); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) @@ -2937,10 +2971,13 @@ int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac) EXPORT_SYMBOL_GPL(mlx4_set_vf_mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos, + __be16 proto) { struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_vport_state *vf_admin; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_oper_state *vf_oper; int slave; if ((!mlx4_is_master(dev)) || @@ -2950,12 +2987,31 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) if ((vlan > 4095) || (qos > 7)) return -EINVAL; + if (proto == htons(ETH_P_8021AD) && + !(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP)) + return -EPROTONOSUPPORT; + + if (proto != htons(ETH_P_8021Q) && + proto != htons(ETH_P_8021AD)) + return -EINVAL; + + if ((proto == htons(ETH_P_8021AD)) && + ((vlan == 0) || (vlan == MLX4_VGT))) + return -EINVAL; + slave = mlx4_get_slave_indx(dev, vf); if (slave < 0) return -EINVAL; + slave_state = &priv->mfunc.master.slave_state[slave]; + if ((proto == htons(ETH_P_8021AD)) && (slave_state->active) && + (!slave_state->vst_qinq_supported)) { + mlx4_err(dev, "vf %d does not support VST QinQ mode\n", vf); + return -EPROTONOSUPPORT; + } port = mlx4_slaves_closest_port(dev, slave, port); vf_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + vf_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; if (!mlx4_valid_vf_state_change(dev, port, vf_admin, vlan, qos)) return -EPERM; @@ -2965,6 +3021,7 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) else vf_admin->default_vlan = vlan; vf_admin->default_qos = qos; + vf_admin->vlan_proto = proto; /* If rate was configured prior to VST, we saved the configured rate * in vf_admin->rate and now, if priority supported we enforce the QoS @@ -2973,7 +3030,12 @@ int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos) vf_admin->tx_rate) vf_admin->qos_vport = slave; - if (mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) + /* Try to activate new vf state without restart, + * this option is not supported while moving to VST QinQ mode. + */ + if ((proto == htons(ETH_P_8021AD) && + vf_oper->state.vlan_proto != proto) || + mlx4_master_immediate_activate_vlan_qos(priv, slave, port)) mlx4_info(dev, "updating vf %d port %d config will take effect on next VF restart\n", vf, port); @@ -3117,6 +3179,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in ivf->vlan = s_info->default_vlan; ivf->qos = s_info->default_qos; + ivf->vlan_proto = s_info->vlan_proto; if (mlx4_is_vf_vst_and_prio_qos(dev, port, s_info)) ivf->max_tx_rate = s_info->tx_rate; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index 1494997c4f7e..08fc5fc56d43 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -298,7 +298,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) if (IS_ERR(mdev->ptp_clock)) { mdev->ptp_clock = NULL; mlx4_err(mdev, "ptp_clock_register failed\n"); - } else { + } else if (mdev->ptp_clock) { mlx4_info(mdev, "registered PHC clock\n"); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index fedb829276f4..7e703bed7b82 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2400,12 +2400,14 @@ static int mlx4_en_set_vf_mac(struct net_device *dev, int queue, u8 *mac) return mlx4_set_vf_mac(mdev->dev, en_priv->port, queue, mac_u64); } -static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx4_en_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx4_en_priv *en_priv = netdev_priv(dev); struct mlx4_en_dev *mdev = en_priv->mdev; - return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos); + return mlx4_set_vf_vlan(mdev->dev, en_priv->port, vf, vlan, qos, + vlan_proto); } static int mlx4_en_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate, @@ -2643,12 +2645,16 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (IS_ERR(prog)) return PTR_ERR(prog); } + mutex_lock(&mdev->state_lock); for (i = 0; i < priv->rx_ring_num; i++) { - /* This xchg is paired with READ_ONCE in the fastpath */ - old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); } + mutex_unlock(&mdev->state_lock); return 0; } @@ -2681,7 +2687,10 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) priv->xdp_ring_num); for (i = 0; i < priv->rx_ring_num; i++) { - old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + old_prog = rcu_dereference_protected( + priv->rx_ring[i]->xdp_prog, + lockdep_is_held(&mdev->state_lock)); + rcu_assign_pointer(priv->rx_ring[i]->xdp_prog, prog); if (old_prog) bpf_prog_put(old_prog); } @@ -3217,6 +3226,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, } if (mlx4_is_slave(mdev->dev)) { + bool vlan_offload_disabled; int phv; err = get_phv_bit(mdev->dev, port, &phv); @@ -3224,6 +3234,18 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, dev->hw_features |= NETIF_F_HW_VLAN_STAG_TX; priv->pflags |= MLX4_EN_PRIV_FLAGS_PHV; } + err = mlx4_get_is_vlan_offload_disabled(mdev->dev, port, + &vlan_offload_disabled); + if (!err && vlan_offload_disabled) { + dev->hw_features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + dev->features &= ~(NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_STAG_TX | + NETIF_F_HW_VLAN_STAG_RX); + } } else { if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PHV_EN && !(mdev->dev->caps.flags2 & diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index 2040dad8611d..f2e8beddcf44 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -72,7 +72,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, frag_info->dma_dir); - if (dma_mapping_error(priv->ddev, dma)) { + if (unlikely(dma_mapping_error(priv->ddev, dma))) { put_page(page); return -ENOMEM; } @@ -108,7 +108,8 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv, ring_alloc[i].page_size) continue; - if (mlx4_alloc_pages(priv, &page_alloc[i], frag_info, gfp)) + if (unlikely(mlx4_alloc_pages(priv, &page_alloc[i], + frag_info, gfp))) goto out; } @@ -537,7 +538,9 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring = *pring; struct bpf_prog *old_prog; - old_prog = READ_ONCE(ring->xdp_prog); + old_prog = rcu_dereference_protected( + ring->xdp_prog, + lockdep_is_held(&mdev->state_lock)); if (old_prog) bpf_prog_put(old_prog); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); @@ -583,7 +586,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv, frag_info = &priv->frag_info[nr]; if (length <= frag_info->frag_prefix_size) break; - if (!frags[nr].page) + if (unlikely(!frags[nr].page)) goto fail; dma = be64_to_cpu(rx_desc->data[nr].addr); @@ -623,7 +626,7 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv, dma_addr_t dma; skb = netdev_alloc_skb(priv->dev, SMALL_PACKET_SIZE + NET_IP_ALIGN); - if (!skb) { + if (unlikely(!skb)) { en_dbg(RX_ERR, priv, "Failed allocating skb\n"); return NULL; } @@ -734,7 +737,8 @@ static int get_fixed_ipv6_csum(__wsum hw_checksum, struct sk_buff *skb, { __wsum csum_pseudo_hdr = 0; - if (ipv6h->nexthdr == IPPROTO_FRAGMENT || ipv6h->nexthdr == IPPROTO_HOPOPTS) + if (unlikely(ipv6h->nexthdr == IPPROTO_FRAGMENT || + ipv6h->nexthdr == IPPROTO_HOPOPTS)) return -1; hw_checksum = csum_add(hw_checksum, (__force __wsum)htons(ipv6h->nexthdr)); @@ -767,7 +771,7 @@ static int check_csum(struct mlx4_cqe *cqe, struct sk_buff *skb, void *va, get_fixed_ipv4_csum(hw_checksum, skb, hdr); #if IS_ENABLED(CONFIG_IPV6) else if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_IPV6)) - if (get_fixed_ipv6_csum(hw_checksum, skb, hdr)) + if (unlikely(get_fixed_ipv6_csum(hw_checksum, skb, hdr))) return -1; #endif return 0; @@ -794,13 +798,15 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud u64 timestamp; bool l2_tunnel; - if (!priv->port_up) + if (unlikely(!priv->port_up)) return 0; - if (budget <= 0) + if (unlikely(budget <= 0)) return polled; - xdp_prog = READ_ONCE(ring->xdp_prog); + /* Protect accesses to: ring->xdp_prog, priv->mac_hash list */ + rcu_read_lock(); + xdp_prog = rcu_dereference(ring->xdp_prog); doorbell_pending = 0; tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; @@ -858,15 +864,11 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* Drop the packet, since HW loopback-ed it */ mac_hash = ethh->h_source[MLX4_EN_MAC_HASH_IDX]; bucket = &priv->mac_hash[mac_hash]; - rcu_read_lock(); hlist_for_each_entry_rcu(entry, bucket, hlist) { if (ether_addr_equal_64bits(entry->mac, - ethh->h_source)) { - rcu_read_unlock(); + ethh->h_source)) goto next; - } } - rcu_read_unlock(); } } @@ -902,16 +904,17 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud case XDP_PASS: break; case XDP_TX: - if (!mlx4_en_xmit_frame(frags, dev, + if (likely(!mlx4_en_xmit_frame(frags, dev, length, tx_index, - &doorbell_pending)) + &doorbell_pending))) goto consumed; - break; + goto xdp_drop; /* Drop on xmit failure */ default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: case XDP_DROP: - if (mlx4_en_rx_recycle(ring, frags)) +xdp_drop: + if (likely(mlx4_en_rx_recycle(ring, frags))) goto consumed; goto next; } @@ -1015,12 +1018,12 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud /* GRO not possible, complete processing here */ skb = mlx4_en_rx_skb(priv, rx_desc, frags, length); - if (!skb) { + if (unlikely(!skb)) { ring->dropped++; goto next; } - if (unlikely(priv->validate_loopback)) { + if (unlikely(priv->validate_loopback)) { validate_loopback(priv, skb); goto next; } @@ -1077,6 +1080,7 @@ consumed: } out: + rcu_read_unlock(); if (doorbell_pending) mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index d728704d0c7b..f9cbc67f1694 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -158,7 +158,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [31] = "Modifying loopback source checks using UPDATE_QP support", [32] = "Loopback source checks support", [33] = "RoCEv2 support", - [34] = "DMFS Sniffer support (UC & MC)" + [34] = "DMFS Sniffer support (UC & MC)", + [35] = "QinQ VST mode support", }; int i; @@ -248,6 +249,72 @@ out: return err; } +static int mlx4_activate_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + + if (vp_admin->default_vlan != vp_oper->state.default_vlan) { + err = __mlx4_register_vlan(&priv->dev, port, + vp_admin->default_vlan, + &vp_oper->vlan_idx); + if (err) { + vp_oper->vlan_idx = NO_INDX; + mlx4_warn(&priv->dev, + "No vlan resources slave %d, port %d\n", + slave, port); + return err; + } + mlx4_dbg(&priv->dev, "alloc vlan %d idx %d slave %d port %d\n", + (int)(vp_oper->state.default_vlan), + vp_oper->vlan_idx, slave, port); + } + vp_oper->state.vlan_proto = vp_admin->vlan_proto; + vp_oper->state.default_vlan = vp_admin->default_vlan; + vp_oper->state.default_qos = vp_admin->default_qos; + + return 0; +} + +static int mlx4_handle_vst_qinq(struct mlx4_priv *priv, int slave, int port) +{ + struct mlx4_vport_oper_state *vp_oper; + struct mlx4_slave_state *slave_state; + struct mlx4_vport_state *vp_admin; + int err; + + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + vp_admin = &priv->mfunc.master.vf_admin[slave].vport[port]; + slave_state = &priv->mfunc.master.slave_state[slave]; + + if ((vp_admin->vlan_proto != htons(ETH_P_8021AD)) || + (!slave_state->active)) + return 0; + + if (vp_oper->state.vlan_proto == vp_admin->vlan_proto && + vp_oper->state.default_vlan == vp_admin->default_vlan && + vp_oper->state.default_qos == vp_admin->default_qos) + return 0; + + if (!slave_state->vst_qinq_supported) { + /* Warn and revert the request to set vst QinQ mode */ + vp_admin->vlan_proto = vp_oper->state.vlan_proto; + vp_admin->default_vlan = vp_oper->state.default_vlan; + vp_admin->default_qos = vp_oper->state.default_qos; + + mlx4_warn(&priv->dev, + "Slave %d does not support VST QinQ mode\n", slave); + return 0; + } + + err = mlx4_activate_vst_qinq(priv, slave, port); + return err; +} + int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, struct mlx4_vhcr *vhcr, struct mlx4_cmd_mailbox *inbox, @@ -311,14 +378,18 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, #define QUERY_FUNC_CAP_VF_ENABLE_QP0 0x08 #define QUERY_FUNC_CAP_FLAGS0_FORCE_PHY_WQE_GID 0x80 -#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS (1 << 31) #define QUERY_FUNC_CAP_PHV_BIT 0x40 +#define QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE 0x20 + +#define QUERY_FUNC_CAP_SUPPORTS_VST_QINQ BIT(30) +#define QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS BIT(31) if (vhcr->op_modifier == 1) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); int converted_port = mlx4_slave_convert_port( dev, slave, vhcr->in_modifier); + struct mlx4_vport_oper_state *vp_oper; if (converted_port < 0) return -EINVAL; @@ -357,15 +428,24 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, MLX4_PUT(outbox->buf, dev->caps.phys_port_id[vhcr->in_modifier], QUERY_FUNC_CAP_PHYS_PORT_ID); - if (dev->caps.phv_bit[port]) { - field = QUERY_FUNC_CAP_PHV_BIT; - MLX4_PUT(outbox->buf, field, - QUERY_FUNC_CAP_FLAGS0_OFFSET); - } + vp_oper = &priv->mfunc.master.vf_oper[slave].vport[port]; + err = mlx4_handle_vst_qinq(priv, slave, port); + if (err) + return err; + + field = 0; + if (dev->caps.phv_bit[port]) + field |= QUERY_FUNC_CAP_PHV_BIT; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + field |= QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE; + MLX4_PUT(outbox->buf, field, QUERY_FUNC_CAP_FLAGS0_OFFSET); } else if (vhcr->op_modifier == 0) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, slave); + struct mlx4_slave_state *slave_state = + &priv->mfunc.master.slave_state[slave]; + /* enable rdma and ethernet interfaces, new quota locations, * and reserved lkey */ @@ -439,6 +519,10 @@ int mlx4_QUERY_FUNC_CAP_wrapper(struct mlx4_dev *dev, int slave, size = dev->caps.reserved_lkey + ((slave << 8) & 0xFF00); MLX4_PUT(outbox->buf, size, QUERY_FUNC_CAP_QP_RESD_LKEY_OFFSET); + + if (vhcr->in_modifier & QUERY_FUNC_CAP_SUPPORTS_VST_QINQ) + slave_state->vst_qinq_supported = true; + } else err = -EINVAL; @@ -454,10 +538,12 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, u32 size, qkey; int err = 0, quotas = 0; u32 in_modifier; + u32 slave_caps; op_modifier = !!gen_or_port; /* 0 = general, 1 = logical port */ - in_modifier = op_modifier ? gen_or_port : + slave_caps = QUERY_FUNC_CAP_SUPPORTS_VST_QINQ | QUERY_FUNC_CAP_SUPPORTS_NON_POWER_OF_2_NUM_EQS; + in_modifier = op_modifier ? gen_or_port : slave_caps; mailbox = mlx4_alloc_cmd_mailbox(dev); if (IS_ERR(mailbox)) @@ -612,8 +698,7 @@ int mlx4_QUERY_FUNC_CAP(struct mlx4_dev *dev, u8 gen_or_port, MLX4_GET(func_cap->phys_port_id, outbox, QUERY_FUNC_CAP_PHYS_PORT_ID); - MLX4_GET(field, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); - func_cap->flags |= (field & QUERY_FUNC_CAP_PHV_BIT); + MLX4_GET(func_cap->flags0, outbox, QUERY_FUNC_CAP_FLAGS0_OFFSET); /* All other resources are allocated by the master, but we still report * 'num' and 'reserved' capabilities as follows: @@ -690,6 +775,7 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) #define QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET 0x52 #define QUERY_DEV_CAP_MAX_SG_RQ_OFFSET 0x55 #define QUERY_DEV_CAP_MAX_DESC_SZ_RQ_OFFSET 0x56 +#define QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET 0x5D #define QUERY_DEV_CAP_MAX_QP_MCG_OFFSET 0x61 #define QUERY_DEV_CAP_RSVD_MCG_OFFSET 0x62 #define QUERY_DEV_CAP_MAX_MCG_OFFSET 0x63 @@ -767,12 +853,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->max_eqs = 1 << (field & 0xf); MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MTT_OFFSET); dev_cap->reserved_mtts = 1 << (field >> 4); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MRW_SZ_OFFSET); - dev_cap->max_mrw_sz = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MRW_OFFSET); dev_cap->reserved_mrws = 1 << (field & 0xf); - MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_MTT_SEG_OFFSET); - dev_cap->max_mtt_seg = 1 << (field & 0x3f); MLX4_GET(size, outbox, QUERY_DEV_CAP_NUM_SYS_EQ_OFFSET); dev_cap->num_sys_eqs = size & 0xfff; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_REQ_QP_OFFSET); @@ -857,6 +939,9 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(size, outbox, QUERY_DEV_CAP_MAX_DESC_SZ_SQ_OFFSET); dev_cap->max_sq_desc_sz = size; + MLX4_GET(field, outbox, QUERY_DEV_CAP_SVLAN_BY_QP_OFFSET); + if (field & 0x1) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP; MLX4_GET(field, outbox, QUERY_DEV_CAP_MAX_QP_MCG_OFFSET); dev_cap->max_qp_per_mcg = 1 << field; MLX4_GET(field, outbox, QUERY_DEV_CAP_RSVD_MCG_OFFSET); @@ -2914,7 +2999,7 @@ int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv) memset(&func_cap, 0, sizeof(func_cap)); err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); if (!err) - *phv = func_cap.flags & QUERY_FUNC_CAP_PHV_BIT; + *phv = func_cap.flags0 & QUERY_FUNC_CAP_PHV_BIT; return err; } EXPORT_SYMBOL(get_phv_bit); @@ -2938,6 +3023,22 @@ int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val) } EXPORT_SYMBOL(set_phv_bit); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled) +{ + struct mlx4_func_cap func_cap; + int err; + + memset(&func_cap, 0, sizeof(func_cap)); + err = mlx4_QUERY_FUNC_CAP(dev, port, &func_cap); + if (!err) + *vlan_offload_disabled = + !!(func_cap.flags0 & + QUERY_FUNC_CAP_VLAN_OFFLOAD_DISABLE); + return err; +} +EXPORT_SYMBOL(mlx4_get_is_vlan_offload_disabled); + void mlx4_replace_zero_macs(struct mlx4_dev *dev) { int i; diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.h b/drivers/net/ethernet/mellanox/mlx4/fw.h index cdbd76f10ced..5343a0599253 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.h +++ b/drivers/net/ethernet/mellanox/mlx4/fw.h @@ -80,9 +80,7 @@ struct mlx4_dev_cap { int max_eqs; int num_sys_eqs; int reserved_mtts; - int max_mrw_sz; int reserved_mrws; - int max_mtt_seg; int max_requester_per_qp; int max_responder_per_qp; int max_rdma_global; @@ -152,7 +150,7 @@ struct mlx4_func_cap { u32 qp1_proxy_qpn; u32 reserved_lkey; u8 physical_port; - u8 port_flags; + u8 flags0; u8 flags1; u64 phys_port_id; u32 extra_flags; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index c9d7fc5159f2..e4878f31e45d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include @@ -482,6 +483,7 @@ struct mlx4_slave_state { u8 init_port_mask; bool active; bool old_vlan_api; + bool vst_qinq_supported; u8 function; dma_addr_t vhcr_dma; u16 mtu[MLX4_MAX_PORTS + 1]; @@ -507,6 +509,7 @@ struct mlx4_vport_state { u64 mac; u16 default_vlan; u8 default_qos; + __be16 vlan_proto; u32 tx_rate; bool spoofchk; u32 link_state; @@ -627,6 +630,7 @@ struct mlx4_cmd { struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; + struct rw_semaphore switch_sem; int max_cmds; spinlock_t context_lock; int free_head; @@ -655,6 +659,7 @@ struct mlx4_vf_immed_vlan_work { u8 qos_vport; u16 vlan_id; u16 orig_vlan_id; + __be16 vlan_proto; }; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index 9099dbd04951..a3528dd1e72e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -340,7 +340,7 @@ struct mlx4_en_rx_ring { u8 fcs_del; void *buf; void *rx_info; - struct bpf_prog *xdp_prog; + struct bpf_prog __rcu *xdp_prog; struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 8b81114bdc72..84d7857ccc27 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -790,10 +790,22 @@ static int update_vport_qp_param(struct mlx4_dev *dev, MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; } else if (0 != vp_oper->state.default_vlan) { - qpc->pri_path.vlan_control |= - MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | - MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) { + /* vst QinQ should block untagged on TX, + * but cvlan is in payload and phv is set so + * hw see it as untagged. Block tagged instead. + */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } else { /* vst 802.1Q */ + qpc->pri_path.vlan_control |= + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + } } else { /* priority tagged */ qpc->pri_path.vlan_control |= MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | @@ -802,7 +814,11 @@ static int update_vport_qp_param(struct mlx4_dev *dev, qpc->pri_path.fvl_rx |= MLX4_FVL_RX_FORCE_ETH_VLAN; qpc->pri_path.vlan_index = vp_oper->vlan_idx; - qpc->pri_path.fl |= MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qpc->pri_path.fl |= MLX4_FL_ETH_HIDE_CQE_VLAN; + if (vp_oper->state.vlan_proto == htons(ETH_P_8021AD)) + qpc->pri_path.fl |= MLX4_FL_SV; + else + qpc->pri_path.fl |= MLX4_FL_CV; qpc->pri_path.feup |= MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; qpc->pri_path.sched_queue &= 0xC7; qpc->pri_path.sched_queue |= (vp_oper->state.default_qos) << 3; @@ -5238,6 +5254,7 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) u64 qp_path_mask = ((1ULL << MLX4_UPD_QP_PATH_MASK_VLAN_INDEX) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL) | (1ULL << MLX4_UPD_QP_PATH_MASK_CV) | + (1ULL << MLX4_UPD_QP_PATH_MASK_SV) | (1ULL << MLX4_UPD_QP_PATH_MASK_ETH_HIDE_CQE_VLAN) | (1ULL << MLX4_UPD_QP_PATH_MASK_FEUP) | (1ULL << MLX4_UPD_QP_PATH_MASK_FVL_RX) | @@ -5266,7 +5283,12 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) else if (!work->vlan_id) vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_TAGGED; - else + else if (work->vlan_proto == htons(ETH_P_8021AD)) + vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | + MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; + else /* vst 802.1Q */ vlan_control = MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_PRIO_TAGGED | MLX4_VLAN_CTRL_ETH_RX_BLOCK_UNTAGGED; @@ -5311,7 +5333,11 @@ void mlx4_vf_immed_vlan_work_handler(struct work_struct *_work) upd_context->qp_context.pri_path.fvl_rx = qp->fvl_rx | MLX4_FVL_RX_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.fl = - qp->pri_path_fl | MLX4_FL_CV | MLX4_FL_ETH_HIDE_CQE_VLAN; + qp->pri_path_fl | MLX4_FL_ETH_HIDE_CQE_VLAN; + if (work->vlan_proto == htons(ETH_P_8021AD)) + upd_context->qp_context.pri_path.fl |= MLX4_FL_SV; + else + upd_context->qp_context.pri_path.fl |= MLX4_FL_CV; upd_context->qp_context.pri_path.feup = qp->feup | MLX4_FEUP_FORCE_ETH_UP | MLX4_FVL_FORCE_ETH_VLAN; upd_context->qp_context.pri_path.sched_queue = diff --git a/drivers/net/ethernet/mellanox/mlx4/srq.c b/drivers/net/ethernet/mellanox/mlx4/srq.c index 67146624eb58..f44d089e2ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/srq.c +++ b/drivers/net/ethernet/mellanox/mlx4/srq.c @@ -45,15 +45,12 @@ void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type) struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - spin_lock(&srq_table->lock); - + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); + rcu_read_unlock(); if (srq) atomic_inc(&srq->refcount); - - spin_unlock(&srq_table->lock); - - if (!srq) { + else { mlx4_warn(dev, "Async event for bogus SRQ %08x\n", srqn); return; } @@ -301,12 +298,11 @@ struct mlx4_srq *mlx4_srq_lookup(struct mlx4_dev *dev, u32 srqn) { struct mlx4_srq_table *srq_table = &mlx4_priv(dev)->srq_table; struct mlx4_srq *srq; - unsigned long flags; - spin_lock_irqsave(&srq_table->lock, flags); + rcu_read_lock(); srq = radix_tree_lookup(&srq_table->tree, srqn & (dev->caps.num_srqs - 1)); - spin_unlock_irqrestore(&srq_table->lock, flags); + rcu_read_unlock(); return srq; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 05cc1effc13c..0343725d7f44 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ health.o mcg.o cq.o srq.o alloc.o qp.o port.o mr.o pd.o \ mad.o transobj.o vport.o sriov.o fs_cmd.o fs_core.o \ - fs_counters.o rl.o + fs_counters.o rl.o lag.o dev.o mlx5_core-$(CONFIG_MLX5_CORE_EN) += wq.o eswitch.o eswitch_offloads.o \ en_main.o en_common.o en_fs.o en_ethtool.o en_tx.o \ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index c2ec01a22d55..1e639f886021 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -294,11 +294,13 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_DEALLOC_Q_COUNTER: case MLX5_CMD_OP_DEALLOC_PD: case MLX5_CMD_OP_DEALLOC_UAR: - case MLX5_CMD_OP_DETTACH_FROM_MCG: + case MLX5_CMD_OP_DETACH_FROM_MCG: case MLX5_CMD_OP_DEALLOC_XRCD: case MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN: case MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT: case MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY: + case MLX5_CMD_OP_DESTROY_LAG: + case MLX5_CMD_OP_DESTROY_VPORT_LAG: case MLX5_CMD_OP_DESTROY_TIR: case MLX5_CMD_OP_DESTROY_SQ: case MLX5_CMD_OP_DESTROY_RQ: @@ -315,6 +317,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_MODIFY_FLOW_TABLE: case MLX5_CMD_OP_SET_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_SET_FLOW_TABLE_ROOT: + case MLX5_CMD_OP_DEALLOC_ENCAP_HEADER: return MLX5_CMD_STAT_OK; case MLX5_CMD_OP_QUERY_HCA_CAP: @@ -389,6 +392,10 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT: case MLX5_CMD_OP_SET_L2_TABLE_ENTRY: case MLX5_CMD_OP_QUERY_L2_TABLE_ENTRY: + case MLX5_CMD_OP_CREATE_LAG: + case MLX5_CMD_OP_MODIFY_LAG: + case MLX5_CMD_OP_QUERY_LAG: + case MLX5_CMD_OP_CREATE_VPORT_LAG: case MLX5_CMD_OP_CREATE_TIR: case MLX5_CMD_OP_MODIFY_TIR: case MLX5_CMD_OP_QUERY_TIR: @@ -416,6 +423,7 @@ static int mlx5_internal_err_ret_value(struct mlx5_core_dev *dev, u16 op, case MLX5_CMD_OP_QUERY_FLOW_TABLE_ENTRY: case MLX5_CMD_OP_ALLOC_FLOW_COUNTER: case MLX5_CMD_OP_QUERY_FLOW_COUNTER: + case MLX5_CMD_OP_ALLOC_ENCAP_HEADER: *status = MLX5_DRIVER_STATUS_ABORTED; *synd = MLX5_DRIVER_SYND; return -EIO; @@ -504,7 +512,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(CONFIG_INT_MODERATION); MLX5_COMMAND_STR_CASE(ACCESS_REG); MLX5_COMMAND_STR_CASE(ATTACH_TO_MCG); - MLX5_COMMAND_STR_CASE(DETTACH_FROM_MCG); + MLX5_COMMAND_STR_CASE(DETACH_FROM_MCG); MLX5_COMMAND_STR_CASE(GET_DROPPED_PACKET_LOG); MLX5_COMMAND_STR_CASE(MAD_IFC); MLX5_COMMAND_STR_CASE(QUERY_MAD_DEMUX); @@ -526,6 +534,12 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DELETE_L2_TABLE_ENTRY); MLX5_COMMAND_STR_CASE(SET_WOL_ROL); MLX5_COMMAND_STR_CASE(QUERY_WOL_ROL); + MLX5_COMMAND_STR_CASE(CREATE_LAG); + MLX5_COMMAND_STR_CASE(MODIFY_LAG); + MLX5_COMMAND_STR_CASE(QUERY_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_LAG); + MLX5_COMMAND_STR_CASE(CREATE_VPORT_LAG); + MLX5_COMMAND_STR_CASE(DESTROY_VPORT_LAG); MLX5_COMMAND_STR_CASE(CREATE_TIR); MLX5_COMMAND_STR_CASE(MODIFY_TIR); MLX5_COMMAND_STR_CASE(DESTROY_TIR); @@ -564,15 +578,130 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); + MLX5_COMMAND_STR_CASE(ALLOC_ENCAP_HEADER); + MLX5_COMMAND_STR_CASE(DEALLOC_ENCAP_HEADER); default: return "unknown command opcode"; } } +static const char *cmd_status_str(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: + return "OK"; + case MLX5_CMD_STAT_INT_ERR: + return "internal error"; + case MLX5_CMD_STAT_BAD_OP_ERR: + return "bad operation"; + case MLX5_CMD_STAT_BAD_PARAM_ERR: + return "bad parameter"; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: + return "bad system state"; + case MLX5_CMD_STAT_BAD_RES_ERR: + return "bad resource"; + case MLX5_CMD_STAT_RES_BUSY: + return "resource busy"; + case MLX5_CMD_STAT_LIM_ERR: + return "limits exceeded"; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: + return "bad resource state"; + case MLX5_CMD_STAT_IX_ERR: + return "bad index"; + case MLX5_CMD_STAT_NO_RES_ERR: + return "no resources"; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: + return "bad input length"; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: + return "bad output length"; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: + return "bad QP state"; + case MLX5_CMD_STAT_BAD_PKT_ERR: + return "bad packet (discarded)"; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: + return "bad size too many outstanding CQEs"; + default: + return "unknown status"; + } +} + +static int cmd_status_to_err(u8 status) +{ + switch (status) { + case MLX5_CMD_STAT_OK: return 0; + case MLX5_CMD_STAT_INT_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; + case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; + case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; + case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_IX_ERR: return -EINVAL; + case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; + case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; + case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; + case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; + default: return -EIO; + } +} + +struct mlx5_ifc_mbox_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_mbox_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome) +{ + *status = MLX5_GET(mbox_out, out, status); + *syndrome = MLX5_GET(mbox_out, out, syndrome); +} + +static int mlx5_cmd_check(struct mlx5_core_dev *dev, void *in, void *out) +{ + u32 syndrome; + u8 status; + u16 opcode; + u16 op_mod; + + mlx5_cmd_mbox_status(out, &status, &syndrome); + if (!status) + return 0; + + opcode = MLX5_GET(mbox_in, in, opcode); + op_mod = MLX5_GET(mbox_in, in, op_mod); + + mlx5_core_err(dev, + "%s(0x%x) op_mod(0x%x) failed, status %s(0x%x), syndrome (0x%x)\n", + mlx5_command_str(opcode), + opcode, op_mod, + cmd_status_str(status), + status, + syndrome); + + return cmd_status_to_err(status); +} + static void dump_command(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent, int input) { - u16 op = be16_to_cpu(((struct mlx5_inbox_hdr *)(ent->lay->in))->opcode); struct mlx5_cmd_msg *msg = input ? ent->in : ent->out; + u16 op = MLX5_GET(mbox_in, ent->lay->in, opcode); struct mlx5_cmd_mailbox *next = msg->next; int data_only; u32 offset = 0; @@ -622,9 +751,7 @@ static void dump_command(struct mlx5_core_dev *dev, static u16 msg_to_opcode(struct mlx5_cmd_msg *in) { - struct mlx5_inbox_hdr *hdr = (struct mlx5_inbox_hdr *)(in->first.data); - - return be16_to_cpu(hdr->opcode); + return MLX5_GET(mbox_in, in->first.data, opcode); } static void cb_timeout_handler(struct work_struct *work) @@ -762,16 +889,6 @@ static int wait_func(struct mlx5_core_dev *dev, struct mlx5_cmd_work_ent *ent) return err; } -static __be32 *get_synd_ptr(struct mlx5_outbox_hdr *out) -{ - return &out->syndrome; -} - -static u8 *get_status_ptr(struct mlx5_outbox_hdr *out) -{ - return &out->status; -} - /* Notes: * 1. Callback functions may not sleep * 2. page queue commands do not support asynchrous completion @@ -820,7 +937,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in, goto out_free; ds = ent->ts2 - ent->ts1; - op = be16_to_cpu(((struct mlx5_inbox_hdr *)in->first.data)->opcode); + op = MLX5_GET(mbox_in, in->first.data, opcode); if (op < ARRAY_SIZE(cmd->stats)) { stats = &cmd->stats[op]; spin_lock_irq(&stats->lock); @@ -1035,7 +1152,6 @@ static ssize_t data_write(struct file *filp, const char __user *buf, struct mlx5_core_dev *dev = filp->private_data; struct mlx5_cmd_debug *dbg = &dev->cmd.dbg; void *ptr; - int err; if (*pos != 0) return -EINVAL; @@ -1043,25 +1159,15 @@ static ssize_t data_write(struct file *filp, const char __user *buf, kfree(dbg->in_msg); dbg->in_msg = NULL; dbg->inlen = 0; - - ptr = kzalloc(count, GFP_KERNEL); - if (!ptr) - return -ENOMEM; - - if (copy_from_user(ptr, buf, count)) { - err = -EFAULT; - goto out; - } + ptr = memdup_user(buf, count); + if (IS_ERR(ptr)) + return PTR_ERR(ptr); dbg->in_msg = ptr; dbg->inlen = count; *pos = count; return count; - -out: - kfree(ptr); - return err; } static ssize_t data_read(struct file *filp, char __user *buf, size_t count, @@ -1321,11 +1427,16 @@ void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec) callback = ent->callback; context = ent->context; err = ent->ret; - if (!err) + if (!err) { err = mlx5_copy_from_msg(ent->uout, ent->out, ent->uout_size); + err = err ? err : mlx5_cmd_check(dev, + ent->in->first.data, + ent->uout); + } + mlx5_free_cmd_msg(dev, ent->out); free_msg(dev, ent->in); @@ -1377,14 +1488,9 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, return msg; } -static u16 opcode_from_in(struct mlx5_inbox_hdr *in) +static int is_manage_pages(void *in) { - return be16_to_cpu(in->opcode); -} - -static int is_manage_pages(struct mlx5_inbox_hdr *in) -{ - return be16_to_cpu(in->opcode) == MLX5_CMD_OP_MANAGE_PAGES; + return MLX5_GET(mbox_in, in, opcode) == MLX5_CMD_OP_MANAGE_PAGES; } static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, @@ -1401,9 +1507,11 @@ static int cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, if (pci_channel_offline(dev->pdev) || dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { - err = mlx5_internal_err_ret_value(dev, opcode_from_in(in), &drv_synd, &status); - *get_synd_ptr(out) = cpu_to_be32(drv_synd); - *get_status_ptr(out) = status; + u16 opcode = MLX5_GET(mbox_in, in, opcode); + + err = mlx5_internal_err_ret_value(dev, opcode, &drv_synd, &status); + MLX5_SET(mbox_out, out, status, status); + MLX5_SET(mbox_out, out, syndrome, drv_synd); return err; } @@ -1457,7 +1565,10 @@ out_in: int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size) { - return cmd_exec(dev, in, in_size, out, out_size, NULL, NULL); + int err; + + err = cmd_exec(dev, in, in_size, out, out_size, NULL, NULL); + return err ? : mlx5_cmd_check(dev, in, out); } EXPORT_SYMBOL(mlx5_cmd_exec); @@ -1694,96 +1805,3 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) pci_pool_destroy(cmd->pool); } EXPORT_SYMBOL(mlx5_cmd_cleanup); - -static const char *cmd_status_str(u8 status) -{ - switch (status) { - case MLX5_CMD_STAT_OK: - return "OK"; - case MLX5_CMD_STAT_INT_ERR: - return "internal error"; - case MLX5_CMD_STAT_BAD_OP_ERR: - return "bad operation"; - case MLX5_CMD_STAT_BAD_PARAM_ERR: - return "bad parameter"; - case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: - return "bad system state"; - case MLX5_CMD_STAT_BAD_RES_ERR: - return "bad resource"; - case MLX5_CMD_STAT_RES_BUSY: - return "resource busy"; - case MLX5_CMD_STAT_LIM_ERR: - return "limits exceeded"; - case MLX5_CMD_STAT_BAD_RES_STATE_ERR: - return "bad resource state"; - case MLX5_CMD_STAT_IX_ERR: - return "bad index"; - case MLX5_CMD_STAT_NO_RES_ERR: - return "no resources"; - case MLX5_CMD_STAT_BAD_INP_LEN_ERR: - return "bad input length"; - case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: - return "bad output length"; - case MLX5_CMD_STAT_BAD_QP_STATE_ERR: - return "bad QP state"; - case MLX5_CMD_STAT_BAD_PKT_ERR: - return "bad packet (discarded)"; - case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: - return "bad size too many outstanding CQEs"; - default: - return "unknown status"; - } -} - -static int cmd_status_to_err(u8 status) -{ - switch (status) { - case MLX5_CMD_STAT_OK: return 0; - case MLX5_CMD_STAT_INT_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_OP_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_PARAM_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_SYS_STATE_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_RES_ERR: return -EINVAL; - case MLX5_CMD_STAT_RES_BUSY: return -EBUSY; - case MLX5_CMD_STAT_LIM_ERR: return -ENOMEM; - case MLX5_CMD_STAT_BAD_RES_STATE_ERR: return -EINVAL; - case MLX5_CMD_STAT_IX_ERR: return -EINVAL; - case MLX5_CMD_STAT_NO_RES_ERR: return -EAGAIN; - case MLX5_CMD_STAT_BAD_INP_LEN_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_OUTP_LEN_ERR: return -EIO; - case MLX5_CMD_STAT_BAD_QP_STATE_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_PKT_ERR: return -EINVAL; - case MLX5_CMD_STAT_BAD_SIZE_OUTS_CQES_ERR: return -EINVAL; - default: return -EIO; - } -} - -/* this will be available till all the commands use set/get macros */ -int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr) -{ - if (!hdr->status) - return 0; - - pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n", - cmd_status_str(hdr->status), hdr->status, - be32_to_cpu(hdr->syndrome)); - - return cmd_status_to_err(hdr->status); -} - -int mlx5_cmd_status_to_err_v2(void *ptr) -{ - u32 syndrome; - u8 status; - - status = be32_to_cpu(*(__be32 *)ptr) >> 24; - if (!status) - return 0; - - syndrome = be32_to_cpu(*(__be32 *)(ptr + 4)); - - pr_warn("command failed, status %s(0x%x), syndrome 0x%x\n", - cmd_status_str(status), status, syndrome); - - return cmd_status_to_err(status); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c index 873a631ad155..32d4af9b594d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c @@ -134,33 +134,29 @@ void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type) complete(&cq->free); } - int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_create_cq_mbox_in *in, int inlen) + u32 *in, int inlen) { - int err; struct mlx5_cq_table *table = &dev->priv.cq_table; - struct mlx5_create_cq_mbox_out out; - struct mlx5_destroy_cq_mbox_in din; - struct mlx5_destroy_cq_mbox_out dout; + u32 out[MLX5_ST_SZ_DW(create_cq_out)]; + u32 din[MLX5_ST_SZ_DW(destroy_cq_in)]; + u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)]; int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn); struct mlx5_eq *eq; + int err; eq = mlx5_eqn2eq(dev, eqn); if (IS_ERR(eq)) return PTR_ERR(eq); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_CQ); - memset(&out, 0, sizeof(out)); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + memset(out, 0, sizeof(out)); + MLX5_SET(create_cq_in, in, opcode, MLX5_CMD_OP_CREATE_CQ); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - cq->cqn = be32_to_cpu(out.cqn) & 0xffffff; + cq->cqn = MLX5_GET(create_cq_out, out, cqn); cq->cons_index = 0; cq->arm_sn = 0; atomic_set(&cq->refcount, 1); @@ -186,10 +182,11 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, return 0; err_cmd: - memset(&din, 0, sizeof(din)); - memset(&dout, 0, sizeof(dout)); - din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); - mlx5_cmd_exec(dev, &din, sizeof(din), &dout, sizeof(dout)); + memset(din, 0, sizeof(din)); + memset(dout, 0, sizeof(dout)); + MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, din, cqn, cq->cqn); + mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } EXPORT_SYMBOL(mlx5_core_create_cq); @@ -197,8 +194,8 @@ EXPORT_SYMBOL(mlx5_core_create_cq); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) { struct mlx5_cq_table *table = &dev->priv.cq_table; - struct mlx5_destroy_cq_mbox_in in; - struct mlx5_destroy_cq_mbox_out out; + u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0}; struct mlx5_core_cq *tmp; int err; @@ -214,17 +211,12 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) return -EINVAL; } - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_CQ); - in.cqn = cpu_to_be32(cq->cqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ); + MLX5_SET(destroy_cq_in, in, cqn, cq->cqn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - synchronize_irq(cq->irqn); mlx5_debug_cq_remove(dev, cq); @@ -237,44 +229,23 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq) EXPORT_SYMBOL(mlx5_core_destroy_cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_query_cq_mbox_out *out) + u32 *out, int outlen) { - struct mlx5_query_cq_mbox_in in; - int err; + u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(out, 0, sizeof(*out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_CQ); - in.cqn = cpu_to_be32(cq->cqn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, sizeof(*out)); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ); + MLX5_SET(query_cq_in, in, cqn, cq->cqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_cq); - int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_modify_cq_mbox_in *in, int in_sz) + u32 *in, int inlen) { - struct mlx5_modify_cq_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0}; - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MODIFY_CQ); - err = mlx5_cmd_exec(dev, in, in_sz, &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return 0; + MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_cq); @@ -283,18 +254,20 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, u16 cq_period, u16 cq_max_count) { - struct mlx5_modify_cq_mbox_in in; + u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0}; + void *cqc; - memset(&in, 0, sizeof(in)); + MLX5_SET(modify_cq_in, in, cqn, cq->cqn); + cqc = MLX5_ADDR_OF(modify_cq_in, in, cq_context); + MLX5_SET(cqc, cqc, cq_period, cq_period); + MLX5_SET(cqc, cqc, cq_max_count, cq_max_count); + MLX5_SET(modify_cq_in, in, + modify_field_select_resize_field_select.modify_field_select.modify_field_select, + MLX5_CQ_MODIFY_PERIOD | MLX5_CQ_MODIFY_COUNT); - in.cqn = cpu_to_be32(cq->cqn); - in.ctx.cq_period = cpu_to_be16(cq_period); - in.ctx.cq_max_count = cpu_to_be16(cq_max_count); - in.field_select = cpu_to_be32(MLX5_CQ_MODIFY_PERIOD | - MLX5_CQ_MODIFY_COUNT); - - return mlx5_core_modify_cq(dev, cq, &in, sizeof(in)); + return mlx5_core_modify_cq(dev, cq, in, sizeof(in)); } +EXPORT_SYMBOL(mlx5_core_modify_cq_moderation); int mlx5_init_cq_table(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c index 5210d92e6bc7..e94a9532e218 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c @@ -277,24 +277,28 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev) static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, int index, int *is_str) { - struct mlx5_query_qp_mbox_out *out; + int outlen = MLX5_ST_SZ_BYTES(query_qp_out); struct mlx5_qp_context *ctx; u64 param = 0; + u32 *out; int err; int no_sq; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); if (!out) return param; - err = mlx5_core_qp_query(dev, qp, out, sizeof(*out)); + err = mlx5_core_qp_query(dev, qp, out, outlen); if (err) { - mlx5_core_warn(dev, "failed to query qp\n"); + mlx5_core_warn(dev, "failed to query qp err=%d\n", err); goto out; } *is_str = 0; - ctx = &out->ctx; + + /* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */ + ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc); + switch (index) { case QP_PID: param = qp->pid; @@ -358,32 +362,32 @@ out: static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq, int index) { - struct mlx5_query_eq_mbox_out *out; - struct mlx5_eq_context *ctx; + int outlen = MLX5_ST_SZ_BYTES(query_eq_out); u64 param = 0; + void *ctx; + u32 *out; int err; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); if (!out) return param; - ctx = &out->ctx; - - err = mlx5_core_eq_query(dev, eq, out, sizeof(*out)); + err = mlx5_core_eq_query(dev, eq, out, outlen); if (err) { mlx5_core_warn(dev, "failed to query eq\n"); goto out; } + ctx = MLX5_ADDR_OF(query_eq_out, out, eq_context_entry); switch (index) { case EQ_NUM_EQES: - param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + param = 1 << MLX5_GET(eqc, ctx, log_eq_size); break; case EQ_INTR: - param = ctx->intr; + param = MLX5_GET(eqc, ctx, intr); break; case EQ_LOG_PG_SZ: - param = (ctx->log_page_size & 0x1f) + 12; + param = MLX5_GET(eqc, ctx, log_page_size) + 12; break; } @@ -395,37 +399,37 @@ out: static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, int index) { - struct mlx5_query_cq_mbox_out *out; - struct mlx5_cq_context *ctx; + int outlen = MLX5_ST_SZ_BYTES(query_cq_out); u64 param = 0; + void *ctx; + u32 *out; int err; - out = kzalloc(sizeof(*out), GFP_KERNEL); + out = mlx5_vzalloc(outlen); if (!out) return param; - ctx = &out->ctx; - - err = mlx5_core_query_cq(dev, cq, out); + err = mlx5_core_query_cq(dev, cq, out, outlen); if (err) { mlx5_core_warn(dev, "failed to query cq\n"); goto out; } + ctx = MLX5_ADDR_OF(query_cq_out, out, cq_context); switch (index) { case CQ_PID: param = cq->pid; break; case CQ_NUM_CQES: - param = 1 << ((be32_to_cpu(ctx->log_sz_usr_page) >> 24) & 0x1f); + param = 1 << MLX5_GET(cqc, ctx, log_cq_size); break; case CQ_LOG_PG_SZ: - param = (ctx->log_pg_sz & 0x1f) + 12; + param = MLX5_GET(cqc, ctx, log_page_size); break; } out: - kfree(out); + kvfree(out); return param; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c new file mode 100644 index 000000000000..a9dbc28f6b97 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -0,0 +1,345 @@ +/* + * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include "mlx5_core.h" + +static LIST_HEAD(intf_list); +static LIST_HEAD(mlx5_dev_list); +/* intf dev list mutex */ +static DEFINE_MUTEX(mlx5_intf_mutex); + +struct mlx5_device_context { + struct list_head list; + struct mlx5_interface *intf; + void *context; + unsigned long state; +}; + +enum { + MLX5_INTERFACE_ADDED, + MLX5_INTERFACE_ATTACHED, +}; + +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + if (!mlx5_lag_intf_add(intf, priv)) + return; + + dev_ctx = kzalloc(sizeof(*dev_ctx), GFP_KERNEL); + if (!dev_ctx) + return; + + dev_ctx->intf = intf; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + if (intf->attach) + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + + if (dev_ctx->context) { + spin_lock_irq(&priv->ctx_lock); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irq(&priv->ctx_lock); + } else { + kfree(dev_ctx); + } +} + +static struct mlx5_device_context *mlx5_get_device(struct mlx5_interface *intf, + struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf == intf) + return dev_ctx; + return NULL; +} + +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + spin_lock_irq(&priv->ctx_lock); + list_del(&dev_ctx->list); + spin_unlock_irq(&priv->ctx_lock); + + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + intf->remove(dev, dev_ctx->context); + + kfree(dev_ctx); +} + +static void mlx5_attach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->attach) { + if (test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->attach(dev, dev_ctx->context); + set_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + dev_ctx->context = intf->add(dev); + set_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_attach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_attach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +static void mlx5_detach_interface(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_device_context *dev_ctx; + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); + + dev_ctx = mlx5_get_device(intf, priv); + if (!dev_ctx) + return; + + if (intf->detach) { + if (!test_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state)) + return; + intf->detach(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ATTACHED, &dev_ctx->state); + } else { + if (!test_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state)) + return; + intf->remove(dev, dev_ctx->context); + clear_bit(MLX5_INTERFACE_ADDED, &dev_ctx->state); + } +} + +void mlx5_detach_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_detach_interface(intf, priv); + mutex_unlock(&mlx5_intf_mutex); +} + +bool mlx5_device_registered(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv; + bool found = false; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + if (priv == &dev->priv) + found = true; + mutex_unlock(&mlx5_intf_mutex); + + return found; +} + +int mlx5_register_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&priv->dev_list, &mlx5_dev_list); + list_for_each_entry(intf, &intf_list, list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} + +void mlx5_unregister_device(struct mlx5_core_dev *dev) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_interface *intf; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(intf, &intf_list, list) + mlx5_remove_device(intf, priv); + list_del(&priv->dev_list); + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_register_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + if (!intf->add || !intf->remove) + return -EINVAL; + + mutex_lock(&mlx5_intf_mutex); + list_add_tail(&intf->list, &intf_list); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_add_device(intf, priv); + mutex_unlock(&mlx5_intf_mutex); + + return 0; +} +EXPORT_SYMBOL(mlx5_register_interface); + +void mlx5_unregister_interface(struct mlx5_interface *intf) +{ + struct mlx5_priv *priv; + + mutex_lock(&mlx5_intf_mutex); + list_for_each_entry(priv, &mlx5_dev_list, dev_list) + mlx5_remove_device(intf, priv); + list_del(&intf->list); + mutex_unlock(&mlx5_intf_mutex); +} +EXPORT_SYMBOL(mlx5_unregister_interface); + +void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) +{ + struct mlx5_priv *priv = &mdev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + void *result = NULL; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) + if ((dev_ctx->intf->protocol == protocol) && + dev_ctx->intf->get_dev) { + result = dev_ctx->intf->get_dev(dev_ctx->context); + break; + } + + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + return result; +} +EXPORT_SYMBOL(mlx5_get_protocol_dev); + +/* Must be called with intf_mutex held */ +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_add_device(intf, &dev->priv); + break; + } +} + +/* Must be called with intf_mutex held */ +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol) +{ + struct mlx5_interface *intf; + + list_for_each_entry(intf, &intf_list, list) + if (intf->protocol == protocol) { + mlx5_remove_device(intf, &dev->priv); + break; + } +} + +static u16 mlx5_gen_pci_id(struct mlx5_core_dev *dev) +{ + return (u16)((dev->pdev->bus->number << 8) | + PCI_SLOT(dev->pdev->devfn)); +} + +/* Must be called with intf_mutex held */ +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev) +{ + u16 pci_id = mlx5_gen_pci_id(dev); + struct mlx5_core_dev *res = NULL; + struct mlx5_core_dev *tmp_dev; + struct mlx5_priv *priv; + + list_for_each_entry(priv, &mlx5_dev_list, dev_list) { + tmp_dev = container_of(priv, struct mlx5_core_dev, priv); + if ((dev != tmp_dev) && (mlx5_gen_pci_id(tmp_dev) == pci_id)) { + res = tmp_dev; + break; + } + } + + return res; +} + +void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, + unsigned long param) +{ + struct mlx5_priv *priv = &dev->priv; + struct mlx5_device_context *dev_ctx; + unsigned long flags; + + spin_lock_irqsave(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &priv->ctx_list, list) + if (dev_ctx->intf->event) + dev_ctx->intf->event(dev, dev_ctx->context, event, param); + + spin_unlock_irqrestore(&priv->ctx_lock, flags); +} + +void mlx5_dev_list_lock(void) +{ + mutex_lock(&mlx5_intf_mutex); +} + +void mlx5_dev_list_unlock(void) +{ + mutex_unlock(&mlx5_intf_mutex); +} + +int mlx5_dev_list_trylock(void) +{ + return mutex_trylock(&mlx5_intf_mutex); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index bf722aa88cf0..460363b66cb1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -62,12 +62,14 @@ #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE 0xd #define MLX5E_PARAMS_MINIMUM_LOG_RQ_SIZE_MPW 0x1 -#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x4 +#define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW 0x3 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW 0x6 +#define MLX5_RX_HEADROOM NET_SKB_PAD + #define MLX5_MPWRQ_LOG_STRIDE_SIZE 6 /* >= 6, HW restriction */ #define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8 /* >= 6, HW restriction */ -#define MLX5_MPWRQ_LOG_WQE_SZ 17 +#define MLX5_MPWRQ_LOG_WQE_SZ 18 #define MLX5_MPWRQ_WQE_PAGE_ORDER (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \ MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0) #define MLX5_MPWRQ_PAGES_PER_WQE BIT(MLX5_MPWRQ_WQE_PAGE_ORDER) @@ -99,6 +101,18 @@ #define MLX5E_UPDATE_STATS_INTERVAL 200 /* msecs */ #define MLX5E_SQ_BF_BUDGET 16 +#define MLX5E_ICOSQ_MAX_WQEBBS \ + (DIV_ROUND_UP(sizeof(struct mlx5e_umr_wqe), MLX5_SEND_WQE_BB)) + +#define MLX5E_XDP_MIN_INLINE (ETH_HLEN + VLAN_HLEN) +#define MLX5E_XDP_IHS_DS_COUNT \ + DIV_ROUND_UP(MLX5E_XDP_MIN_INLINE - 2, MLX5_SEND_WQE_DS) +#define MLX5E_XDP_TX_DS_COUNT \ + (MLX5E_XDP_IHS_DS_COUNT + \ + (sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS) + 1 /* SG DS */) +#define MLX5E_XDP_TX_WQEBBS \ + DIV_ROUND_UP(MLX5E_XDP_TX_DS_COUNT, MLX5_SEND_WQEBB_NUM_DS) + #define MLX5E_NUM_MAIN_GROUPS 9 static inline u16 mlx5_min_rx_wqes(int wq_type, u32 wq_size) @@ -287,29 +301,53 @@ struct mlx5e_rx_am { /* Adaptive Moderation */ u8 tired; }; +/* a single cache unit is capable to serve one napi call (for non-striding rq) + * or a MPWQE (for striding rq). + */ +#define MLX5E_CACHE_UNIT (MLX5_MPWRQ_PAGES_PER_WQE > NAPI_POLL_WEIGHT ? \ + MLX5_MPWRQ_PAGES_PER_WQE : NAPI_POLL_WEIGHT) +#define MLX5E_CACHE_SIZE (2 * roundup_pow_of_two(MLX5E_CACHE_UNIT)) +struct mlx5e_page_cache { + u32 head; + u32 tail; + struct mlx5e_dma_info page_cache[MLX5E_CACHE_SIZE]; +}; + struct mlx5e_rq { /* data path */ struct mlx5_wq_ll wq; - u32 wqe_sz; - struct sk_buff **skb; - struct mlx5e_mpw_info *wqe_info; + + union { + struct mlx5e_dma_info *dma_info; + struct { + struct mlx5e_mpw_info *info; + void *mtt_no_align; + u32 mtt_offset; + } mpwqe; + }; + struct { + u8 page_order; + u32 wqe_sz; /* wqe data buffer size */ + u8 map_dir; /* dma map direction */ + } buff; __be32 mkey_be; - __be32 umr_mkey_be; struct device *pdev; struct net_device *netdev; struct mlx5e_tstamp *tstamp; struct mlx5e_rq_stats stats; struct mlx5e_cq cq; + struct mlx5e_page_cache page_cache; + mlx5e_fp_handle_rx_cqe handle_rx_cqe; mlx5e_fp_alloc_wqe alloc_wqe; mlx5e_fp_dealloc_wqe dealloc_wqe; unsigned long state; int ix; - u32 mpwqe_mtt_offset; struct mlx5e_rx_am am; /* Adaptive Moderation */ + struct bpf_prog *xdp_prog; /* control */ struct mlx5_wq_ctrl wq_ctrl; @@ -323,32 +361,15 @@ struct mlx5e_rq { struct mlx5e_umr_dma_info { __be64 *mtt; - __be64 *mtt_no_align; dma_addr_t mtt_addr; - struct mlx5e_dma_info *dma_info; + struct mlx5e_dma_info dma_info[MLX5_MPWRQ_PAGES_PER_WQE]; + struct mlx5e_umr_wqe wqe; }; struct mlx5e_mpw_info { - union { - struct mlx5e_dma_info dma_info; - struct mlx5e_umr_dma_info umr; - }; + struct mlx5e_umr_dma_info umr; u16 consumed_strides; u16 skbs_frags[MLX5_MPWRQ_PAGES_PER_WQE]; - - void (*dma_pre_sync)(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len); - void (*add_skb_frag)(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, u32 len); - void (*copy_skb_header)(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen); - void (*free_wqe)(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); }; struct mlx5e_tx_wqe_info { @@ -373,11 +394,17 @@ enum { MLX5E_SQ_STATE_BF_ENABLE, }; -struct mlx5e_ico_wqe_info { +struct mlx5e_sq_wqe_info { u8 opcode; u8 num_wqebbs; }; +enum mlx5e_sq_type { + MLX5E_SQ_TXQ, + MLX5E_SQ_ICO, + MLX5E_SQ_XDP +}; + struct mlx5e_sq { /* data path */ @@ -395,10 +422,20 @@ struct mlx5e_sq { struct mlx5e_cq cq; - /* pointers to per packet info: write@xmit, read@completion */ - struct sk_buff **skb; - struct mlx5e_sq_dma *dma_fifo; - struct mlx5e_tx_wqe_info *wqe_info; + /* pointers to per tx element info: write@xmit, read@completion */ + union { + struct { + struct sk_buff **skb; + struct mlx5e_sq_dma *dma_fifo; + struct mlx5e_tx_wqe_info *wqe_info; + } txq; + struct mlx5e_sq_wqe_info *ico_wqe; + struct { + struct mlx5e_sq_wqe_info *wqe_info; + struct mlx5e_dma_info *di; + bool doorbell; + } xdp; + } db; /* read only */ struct mlx5_wq_cyc wq; @@ -420,8 +457,8 @@ struct mlx5e_sq { struct mlx5_uar uar; struct mlx5e_channel *channel; int tc; - struct mlx5e_ico_wqe_info *ico_wqe_info; u32 rate_limit; + u8 type; } ____cacheline_aligned_in_smp; static inline bool mlx5e_sq_has_room_for(struct mlx5e_sq *sq, u16 n) @@ -437,8 +474,10 @@ enum channel_flags { struct mlx5e_channel { /* data path */ struct mlx5e_rq rq; + struct mlx5e_sq xdp_sq; struct mlx5e_sq sq[MLX5E_MAX_NUM_TC]; struct mlx5e_sq icosq; /* internal control operations */ + bool xdp; struct napi_struct napi; struct device *pdev; struct net_device *netdev; @@ -620,6 +659,7 @@ struct mlx5e_priv { /* priv data path fields - start */ struct mlx5e_sq **txq_to_sq_map; int channeltc_to_txq_map[MLX5E_MAX_NUM_CHANNELS][MLX5E_MAX_NUM_TC]; + struct bpf_prog *xdp_prog; /* priv data path fields - end */ unsigned long state; @@ -654,40 +694,6 @@ struct mlx5e_priv { void *ppriv; }; -enum mlx5e_link_mode { - MLX5E_1000BASE_CX_SGMII = 0, - MLX5E_1000BASE_KX = 1, - MLX5E_10GBASE_CX4 = 2, - MLX5E_10GBASE_KX4 = 3, - MLX5E_10GBASE_KR = 4, - MLX5E_20GBASE_KR2 = 5, - MLX5E_40GBASE_CR4 = 6, - MLX5E_40GBASE_KR4 = 7, - MLX5E_56GBASE_R4 = 8, - MLX5E_10GBASE_CR = 12, - MLX5E_10GBASE_SR = 13, - MLX5E_10GBASE_ER = 14, - MLX5E_40GBASE_SR4 = 15, - MLX5E_40GBASE_LR4 = 16, - MLX5E_50GBASE_SR2 = 18, - MLX5E_100GBASE_CR4 = 20, - MLX5E_100GBASE_SR4 = 21, - MLX5E_100GBASE_KR4 = 22, - MLX5E_100GBASE_LR4 = 23, - MLX5E_100BASE_TX = 24, - MLX5E_1000BASE_T = 25, - MLX5E_10GBASE_T = 26, - MLX5E_25GBASE_CR = 27, - MLX5E_25GBASE_KR = 28, - MLX5E_25GBASE_SR = 29, - MLX5E_50GBASE_CR2 = 30, - MLX5E_50GBASE_KR2 = 31, - MLX5E_LINK_MODES_NUMBER, -}; - -#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) - - void mlx5e_build_ptys2ethtool_map(void); void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw); @@ -700,30 +706,19 @@ void mlx5e_cq_error_event(struct mlx5_core_cq *mcq, enum mlx5_event event); int mlx5e_napi_poll(struct napi_struct *napi, int budget); bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget); int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget); -void mlx5e_free_tx_descs(struct mlx5e_sq *sq); +void mlx5e_free_sq_descs(struct mlx5e_sq *sq); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle); void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq); int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); -int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); +int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix); void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix); void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix); -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq); -void mlx5e_complete_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_complete_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5_cqe64 *cqe, - u16 byte_cnt, - struct mlx5e_mpw_info *wi, - struct sk_buff *skb); -void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi); +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq); +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi); struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq); void mlx5e_rx_am(struct mlx5e_rq *rq); @@ -810,6 +805,12 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq) mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc); } +static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) +{ + return rq->mpwqe.mtt_offset + + wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); +} + static inline int mlx5e_get_max_num_channels(struct mlx5_core_dev *mdev) { return min_t(int, mdev->priv.eq_table.num_comp_vectors, @@ -868,6 +869,7 @@ void mlx5e_nic_rep_unload(struct mlx5_eswitch *esw, int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv); void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv); int mlx5e_attr_get(struct net_device *dev, struct switchdev_attr *attr); +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe); int mlx5e_create_direct_rqts(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); @@ -878,9 +880,12 @@ void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv); int mlx5e_close(struct net_device *netdev); int mlx5e_open(struct net_device *netdev); void mlx5e_update_stats_work(struct work_struct *work); -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv); +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv); void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv); +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev); struct rtnl_link_stats64 * mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c index 847a8f3ac2b2..13dc388667b6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c @@ -273,7 +273,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv) tstamp->ptp = ptp_clock_register(&tstamp->ptp_info, &priv->mdev->pdev->dev); - if (IS_ERR_OR_NULL(tstamp->ptp)) { + if (IS_ERR(tstamp->ptp)) { mlx5_core_warn(priv->mdev, "ptp_clock_register failed %ld\n", PTR_ERR(tstamp->ptp)); tstamp->ptp = NULL; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c index 9cce153e1035..029e856f72a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c @@ -60,24 +60,27 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev, static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, struct mlx5_core_mkey *mkey) { - struct mlx5_create_mkey_mbox_in *in; + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; int err; - in = mlx5_vzalloc(sizeof(*in)); + in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - in->seg.flags = MLX5_PERM_LOCAL_WRITE | - MLX5_PERM_LOCAL_READ | - MLX5_ACCESS_MODE_PA; - in->seg.flags_pd = cpu_to_be32(pdn | MLX5_MKEY_LEN64); - in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_PA); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); - err = mlx5_core_create_mkey(mdev, mkey, in, sizeof(*in), NULL, NULL, - NULL); + MLX5_SET(mkc, mkc, pd, pdn); + MLX5_SET(mkc, mkc, length64, 1); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + + err = mlx5_core_create_mkey(mdev, mkey, in, inlen); kvfree(in); - return err; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 7a346bb2ed00..27ff401cec20 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -805,7 +805,7 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, { struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5_core_dev *mdev = priv->mdev; - u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0}; u32 eth_proto_cap; u32 eth_proto_admin; u32 eth_proto_lp; @@ -815,7 +815,6 @@ static int mlx5e_get_link_ksettings(struct net_device *netdev, int err; err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN, 1); - if (err) { netdev_err(netdev, "%s: query port ptys failed: %d\n", __func__, err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 1587a9fd5724..36fbc6b21a33 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -294,6 +294,36 @@ int mlx5e_vlan_rx_kill_vid(struct net_device *dev, __always_unused __be16 proto, return 0; } +static void mlx5e_add_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + +static void mlx5e_del_vlan_rules(struct mlx5e_priv *priv) +{ + int i; + + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); + + for_each_set_bit(i, priv->fs.vlan.active_vlans, VLAN_N_VID) { + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_MATCH_VID, i); + } + + if (priv->fs.vlan.filter_disabled && + !(priv->netdev->flags & IFF_PROMISC)) + mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_ANY_VID, 0); +} + #define mlx5e_for_each_hash_node(hn, tmp, hash, i) \ for (i = 0; i < MLX5E_L2_ADDR_HASH_SIZE; i++) \ hlist_for_each_entry_safe(hn, tmp, &hash[i], hlist) @@ -1024,14 +1054,10 @@ static int mlx5e_create_vlan_table(struct mlx5e_priv *priv) if (err) goto err_free_g; - err = mlx5e_add_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); - if (err) - goto err_destroy_vlan_flow_groups; + mlx5e_add_vlan_rules(priv); return 0; -err_destroy_vlan_flow_groups: - mlx5e_destroy_groups(ft); err_free_g: kfree(ft->g); err_destroy_vlan_table: @@ -1043,6 +1069,7 @@ err_destroy_vlan_table: static void mlx5e_destroy_vlan_table(struct mlx5e_priv *priv) { + mlx5e_del_vlan_rules(priv); mlx5e_destroy_flow_table(&priv->fs.vlan.ft); } @@ -1100,7 +1127,6 @@ err_destroy_arfs_tables: void mlx5e_destroy_flow_steering(struct mlx5e_priv *priv) { - mlx5e_del_vlan_rule(priv, MLX5E_VLAN_RULE_TYPE_UNTAGGED, 0); mlx5e_destroy_vlan_table(priv); mlx5e_destroy_l2_table(priv); mlx5e_destroy_ttc_table(priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2459c7f3db8d..7eaf38020a8f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -34,6 +34,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -50,7 +51,7 @@ struct mlx5e_sq_param { struct mlx5_wq_param wq; u16 max_inline; u8 min_inline_mode; - bool icosq; + enum mlx5e_sq_type type; }; struct mlx5e_cq_param { @@ -63,12 +64,55 @@ struct mlx5e_cq_param { struct mlx5e_channel_param { struct mlx5e_rq_param rq; struct mlx5e_sq_param sq; + struct mlx5e_sq_param xdp_sq; struct mlx5e_sq_param icosq; struct mlx5e_cq_param rx_cq; struct mlx5e_cq_param tx_cq; struct mlx5e_cq_param icosq_cq; }; +static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) +{ + return MLX5_CAP_GEN(mdev, striding_rq) && + MLX5_CAP_GEN(mdev, umr_ptr_rlky) && + MLX5_CAP_ETH(mdev, reg_umr_sq); +} + +static void mlx5e_set_rq_type_params(struct mlx5e_priv *priv, u8 rq_type) +{ + priv->params.rq_wq_type = rq_type; + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; + priv->params.mpwqe_log_stride_sz = priv->params.rx_cqe_compress ? + MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : + MLX5_MPWRQ_LOG_STRIDE_SIZE; + priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - + priv->params.mpwqe_log_stride_sz; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; + } + priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, + BIT(priv->params.log_rq_size)); + + mlx5_core_info(priv->mdev, + "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", + priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, + BIT(priv->params.log_rq_size), + BIT(priv->params.mpwqe_log_stride_sz), + priv->params.rx_cqe_compress_admin); +} + +static void mlx5e_set_rq_priv_params(struct mlx5e_priv *priv) +{ + u8 rq_type = mlx5e_check_fragmented_striding_rq_cap(priv->mdev) && + !priv->xdp_prog ? + MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : + MLX5_WQ_TYPE_LINKED_LIST; + mlx5e_set_rq_type_params(priv, rq_type); +} + static void mlx5e_update_carrier(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; @@ -136,12 +180,18 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->rx_csum_none += rq_stats->csum_none; s->rx_csum_complete += rq_stats->csum_complete; s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; + s->rx_xdp_drop += rq_stats->xdp_drop; + s->rx_xdp_tx += rq_stats->xdp_tx; + s->rx_xdp_tx_full += rq_stats->xdp_tx_full; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; - s->rx_mpwqe_frag += rq_stats->mpwqe_frag; s->rx_buff_alloc_err += rq_stats->buff_alloc_err; s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks; s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts; + s->rx_cache_reuse += rq_stats->cache_reuse; + s->rx_cache_full += rq_stats->cache_full; + s->rx_cache_empty += rq_stats->cache_empty; + s->rx_cache_busy += rq_stats->cache_busy; for (j = 0; j < priv->params.num_tc; j++) { sq_stats = &priv->channel[i]->sq[j].stats; @@ -174,18 +224,15 @@ static void mlx5e_update_vport_counters(struct mlx5e_priv *priv) { int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); u32 *out = (u32 *)priv->stats.vport.query_vport_out; - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; struct mlx5_core_dev *mdev = priv->mdev; - memset(in, 0, sizeof(in)); - MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); MLX5_SET(query_vport_counter_in, in, other_vport, 0); memset(out, 0, outlen); - mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } @@ -298,6 +345,117 @@ static void mlx5e_disable_async_events(struct mlx5e_priv *priv) #define MLX5E_HW2SW_MTU(hwmtu) (hwmtu - (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) #define MLX5E_SW2HW_MTU(swmtu) (swmtu + (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)) +static inline int mlx5e_get_wqe_mtt_sz(void) +{ + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. + * To avoid copying garbage after the mtt array, we allocate + * a little more. + */ + return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), + MLX5_UMR_MTT_ALIGNMENT); +} + +static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, struct mlx5e_sq *sq, + struct mlx5e_umr_wqe *wqe, u16 ix) +{ + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; + struct mlx5_wqe_data_seg *dseg = &wqe->data; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; + u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); + u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); + + cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | + ds_cnt); + cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + cseg->imm = rq->mkey_be; + + ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; + ucseg->klm_octowords = + cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); + ucseg->bsf_octowords = + cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); + ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); + + dseg->lkey = sq->mkey_be; + dseg->addr = cpu_to_be64(wi->umr.mtt_addr); +} + +static int mlx5e_rq_alloc_mpwqe_info(struct mlx5e_rq *rq, + struct mlx5e_channel *c) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int mtt_alloc = mtt_sz + MLX5_UMR_ALIGN - 1; + int i; + + rq->mpwqe.info = kzalloc_node(wq_sz * sizeof(*rq->mpwqe.info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->mpwqe.info) + goto err_out; + + /* We allocate more than mtt_sz as we will align the pointer */ + rq->mpwqe.mtt_no_align = kzalloc_node(mtt_alloc * wq_sz, GFP_KERNEL, + cpu_to_node(c->cpu)); + if (unlikely(!rq->mpwqe.mtt_no_align)) + goto err_free_wqe_info; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; + + wi->umr.mtt = PTR_ALIGN(rq->mpwqe.mtt_no_align + i * mtt_alloc, + MLX5_UMR_ALIGN); + wi->umr.mtt_addr = dma_map_single(c->pdev, wi->umr.mtt, mtt_sz, + PCI_DMA_TODEVICE); + if (unlikely(dma_mapping_error(c->pdev, wi->umr.mtt_addr))) + goto err_unmap_mtts; + + mlx5e_build_umr_wqe(rq, &c->icosq, &wi->umr.wqe, i); + } + + return 0; + +err_unmap_mtts: + while (--i >= 0) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; + + dma_unmap_single(c->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mpwqe.mtt_no_align); +err_free_wqe_info: + kfree(rq->mpwqe.info); + +err_out: + return -ENOMEM; +} + +static void mlx5e_rq_free_mpwqe_info(struct mlx5e_rq *rq) +{ + int wq_sz = mlx5_wq_ll_get_size(&rq->wq); + int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int i; + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[i]; + + dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, + PCI_DMA_TODEVICE); + } + kfree(rq->mpwqe.mtt_no_align); + kfree(rq->mpwqe.info); +} + +static bool mlx5e_is_vf_vport_rep(struct mlx5e_priv *priv) +{ + struct mlx5_eswitch_rep *rep = (struct mlx5_eswitch_rep *)priv->ppriv; + + if (rep && rep->vport != FDB_UPLINK_VPORT) + return true; + + return false; +} + static int mlx5e_create_rq(struct mlx5e_channel *c, struct mlx5e_rq_param *param, struct mlx5e_rq *rq) @@ -307,6 +465,8 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, void *rqc = param->rqc; void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq); u32 byte_count; + u32 frag_sz; + int npages; int wq_sz; int err; int i; @@ -322,54 +482,6 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, wq_sz = mlx5_wq_ll_get_size(&rq->wq); - switch (priv->params.rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - rq->wqe_info = kzalloc_node(wq_sz * sizeof(*rq->wqe_info), - GFP_KERNEL, cpu_to_node(c->cpu)); - if (!rq->wqe_info) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; - rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; - rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; - - rq->mpwqe_mtt_offset = c->ix * - MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); - - rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); - rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); - rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; - byte_count = rq->wqe_sz; - break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - rq->skb = kzalloc_node(wq_sz * sizeof(*rq->skb), GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!rq->skb) { - err = -ENOMEM; - goto err_rq_wq_destroy; - } - rq->handle_rx_cqe = mlx5e_handle_rx_cqe; - rq->alloc_wqe = mlx5e_alloc_rx_wqe; - rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; - - rq->wqe_sz = (priv->params.lro_en) ? - priv->params.lro_wqe_sz : - MLX5E_SW2HW_MTU(priv->netdev->mtu); - rq->wqe_sz = SKB_DATA_ALIGN(rq->wqe_sz); - byte_count = rq->wqe_sz; - byte_count |= MLX5_HW_START_PADDING; - } - - for (i = 0; i < wq_sz; i++) { - struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); - - wqe->data.byte_count = cpu_to_be32(byte_count); - } - - INIT_WORK(&rq->am.work, mlx5e_rx_am_work); - rq->am.mode = priv->params.rx_cq_period_mode; - rq->wq_type = priv->params.rq_wq_type; rq->pdev = c->pdev; rq->netdev = c->netdev; @@ -377,8 +489,85 @@ static int mlx5e_create_rq(struct mlx5e_channel *c, rq->channel = c; rq->ix = c->ix; rq->priv = c->priv; - rq->mkey_be = c->mkey_be; - rq->umr_mkey_be = cpu_to_be32(c->priv->umr_mkey.key); + rq->xdp_prog = priv->xdp_prog; + + rq->buff.map_dir = DMA_FROM_DEVICE; + if (rq->xdp_prog) + rq->buff.map_dir = DMA_BIDIRECTIONAL; + + switch (priv->params.rq_wq_type) { + case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: + if (mlx5e_is_vf_vport_rep(priv)) { + err = -EINVAL; + goto err_rq_wq_destroy; + } + + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq; + rq->alloc_wqe = mlx5e_alloc_rx_mpwqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_mpwqe; + + rq->mpwqe.mtt_offset = c->ix * + MLX5E_REQUIRED_MTTS(1, BIT(priv->params.log_rq_size)); + + rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz); + rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides); + + rq->buff.wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides; + byte_count = rq->buff.wqe_sz; + rq->mkey_be = cpu_to_be32(c->priv->umr_mkey.key); + err = mlx5e_rq_alloc_mpwqe_info(rq, c); + if (err) + goto err_rq_wq_destroy; + break; + default: /* MLX5_WQ_TYPE_LINKED_LIST */ + rq->dma_info = kzalloc_node(wq_sz * sizeof(*rq->dma_info), + GFP_KERNEL, cpu_to_node(c->cpu)); + if (!rq->dma_info) { + err = -ENOMEM; + goto err_rq_wq_destroy; + } + + if (mlx5e_is_vf_vport_rep(priv)) + rq->handle_rx_cqe = mlx5e_handle_rx_cqe_rep; + else + rq->handle_rx_cqe = mlx5e_handle_rx_cqe; + + rq->alloc_wqe = mlx5e_alloc_rx_wqe; + rq->dealloc_wqe = mlx5e_dealloc_rx_wqe; + + rq->buff.wqe_sz = (priv->params.lro_en) ? + priv->params.lro_wqe_sz : + MLX5E_SW2HW_MTU(priv->netdev->mtu); + byte_count = rq->buff.wqe_sz; + + /* calc the required page order */ + frag_sz = MLX5_RX_HEADROOM + + byte_count /* packet data */ + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + frag_sz = SKB_DATA_ALIGN(frag_sz); + + npages = DIV_ROUND_UP(frag_sz, PAGE_SIZE); + rq->buff.page_order = order_base_2(npages); + + byte_count |= MLX5_HW_START_PADDING; + rq->mkey_be = c->mkey_be; + } + + for (i = 0; i < wq_sz; i++) { + struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, i); + + wqe->data.byte_count = cpu_to_be32(byte_count); + wqe->data.lkey = rq->mkey_be; + } + + INIT_WORK(&rq->am.work, mlx5e_rx_am_work); + rq->am.mode = priv->params.rx_cq_period_mode; + + rq->page_cache.head = 0; + rq->page_cache.tail = 0; + + if (rq->xdp_prog) + bpf_prog_add(rq->xdp_prog, 1); return 0; @@ -390,14 +579,25 @@ err_rq_wq_destroy: static void mlx5e_destroy_rq(struct mlx5e_rq *rq) { + int i; + + if (rq->xdp_prog) + bpf_prog_put(rq->xdp_prog); + switch (rq->wq_type) { case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - kfree(rq->wqe_info); + mlx5e_rq_free_mpwqe_info(rq); break; default: /* MLX5_WQ_TYPE_LINKED_LIST */ - kfree(rq->skb); + kfree(rq->dma_info); } + for (i = rq->page_cache.head; i != rq->page_cache.tail; + i = (i + 1) & (MLX5E_CACHE_SIZE - 1)) { + struct mlx5e_dma_info *dma_info = &rq->page_cache.page_cache[i]; + + mlx5e_page_release(rq, dma_info, false); + } mlx5_wq_destroy(&rq->wq_ctrl); } @@ -488,7 +688,8 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd) rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx); MLX5_SET(modify_rq_in, in, rq_state, MLX5_RQC_STATE_RDY); - MLX5_SET64(modify_rq_in, in, modify_bitmask, MLX5_RQ_BITMASK_VSD); + MLX5_SET64(modify_rq_in, in, modify_bitmask, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD); MLX5_SET(rqc, rqc, vsd, vsd); MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY); @@ -530,7 +731,7 @@ static void mlx5e_free_rx_descs(struct mlx5e_rq *rq) /* UMR WQE (if in progress) is always at wq->head */ if (test_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state)) - mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); while (!mlx5_wq_ll_is_empty(wq)) { wqe_ix_be = *wq->tail_next; @@ -565,8 +766,8 @@ static int mlx5e_open_rq(struct mlx5e_channel *c, if (param->am_enabled) set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); /* trigger mlx5e_post_rx_wqes() */ return 0; @@ -590,26 +791,65 @@ static void mlx5e_close_rq(struct mlx5e_rq *rq) mlx5e_destroy_rq(rq); } -static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +static void mlx5e_free_sq_xdp_db(struct mlx5e_sq *sq) { - kfree(sq->wqe_info); - kfree(sq->dma_fifo); - kfree(sq->skb); + kfree(sq->db.xdp.di); + kfree(sq->db.xdp.wqe_info); } -static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +static int mlx5e_alloc_sq_xdp_db(struct mlx5e_sq *sq, int numa) +{ + int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.xdp.di = kzalloc_node(sizeof(*sq->db.xdp.di) * wq_sz, + GFP_KERNEL, numa); + sq->db.xdp.wqe_info = kzalloc_node(sizeof(*sq->db.xdp.wqe_info) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.xdp.di || !sq->db.xdp.wqe_info) { + mlx5e_free_sq_xdp_db(sq); + return -ENOMEM; + } + + return 0; +} + +static void mlx5e_free_sq_ico_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.ico_wqe); +} + +static int mlx5e_alloc_sq_ico_db(struct mlx5e_sq *sq, int numa) +{ + u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); + + sq->db.ico_wqe = kzalloc_node(sizeof(*sq->db.ico_wqe) * wq_sz, + GFP_KERNEL, numa); + if (!sq->db.ico_wqe) + return -ENOMEM; + + return 0; +} + +static void mlx5e_free_sq_txq_db(struct mlx5e_sq *sq) +{ + kfree(sq->db.txq.wqe_info); + kfree(sq->db.txq.dma_fifo); + kfree(sq->db.txq.skb); +} + +static int mlx5e_alloc_sq_txq_db(struct mlx5e_sq *sq, int numa) { int wq_sz = mlx5_wq_cyc_get_size(&sq->wq); int df_sz = wq_sz * MLX5_SEND_WQEBB_NUM_DS; - sq->skb = kzalloc_node(wq_sz * sizeof(*sq->skb), GFP_KERNEL, numa); - sq->dma_fifo = kzalloc_node(df_sz * sizeof(*sq->dma_fifo), GFP_KERNEL, - numa); - sq->wqe_info = kzalloc_node(wq_sz * sizeof(*sq->wqe_info), GFP_KERNEL, - numa); - - if (!sq->skb || !sq->dma_fifo || !sq->wqe_info) { - mlx5e_free_sq_db(sq); + sq->db.txq.skb = kzalloc_node(wq_sz * sizeof(*sq->db.txq.skb), + GFP_KERNEL, numa); + sq->db.txq.dma_fifo = kzalloc_node(df_sz * sizeof(*sq->db.txq.dma_fifo), + GFP_KERNEL, numa); + sq->db.txq.wqe_info = kzalloc_node(wq_sz * sizeof(*sq->db.txq.wqe_info), + GFP_KERNEL, numa); + if (!sq->db.txq.skb || !sq->db.txq.dma_fifo || !sq->db.txq.wqe_info) { + mlx5e_free_sq_txq_db(sq); return -ENOMEM; } @@ -618,6 +858,46 @@ static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) return 0; } +static void mlx5e_free_sq_db(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_sq_txq_db(sq); + break; + case MLX5E_SQ_ICO: + mlx5e_free_sq_ico_db(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_sq_xdp_db(sq); + break; + } +} + +static int mlx5e_alloc_sq_db(struct mlx5e_sq *sq, int numa) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + return mlx5e_alloc_sq_txq_db(sq, numa); + case MLX5E_SQ_ICO: + return mlx5e_alloc_sq_ico_db(sq, numa); + case MLX5E_SQ_XDP: + return mlx5e_alloc_sq_xdp_db(sq, numa); + } + + return 0; +} + +static int mlx5e_sq_get_max_wqebbs(u8 sq_type) +{ + switch (sq_type) { + case MLX5E_SQ_ICO: + return MLX5E_ICOSQ_MAX_WQEBBS; + case MLX5E_SQ_XDP: + return MLX5E_XDP_TX_WQEBBS; + } + return MLX5_SEND_WQE_MAX_WQEBBS; +} + static int mlx5e_create_sq(struct mlx5e_channel *c, int tc, struct mlx5e_sq_param *param, @@ -630,6 +910,13 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; + sq->type = param->type; + sq->pdev = c->pdev; + sq->tstamp = &priv->tstamp; + sq->mkey_be = c->mkey_be; + sq->channel = c; + sq->tc = tc; + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; @@ -658,18 +945,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, if (err) goto err_sq_wq_destroy; - if (param->icosq) { - u8 wq_sz = mlx5_wq_cyc_get_size(&sq->wq); - - sq->ico_wqe_info = kzalloc_node(sizeof(*sq->ico_wqe_info) * - wq_sz, - GFP_KERNEL, - cpu_to_node(c->cpu)); - if (!sq->ico_wqe_info) { - err = -ENOMEM; - goto err_free_sq_db; - } - } else { + if (sq->type == MLX5E_SQ_TXQ) { int txq_ix; txq_ix = c->ix + tc * priv->params.num_channels; @@ -677,19 +953,11 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, priv->txq_to_sq_map[txq_ix] = sq; } - sq->pdev = c->pdev; - sq->tstamp = &priv->tstamp; - sq->mkey_be = c->mkey_be; - sq->channel = c; - sq->tc = tc; - sq->edge = (sq->wq.sz_m1 + 1) - MLX5_SEND_WQE_MAX_WQEBBS; + sq->edge = (sq->wq.sz_m1 + 1) - mlx5e_sq_get_max_wqebbs(sq->type); sq->bf_budget = MLX5E_SQ_BF_BUDGET; return 0; -err_free_sq_db: - mlx5e_free_sq_db(sq); - err_sq_wq_destroy: mlx5_wq_destroy(&sq->wq_ctrl); @@ -704,7 +972,6 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq) struct mlx5e_channel *c = sq->channel; struct mlx5e_priv *priv = c->priv; - kfree(sq->ico_wqe_info); mlx5e_free_sq_db(sq); mlx5_wq_destroy(&sq->wq_ctrl); mlx5_unmap_free_uar(priv->mdev, &sq->uar); @@ -733,11 +1000,12 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param) memcpy(sqc, param->sqc, sizeof(param->sqc)); - MLX5_SET(sqc, sqc, tis_num_0, param->icosq ? 0 : priv->tisn[sq->tc]); + MLX5_SET(sqc, sqc, tis_num_0, param->type == MLX5E_SQ_ICO ? + 0 : priv->tisn[sq->tc]); MLX5_SET(sqc, sqc, cqn, sq->cq.mcq.cqn); MLX5_SET(sqc, sqc, min_wqe_inline_mode, sq->min_inline_mode); MLX5_SET(sqc, sqc, state, MLX5_SQC_STATE_RST); - MLX5_SET(sqc, sqc, tis_lst_sz, param->icosq ? 0 : 1); + MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1); MLX5_SET(sqc, sqc, flush_in_error_en, 1); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC); @@ -852,12 +1120,14 @@ static void mlx5e_close_sq(struct mlx5e_sq *sq) netif_tx_disable_queue(sq->txq); /* last doorbell out, godspeed .. */ - if (mlx5e_sq_has_room_for(sq, 1)) + if (mlx5e_sq_has_room_for(sq, 1)) { + sq->db.txq.skb[(sq->pc & sq->wq.sz_m1)] = NULL; mlx5e_send_nop(sq, true); + } } mlx5e_disable_sq(sq); - mlx5e_free_tx_descs(sq); + mlx5e_free_sq_descs(sq); mlx5e_destroy_sq(sq); } @@ -1218,14 +1488,31 @@ static int mlx5e_open_channel(struct mlx5e_priv *priv, int ix, } } + if (priv->xdp_prog) { + /* XDP SQ CQ params are same as normal TXQ sq CQ params */ + err = mlx5e_open_cq(c, &cparam->tx_cq, &c->xdp_sq.cq, + priv->params.tx_cq_moderation); + if (err) + goto err_close_sqs; + + err = mlx5e_open_sq(c, 0, &cparam->xdp_sq, &c->xdp_sq); + if (err) { + mlx5e_close_cq(&c->xdp_sq.cq); + goto err_close_sqs; + } + } + + c->xdp = !!priv->xdp_prog; err = mlx5e_open_rq(c, &cparam->rq, &c->rq); if (err) - goto err_close_sqs; + goto err_close_xdp_sq; netif_set_xps_queue(netdev, get_cpu_mask(c->cpu), ix); *cp = c; return 0; +err_close_xdp_sq: + mlx5e_close_sq(&c->xdp_sq); err_close_sqs: mlx5e_close_sqs(c); @@ -1254,9 +1541,13 @@ err_napi_del: static void mlx5e_close_channel(struct mlx5e_channel *c) { mlx5e_close_rq(&c->rq); + if (c->xdp) + mlx5e_close_sq(&c->xdp_sq); mlx5e_close_sqs(c); mlx5e_close_sq(&c->icosq); napi_disable(&c->napi); + if (c->xdp) + mlx5e_close_cq(&c->xdp_sq.cq); mlx5e_close_cq(&c->rq.cq); mlx5e_close_tx_cqs(c); mlx5e_close_cq(&c->icosq.cq); @@ -1330,6 +1621,7 @@ static void mlx5e_build_sq_param(struct mlx5e_priv *priv, param->max_inline = priv->params.tx_max_inline; param->min_inline_mode = priv->params.tx_min_inline_mode; + param->type = MLX5E_SQ_TXQ; } static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv, @@ -1403,7 +1695,22 @@ static void mlx5e_build_icosq_param(struct mlx5e_priv *priv, MLX5_SET(wq, wq, log_wq_sz, log_wq_size); MLX5_SET(sqc, sqc, reg_umr, MLX5_CAP_ETH(priv->mdev, reg_umr_sq)); - param->icosq = true; + param->type = MLX5E_SQ_ICO; +} + +static void mlx5e_build_xdpsq_param(struct mlx5e_priv *priv, + struct mlx5e_sq_param *param) +{ + void *sqc = param->sqc; + void *wq = MLX5_ADDR_OF(sqc, sqc, wq); + + mlx5e_build_sq_param_common(priv, param); + MLX5_SET(wq, wq, log_wq_sz, priv->params.log_sq_size); + + param->max_inline = priv->params.tx_max_inline; + /* FOR XDP SQs will support only L2 inline mode */ + param->min_inline_mode = MLX5_INLINE_MODE_NONE; + param->type = MLX5E_SQ_XDP; } static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_channel_param *cparam) @@ -1412,6 +1719,7 @@ static void mlx5e_build_channel_param(struct mlx5e_priv *priv, struct mlx5e_chan mlx5e_build_rq_param(priv, &cparam->rq); mlx5e_build_sq_param(priv, &cparam->sq); + mlx5e_build_xdpsq_param(priv, &cparam->xdp_sq); mlx5e_build_icosq_param(priv, &cparam->icosq, icosq_log_wq_sz); mlx5e_build_rx_cq_param(priv, &cparam->rx_cq); mlx5e_build_tx_cq_param(priv, &cparam->tx_cq); @@ -1885,6 +2193,9 @@ int mlx5e_close(struct net_device *netdev) struct mlx5e_priv *priv = netdev_priv(netdev); int err; + if (!netif_device_present(netdev)) + return -ENODEV; + mutex_lock(&priv->state_lock); err = mlx5e_close_locked(netdev); mutex_unlock(&priv->state_lock); @@ -1999,14 +2310,15 @@ static void mlx5e_close_drop_rq(struct mlx5e_priv *priv) static int mlx5e_create_tis(struct mlx5e_priv *priv, int tc) { struct mlx5_core_dev *mdev = priv->mdev; - u32 in[MLX5_ST_SZ_DW(create_tis_in)]; + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0}; void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); - memset(in, 0, sizeof(in)); - MLX5_SET(tisc, tisc, prio, tc << 1); MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.td.tdn); + if (mlx5_lag_is_lacp_owner(mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); + return mlx5_core_create_tis(mdev, in, sizeof(in), &priv->tisn[tc]); } @@ -2605,11 +2917,15 @@ static int mlx5e_set_vf_mac(struct net_device *dev, int vf, u8 *mac) return mlx5_eswitch_set_vport_mac(mdev->priv.eswitch, vf + 1, mac); } -static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos) +static int mlx5e_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5_core_dev *mdev = priv->mdev; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return mlx5_eswitch_set_vport_vlan(mdev->priv.eswitch, vf + 1, vlan, qos); } @@ -2786,6 +3102,106 @@ static void mlx5e_tx_timeout(struct net_device *dev) schedule_work(&priv->tx_timeout_work); } +static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog) +{ + struct mlx5e_priv *priv = netdev_priv(netdev); + struct bpf_prog *old_prog; + int err = 0; + bool reset, was_opened; + int i; + + mutex_lock(&priv->state_lock); + + if ((netdev->features & NETIF_F_LRO) && prog) { + netdev_warn(netdev, "can't set XDP while LRO is on, disable LRO first\n"); + err = -EINVAL; + goto unlock; + } + + was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state); + /* no need for full reset when exchanging programs */ + reset = (!priv->xdp_prog || !prog); + + if (was_opened && reset) + mlx5e_close_locked(netdev); + + /* exchange programs */ + old_prog = xchg(&priv->xdp_prog, prog); + if (prog) + bpf_prog_add(prog, 1); + if (old_prog) + bpf_prog_put(old_prog); + + if (reset) /* change RQ type according to priv->xdp_prog */ + mlx5e_set_rq_priv_params(priv); + + if (was_opened && reset) + mlx5e_open_locked(netdev); + + if (!test_bit(MLX5E_STATE_OPENED, &priv->state) || reset) + goto unlock; + + /* exchanging programs w/o reset, we update ref counts on behalf + * of the channels RQs here. + */ + bpf_prog_add(prog, priv->params.num_channels); + for (i = 0; i < priv->params.num_channels; i++) { + struct mlx5e_channel *c = priv->channel[i]; + + set_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + napi_synchronize(&c->napi); + /* prevent mlx5e_poll_rx_cq from accessing rq->xdp_prog */ + + old_prog = xchg(&c->rq.xdp_prog, prog); + + clear_bit(MLX5E_RQ_STATE_FLUSH, &c->rq.state); + /* napi_schedule in case we have missed anything */ + set_bit(MLX5E_CHANNEL_NAPI_SCHED, &c->flags); + napi_schedule(&c->napi); + + if (old_prog) + bpf_prog_put(old_prog); + } + +unlock: + mutex_unlock(&priv->state_lock); + return err; +} + +static bool mlx5e_xdp_attached(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + + return !!priv->xdp_prog; +} + +static int mlx5e_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx5e_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx5e_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* Fake "interrupt" called by netpoll (eg netconsole) to send skbs without + * reenabling interrupts. + */ +static void mlx5e_netpoll(struct net_device *dev) +{ + struct mlx5e_priv *priv = netdev_priv(dev); + int i; + + for (i = 0; i < priv->params.num_channels; i++) + napi_schedule(&priv->channel[i]->napi); +} +#endif + static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_open = mlx5e_open, .ndo_stop = mlx5e_close, @@ -2805,6 +3221,10 @@ static const struct net_device_ops mlx5e_netdev_ops_basic = { .ndo_rx_flow_steer = mlx5e_rx_flow_steer, #endif .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = mlx5e_netpoll, +#endif }; static const struct net_device_ops mlx5e_netdev_ops_sriov = { @@ -2836,6 +3256,10 @@ static const struct net_device_ops mlx5e_netdev_ops_sriov = { .ndo_set_vf_link_state = mlx5e_set_vf_link_state, .ndo_get_vf_stats = mlx5e_get_vf_stats, .ndo_tx_timeout = mlx5e_tx_timeout, + .ndo_xdp = mlx5e_xdp, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = mlx5e_netpoll, +#endif }; static int mlx5e_check_required_hca_cap(struct mlx5_core_dev *mdev) @@ -2910,13 +3334,6 @@ void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev, indirection_rqt[i] = i % num_channels; } -static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev) -{ - return MLX5_CAP_GEN(mdev, striding_rq) && - MLX5_CAP_GEN(mdev, umr_ptr_rlky) && - MLX5_CAP_ETH(mdev, reg_umr_sq); -} - static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw) { enum pcie_link_width width; @@ -2996,11 +3413,13 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, MLX5_CQ_PERIOD_MODE_START_FROM_CQE : MLX5_CQ_PERIOD_MODE_START_FROM_EQE; - priv->params.log_sq_size = - MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; - priv->params.rq_wq_type = mlx5e_check_fragmented_striding_rq_cap(mdev) ? - MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ : - MLX5_WQ_TYPE_LINKED_LIST; + priv->mdev = mdev; + priv->netdev = netdev; + priv->params.num_channels = profile->max_nch(mdev); + priv->profile = profile; + priv->ppriv = ppriv; + + priv->params.log_sq_size = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE; /* set CQE compression */ priv->params.rx_cqe_compress_admin = false; @@ -3013,33 +3432,11 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, priv->params.rx_cqe_compress_admin = cqe_compress_heuristic(link_speed, pci_bw); } - priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin; - switch (priv->params.rq_wq_type) { - case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ: - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW; - priv->params.mpwqe_log_stride_sz = - priv->params.rx_cqe_compress ? - MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS : - MLX5_MPWRQ_LOG_STRIDE_SIZE; - priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ - - priv->params.mpwqe_log_stride_sz; + mlx5e_set_rq_priv_params(priv); + if (priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ) priv->params.lro_en = true; - break; - default: /* MLX5_WQ_TYPE_LINKED_LIST */ - priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE; - } - - mlx5_core_info(mdev, - "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n", - priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ, - BIT(priv->params.log_rq_size), - BIT(priv->params.mpwqe_log_stride_sz), - priv->params.rx_cqe_compress_admin); - - priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type, - BIT(priv->params.log_rq_size)); priv->params.rx_am_enabled = MLX5_CAP_GEN(mdev, cq_moderation); mlx5e_set_rx_cq_mode_params(&priv->params, cq_period_mode); @@ -3059,19 +3456,16 @@ static void mlx5e_build_nic_netdev_priv(struct mlx5_core_dev *mdev, mlx5e_build_default_indir_rqt(mdev, priv->params.indirection_rqt, MLX5E_INDIR_RQT_SIZE, profile->max_nch(mdev)); - priv->params.lro_wqe_sz = - MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ; + priv->params.lro_wqe_sz = + MLX5E_PARAMS_DEFAULT_LRO_WQE_SZ - + /* Extra room needed for build_skb */ + MLX5_RX_HEADROOM - + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); /* Initialize pflags */ MLX5E_SET_PRIV_FLAG(priv, MLX5E_PFLAG_RX_CQE_BASED_MODER, priv->params.rx_cq_period_mode == MLX5_CQ_PERIOD_MODE_START_FROM_CQE); - priv->mdev = mdev; - priv->netdev = netdev; - priv->params.num_channels = profile->max_nch(mdev); - priv->profile = profile; - priv->ppriv = ppriv; - #ifdef CONFIG_MLX5_CORE_EN_DCB mlx5e_ets_init(priv); #endif @@ -3211,37 +3605,37 @@ static void mlx5e_destroy_q_counter(struct mlx5e_priv *priv) static int mlx5e_create_umr_mkey(struct mlx5e_priv *priv) { struct mlx5_core_dev *mdev = priv->mdev; - struct mlx5_create_mkey_mbox_in *in; - struct mlx5_mkey_seg *mkc; - int inlen = sizeof(*in); u64 npages = MLX5E_REQUIRED_MTTS(priv->profile->max_nch(mdev), BIT(MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW)); + int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); + void *mkc; + u32 *in; int err; in = mlx5_vzalloc(inlen); if (!in) return -ENOMEM; - mkc = &in->seg; - mkc->status = MLX5_MKEY_STATUS_FREE; - mkc->flags = MLX5_PERM_UMR_EN | - MLX5_PERM_LOCAL_READ | - MLX5_PERM_LOCAL_WRITE | - MLX5_ACCESS_MODE_MTT; + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); npages = min_t(u32, ALIGN(U16_MAX, 4) * 2, npages); - mkc->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); - mkc->flags_pd = cpu_to_be32(mdev->mlx5e_res.pdn); - mkc->len = cpu_to_be64(npages << PAGE_SHIFT); - mkc->xlt_oct_size = cpu_to_be32(MLX5_MTT_OCTW(npages)); - mkc->log2_page_size = PAGE_SHIFT; + MLX5_SET(mkc, mkc, free, 1); + MLX5_SET(mkc, mkc, umr_en, 1); + MLX5_SET(mkc, mkc, lw, 1); + MLX5_SET(mkc, mkc, lr, 1); + MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT); - err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen, NULL, - NULL, NULL); + MLX5_SET(mkc, mkc, qpn, 0xffffff); + MLX5_SET(mkc, mkc, pd, mdev->mlx5e_res.pdn); + MLX5_SET64(mkc, mkc, len, npages << PAGE_SHIFT); + MLX5_SET(mkc, mkc, translations_octword_size, + MLX5_MTT_OCTW(npages)); + MLX5_SET(mkc, mkc, log_page_size, PAGE_SHIFT); + + err = mlx5_core_create_mkey(mdev, &priv->umr_mkey, in, inlen); kvfree(in); - return err; } @@ -3360,6 +3754,8 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) struct mlx5_eswitch *esw = mdev->priv.eswitch; struct mlx5_eswitch_rep rep; + mlx5_lag_add(mdev, netdev); + if (mlx5e_vxlan_allowed(mdev)) { rtnl_lock(); udp_tunnel_get_rx_info(netdev); @@ -3373,9 +3769,9 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv) mlx5_query_nic_vport_mac_address(mdev, 0, rep.hw_id); rep.load = mlx5e_nic_rep_load; rep.unload = mlx5e_nic_rep_unload; - rep.vport = 0; + rep.vport = FDB_UPLINK_VPORT; rep.priv_data = priv; - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, 0, &rep); } } @@ -3383,6 +3779,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv) { queue_work(priv->wq, &priv->set_rx_mode_work); mlx5e_disable_async_events(priv); + mlx5_lag_remove(priv->mdev); } static const struct mlx5e_profile mlx5e_nic_profile = { @@ -3399,13 +3796,13 @@ static const struct mlx5e_profile mlx5e_nic_profile = { .max_tc = MLX5E_MAX_NUM_TC, }; -void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, - const struct mlx5e_profile *profile, void *ppriv) +struct net_device *mlx5e_create_netdev(struct mlx5_core_dev *mdev, + const struct mlx5e_profile *profile, + void *ppriv) { + int nch = profile->max_nch(mdev); struct net_device *netdev; struct mlx5e_priv *priv; - int nch = profile->max_nch(mdev); - int err; netdev = alloc_etherdev_mqs(sizeof(struct mlx5e_priv), nch * profile->max_tc, @@ -3423,12 +3820,31 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, priv->wq = create_singlethread_workqueue("mlx5e"); if (!priv->wq) - goto err_free_netdev; + goto err_cleanup_nic; + + return netdev; + +err_cleanup_nic: + profile->cleanup(priv); + free_netdev(netdev); + + return NULL; +} + +int mlx5e_attach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) +{ + const struct mlx5e_profile *profile; + struct mlx5e_priv *priv; + int err; + + priv = netdev_priv(netdev); + profile = priv->profile; + clear_bit(MLX5E_STATE_DESTROYING, &priv->state); err = mlx5e_create_umr_mkey(priv); if (err) { mlx5_core_err(mdev, "create umr mkey failed, %d\n", err); - goto err_destroy_wq; + goto out; } err = profile->init_tx(priv); @@ -3451,20 +3867,16 @@ void *mlx5e_create_netdev(struct mlx5_core_dev *mdev, mlx5e_set_dev_port_mtu(netdev); - err = register_netdev(netdev); - if (err) { - mlx5_core_err(mdev, "register_netdev failed, %d\n", err); - goto err_dealloc_q_counters; - } - if (profile->enable) profile->enable(priv); - return priv; + rtnl_lock(); + if (netif_running(netdev)) + mlx5e_open(netdev); + netif_device_attach(netdev); + rtnl_unlock(); -err_dealloc_q_counters: - mlx5e_destroy_q_counter(priv); - profile->cleanup_rx(priv); + return 0; err_close_drop_rq: mlx5e_close_drop_rq(priv); @@ -3475,13 +3887,8 @@ err_cleanup_tx: err_destroy_umr_mkey: mlx5_core_destroy_mkey(mdev, &priv->umr_mkey); -err_destroy_wq: - destroy_workqueue(priv->wq); - -err_free_netdev: - free_netdev(netdev); - - return NULL; +out: + return err; } static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) @@ -3503,51 +3910,26 @@ static void mlx5e_register_vport_rep(struct mlx5_core_dev *mdev) rep.unload = mlx5e_vport_rep_unload; rep.vport = vport; ether_addr_copy(rep.hw_id, mac); - mlx5_eswitch_register_vport_rep(esw, &rep); + mlx5_eswitch_register_vport_rep(esw, vport, &rep); } } -static void *mlx5e_add(struct mlx5_core_dev *mdev) -{ - struct mlx5_eswitch *esw = mdev->priv.eswitch; - void *ppriv = NULL; - void *ret; - - if (mlx5e_check_required_hca_cap(mdev)) - return NULL; - - if (mlx5e_create_mdev_resources(mdev)) - return NULL; - - mlx5e_register_vport_rep(mdev); - - if (MLX5_CAP_GEN(mdev, vport_group_manager)) - ppriv = &esw->offloads.vport_reps[0]; - - ret = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); - if (!ret) { - mlx5e_destroy_mdev_resources(mdev); - return NULL; - } - return ret; -} - -void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +void mlx5e_detach_netdev(struct mlx5_core_dev *mdev, struct net_device *netdev) { + struct mlx5e_priv *priv = netdev_priv(netdev); const struct mlx5e_profile *profile = priv->profile; - struct net_device *netdev = priv->netdev; set_bit(MLX5E_STATE_DESTROYING, &priv->state); if (profile->disable) profile->disable(priv); flush_workqueue(priv->wq); - if (test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) { - netif_device_detach(netdev); + + rtnl_lock(); + if (netif_running(netdev)) mlx5e_close(netdev); - } else { - unregister_netdev(netdev); - } + netif_device_detach(netdev); + rtnl_unlock(); mlx5e_destroy_q_counter(priv); profile->cleanup_rx(priv); @@ -3555,12 +3937,109 @@ void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) profile->cleanup_tx(priv); mlx5_core_destroy_mkey(priv->mdev, &priv->umr_mkey); cancel_delayed_work_sync(&priv->update_stats_work); +} + +/* mlx5e_attach and mlx5e_detach scope should be only creating/destroying + * hardware contexts and to connect it to the current netdev. + */ +static int mlx5e_attach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + int err; + + if (netif_device_present(netdev)) + return 0; + + err = mlx5e_create_mdev_resources(mdev); + if (err) + return err; + + err = mlx5e_attach_netdev(mdev, netdev); + if (err) { + mlx5e_destroy_mdev_resources(mdev); + return err; + } + + return 0; +} + +static void mlx5e_detach(struct mlx5_core_dev *mdev, void *vpriv) +{ + struct mlx5e_priv *priv = vpriv; + struct net_device *netdev = priv->netdev; + + if (!netif_device_present(netdev)) + return; + + mlx5e_detach_netdev(mdev, netdev); + mlx5e_destroy_mdev_resources(mdev); +} + +static void *mlx5e_add(struct mlx5_core_dev *mdev) +{ + struct mlx5_eswitch *esw = mdev->priv.eswitch; + int total_vfs = MLX5_TOTAL_VPORTS(mdev); + void *ppriv = NULL; + void *priv; + int vport; + int err; + struct net_device *netdev; + + err = mlx5e_check_required_hca_cap(mdev); + if (err) + return NULL; + + mlx5e_register_vport_rep(mdev); + + if (MLX5_CAP_GEN(mdev, vport_group_manager)) + ppriv = &esw->offloads.vport_reps[0]; + + netdev = mlx5e_create_netdev(mdev, &mlx5e_nic_profile, ppriv); + if (!netdev) { + mlx5_core_err(mdev, "mlx5e_create_netdev failed\n"); + goto err_unregister_reps; + } + + priv = netdev_priv(netdev); + + err = mlx5e_attach(mdev, priv); + if (err) { + mlx5_core_err(mdev, "mlx5e_attach failed, %d\n", err); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + mlx5_core_err(mdev, "register_netdev failed, %d\n", err); + goto err_detach; + } + + return priv; + +err_detach: + mlx5e_detach(mdev, priv); + +err_destroy_netdev: + mlx5e_destroy_netdev(mdev, priv); + +err_unregister_reps: + for (vport = 1; vport < total_vfs; vport++) + mlx5_eswitch_unregister_vport_rep(esw, vport); + + return NULL; +} + +void mlx5e_destroy_netdev(struct mlx5_core_dev *mdev, struct mlx5e_priv *priv) +{ + const struct mlx5e_profile *profile = priv->profile; + struct net_device *netdev = priv->netdev; + + unregister_netdev(netdev); destroy_workqueue(priv->wq); if (profile->cleanup) profile->cleanup(priv); - - if (!test_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &mdev->intf_state)) - free_netdev(netdev); + free_netdev(netdev); } static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) @@ -3570,12 +4049,11 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv) struct mlx5e_priv *priv = vpriv; int vport; - mlx5e_destroy_netdev(mdev, priv); - for (vport = 1; vport < total_vfs; vport++) mlx5_eswitch_unregister_vport_rep(esw, vport); - mlx5e_destroy_mdev_resources(mdev); + mlx5e_detach(mdev, vpriv); + mlx5e_destroy_netdev(mdev, priv); } static void *mlx5e_get_netdev(void *vpriv) @@ -3588,6 +4066,8 @@ static void *mlx5e_get_netdev(void *vpriv) static struct mlx5_interface mlx5e_interface = { .add = mlx5e_add, .remove = mlx5e_remove, + .attach = mlx5e_attach, + .detach = mlx5e_detach, .event = mlx5e_async_event, .protocol = MLX5_INTERFACE_PROTOCOL_ETH, .get_dev = mlx5e_get_netdev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 134de4a11f1d..3c97da103d30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -413,19 +413,50 @@ static struct mlx5e_profile mlx5e_rep_profile = { int mlx5e_vport_rep_load(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { - rep->priv_data = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); - if (!rep->priv_data) { - pr_warn("Failed to create representor for vport %d\n", + struct net_device *netdev; + int err; + + netdev = mlx5e_create_netdev(esw->dev, &mlx5e_rep_profile, rep); + if (!netdev) { + pr_warn("Failed to create representor netdev for vport %d\n", rep->vport); return -EINVAL; } + + rep->priv_data = netdev_priv(netdev); + + err = mlx5e_attach_netdev(esw->dev, netdev); + if (err) { + pr_warn("Failed to attach representor netdev for vport %d\n", + rep->vport); + goto err_destroy_netdev; + } + + err = register_netdev(netdev); + if (err) { + pr_warn("Failed to register representor netdev for vport %d\n", + rep->vport); + goto err_detach_netdev; + } + return 0; + +err_detach_netdev: + mlx5e_detach_netdev(esw->dev, netdev); + +err_destroy_netdev: + mlx5e_destroy_netdev(esw->dev, rep->priv_data); + + return err; + } void mlx5e_vport_rep_unload(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep) { struct mlx5e_priv *priv = rep->priv_data; + struct net_device *netdev = priv->netdev; + mlx5e_detach_netdev(esw->dev, netdev); mlx5e_destroy_netdev(esw->dev, priv); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index e7c969df3dad..c6de6fba5843 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include #include "en.h" #include "en_tc.h" +#include "eswitch.h" static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp) { @@ -179,51 +180,99 @@ unlock: mutex_unlock(&priv->state_lock); } -int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) -{ - struct sk_buff *skb; - dma_addr_t dma_addr; +#define RQ_PAGE_SIZE(rq) ((1 << rq->buff.page_order) << PAGE_SHIFT) - skb = napi_alloc_skb(rq->cq.napi, rq->wqe_sz); - if (unlikely(!skb)) +static inline bool mlx5e_rx_cache_put(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + u32 tail_next = (cache->tail + 1) & (MLX5E_CACHE_SIZE - 1); + + if (tail_next == cache->head) { + rq->stats.cache_full++; + return false; + } + + cache->page_cache[cache->tail] = *dma_info; + cache->tail = tail_next; + return true; +} + +static inline bool mlx5e_rx_cache_get(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct mlx5e_page_cache *cache = &rq->page_cache; + + if (unlikely(cache->head == cache->tail)) { + rq->stats.cache_empty++; + return false; + } + + if (page_ref_count(cache->page_cache[cache->head].page) != 1) { + rq->stats.cache_busy++; + return false; + } + + *dma_info = cache->page_cache[cache->head]; + cache->head = (cache->head + 1) & (MLX5E_CACHE_SIZE - 1); + rq->stats.cache_reuse++; + + dma_sync_single_for_device(rq->pdev, dma_info->addr, + RQ_PAGE_SIZE(rq), + DMA_FROM_DEVICE); + return true; +} + +static inline int mlx5e_page_alloc_mapped(struct mlx5e_rq *rq, + struct mlx5e_dma_info *dma_info) +{ + struct page *page; + + if (mlx5e_rx_cache_get(rq, dma_info)) + return 0; + + page = dev_alloc_pages(rq->buff.page_order); + if (unlikely(!page)) return -ENOMEM; - dma_addr = dma_map_single(rq->pdev, - /* hw start padding */ - skb->data, - /* hw end padding */ - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely(dma_mapping_error(rq->pdev, dma_addr))) - goto err_free_skb; - - *((dma_addr_t *)skb->cb) = dma_addr; - wqe->data.addr = cpu_to_be64(dma_addr); - wqe->data.lkey = rq->mkey_be; - - rq->skb[ix] = skb; + dma_info->page = page; + dma_info->addr = dma_map_page(rq->pdev, page, 0, + RQ_PAGE_SIZE(rq), rq->buff.map_dir); + if (unlikely(dma_mapping_error(rq->pdev, dma_info->addr))) { + put_page(page); + return -ENOMEM; + } return 0; +} -err_free_skb: - dev_kfree_skb(skb); +void mlx5e_page_release(struct mlx5e_rq *rq, struct mlx5e_dma_info *dma_info, + bool recycle) +{ + if (likely(recycle) && mlx5e_rx_cache_put(rq, dma_info)) + return; - return -ENOMEM; + dma_unmap_page(rq->pdev, dma_info->addr, RQ_PAGE_SIZE(rq), + rq->buff.map_dir); + put_page(dma_info->page); +} + +int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) +{ + struct mlx5e_dma_info *di = &rq->dma_info[ix]; + + if (unlikely(mlx5e_page_alloc_mapped(rq, di))) + return -ENOMEM; + + wqe->data.addr = cpu_to_be64(di->addr + MLX5_RX_HEADROOM); + return 0; } void mlx5e_dealloc_rx_wqe(struct mlx5e_rq *rq, u16 ix) { - struct sk_buff *skb = rq->skb[ix]; + struct mlx5e_dma_info *di = &rq->dma_info[ix]; - if (skb) { - rq->skb[ix] = NULL; - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - dev_kfree_skb(skb); - } + mlx5e_page_release(rq, di, true); } static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) @@ -231,44 +280,11 @@ static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq) return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER; } -static inline void -mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) -{ - dma_sync_single_for_cpu(pdev, wi->dma_info.addr + wqe_offset, - len, DMA_FROM_DEVICE); -} - -static inline void -mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev, - struct mlx5e_mpw_info *wi, - u32 wqe_offset, u32 len) -{ - /* No dma pre sync for fragmented MPWQE */ -} - -static inline void -mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) -{ - unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); - - wi->skbs_frags[page_idx]++; - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, - &wi->dma_info.page[page_idx], frag_offset, - len, truesize); -} - -static inline void -mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 frag_offset, - u32 len) +static inline void mlx5e_add_skb_frag_mpwqe(struct mlx5e_rq *rq, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 frag_offset, + u32 len) { unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz); @@ -282,24 +298,11 @@ mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq, } static inline void -mlx5e_copy_skb_header_linear_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) -{ - struct page *page = &wi->dma_info.page[page_idx]; - - skb_copy_to_linear_data(skb, page_address(page) + offset, - ALIGN(headlen, sizeof(long))); -} - -static inline void -mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, - struct sk_buff *skb, - struct mlx5e_mpw_info *wi, - u32 page_idx, u32 offset, - u32 headlen) +mlx5e_copy_skb_header_mpwqe(struct device *pdev, + struct sk_buff *skb, + struct mlx5e_mpw_info *wi, + u32 page_idx, u32 offset, + u32 headlen) { u16 headlen_pg = min_t(u32, headlen, PAGE_SIZE - offset); struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[page_idx]; @@ -324,46 +327,9 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev, } } -static u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix) -{ - return rq->mpwqe_mtt_offset + - wqe_ix * ALIGN(MLX5_MPWRQ_PAGES_PER_WQE, 8); -} - -static void mlx5e_build_umr_wqe(struct mlx5e_rq *rq, - struct mlx5e_sq *sq, - struct mlx5e_umr_wqe *wqe, - u16 ix) -{ - struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; - struct mlx5_wqe_umr_ctrl_seg *ucseg = &wqe->uctrl; - struct mlx5_wqe_data_seg *dseg = &wqe->data; - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - u8 ds_cnt = DIV_ROUND_UP(sizeof(*wqe), MLX5_SEND_WQE_DS); - u32 umr_wqe_mtt_offset = mlx5e_get_wqe_mtt_offset(rq, ix); - - memset(wqe, 0, sizeof(*wqe)); - cseg->opmod_idx_opcode = - cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | - MLX5_OPCODE_UMR); - cseg->qpn_ds = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) | - ds_cnt); - cseg->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; - cseg->imm = rq->umr_mkey_be; - - ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN; - ucseg->klm_octowords = - cpu_to_be16(MLX5_MTT_OCTW(MLX5_MPWRQ_PAGES_PER_WQE)); - ucseg->bsf_octowords = - cpu_to_be16(MLX5_MTT_OCTW(umr_wqe_mtt_offset)); - ucseg->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - - dseg->lkey = sq->mkey_be; - dseg->addr = cpu_to_be64(wi->umr.mtt_addr); -} - -static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) +static inline void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) { + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; struct mlx5e_sq *sq = &rq->channel->icosq; struct mlx5_wq_cyc *wq = &sq->wq; struct mlx5e_umr_wqe *wqe; @@ -372,135 +338,74 @@ static void mlx5e_post_umr_wqe(struct mlx5e_rq *rq, u16 ix) /* fill sq edge with nops to avoid wqe wrap around */ while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_NOP; - sq->ico_wqe_info[pi].num_wqebbs = 1; + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP; + sq->db.ico_wqe[pi].num_wqebbs = 1; mlx5e_send_nop(sq, true); } wqe = mlx5_wq_cyc_get_wqe(wq, pi); - mlx5e_build_umr_wqe(rq, sq, wqe, ix); - sq->ico_wqe_info[pi].opcode = MLX5_OPCODE_UMR; - sq->ico_wqe_info[pi].num_wqebbs = num_wqebbs; + memcpy(wqe, &wi->umr.wqe, sizeof(*wqe)); + wqe->ctrl.opmod_idx_opcode = + cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) | + MLX5_OPCODE_UMR); + + sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR; + sq->db.ico_wqe[pi].num_wqebbs = num_wqebbs; sq->pc += num_wqebbs; mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); } -static inline int mlx5e_get_wqe_mtt_sz(void) +static int mlx5e_alloc_rx_umr_mpwqe(struct mlx5e_rq *rq, + struct mlx5e_rx_wqe *wqe, + u16 ix) { - /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes. - * To avoid copying garbage after the mtt array, we allocate - * a little more. - */ - return ALIGN(MLX5_MPWRQ_PAGES_PER_WQE * sizeof(__be64), - MLX5_UMR_MTT_ALIGNMENT); -} - -static int mlx5e_alloc_and_map_page(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi, - int i) -{ - struct page *page; - - page = dev_alloc_page(); - if (unlikely(!page)) - return -ENOMEM; - - wi->umr.dma_info[i].page = page; - wi->umr.dma_info[i].addr = dma_map_page(rq->pdev, page, 0, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.dma_info[i].addr))) { - put_page(page); - return -ENOMEM; - } - wi->umr.mtt[i] = cpu_to_be64(wi->umr.dma_info[i].addr | MLX5_EN_WR); - - return 0; -} - -static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) -{ - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - int mtt_sz = mlx5e_get_wqe_mtt_sz(); + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; u64 dma_offset = (u64)mlx5e_get_wqe_mtt_offset(rq, ix) << PAGE_SHIFT; + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); + int err; int i; - wi->umr.dma_info = kmalloc(sizeof(*wi->umr.dma_info) * - MLX5_MPWRQ_PAGES_PER_WQE, - GFP_ATOMIC); - if (unlikely(!wi->umr.dma_info)) - goto err_out; - - /* We allocate more than mtt_sz as we will align the pointer */ - wi->umr.mtt_no_align = kzalloc(mtt_sz + MLX5_UMR_ALIGN - 1, - GFP_ATOMIC); - if (unlikely(!wi->umr.mtt_no_align)) - goto err_free_umr; - - wi->umr.mtt = PTR_ALIGN(wi->umr.mtt_no_align, MLX5_UMR_ALIGN); - wi->umr.mtt_addr = dma_map_single(rq->pdev, wi->umr.mtt, mtt_sz, - PCI_DMA_TODEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->umr.mtt_addr))) - goto err_free_mtt; - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i))) + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + err = mlx5e_page_alloc_mapped(rq, dma_info); + if (unlikely(err)) goto err_unmap; - page_ref_add(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); + wi->umr.mtt[i] = cpu_to_be64(dma_info->addr | MLX5_EN_WR); + page_ref_add(dma_info->page, pg_strides); wi->skbs_frags[i] = 0; } wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_fragmented_mpwqe; - wi->add_skb_frag = mlx5e_add_skb_frag_fragmented_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_fragmented_mpwqe; - wi->free_wqe = mlx5e_free_rx_fragmented_mpwqe; - wqe->data.lkey = rq->umr_mkey_be; wqe->data.addr = cpu_to_be64(dma_offset); return 0; err_unmap: while (--i >= 0) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq)); - put_page(wi->umr.dma_info[i].page); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + page_ref_sub(dma_info->page, pg_strides); + mlx5e_page_release(rq, dma_info, true); } - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); -err_free_mtt: - kfree(wi->umr.mtt_no_align); - -err_free_umr: - kfree(wi->umr.dma_info); - -err_out: - return -ENOMEM; + return err; } -void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) +void mlx5e_free_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi) { - int mtt_sz = mlx5e_get_wqe_mtt_sz(); + int pg_strides = mlx5e_mpwqe_strides_per_page(rq); int i; for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE, - PCI_DMA_FROMDEVICE); - page_ref_sub(wi->umr.dma_info[i].page, - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); - put_page(wi->umr.dma_info[i].page); + struct mlx5e_dma_info *dma_info = &wi->umr.dma_info[i]; + + page_ref_sub(dma_info->page, pg_strides - wi->skbs_frags[i]); + mlx5e_page_release(rq, dma_info, true); } - dma_unmap_single(rq->pdev, wi->umr.mtt_addr, mtt_sz, PCI_DMA_TODEVICE); - kfree(wi->umr.mtt_no_align); - kfree(wi->umr.dma_info); } -void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) +void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq) { struct mlx5_wq_ll *wq = &rq->wq; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(wq, wq->head); @@ -508,12 +413,11 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) clear_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) { - mlx5e_free_rx_fragmented_mpwqe(rq, &rq->wqe_info[wq->head]); + mlx5e_free_rx_mpwqe(rq, &rq->mpwqe.info[wq->head]); return; } mlx5_wq_ll_push(wq, be16_to_cpu(wqe->next.next_wqe_index)); - rq->stats.mpwqe_frag++; /* ensure wqes are visible to device before updating doorbell record */ dma_wmb(); @@ -521,84 +425,23 @@ void mlx5e_post_rx_fragmented_mpwqe(struct mlx5e_rq *rq) mlx5_wq_ll_update_db_record(wq); } -static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_rx_wqe *wqe, - u16 ix) -{ - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; - gfp_t gfp_mask; - int i; - - gfp_mask = GFP_ATOMIC | __GFP_COLD | __GFP_MEMALLOC; - wi->dma_info.page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, - MLX5_MPWRQ_WQE_PAGE_ORDER); - if (unlikely(!wi->dma_info.page)) - return -ENOMEM; - - wi->dma_info.addr = dma_map_page(rq->pdev, wi->dma_info.page, 0, - rq->wqe_sz, PCI_DMA_FROMDEVICE); - if (unlikely(dma_mapping_error(rq->pdev, wi->dma_info.addr))) { - put_page(wi->dma_info.page); - return -ENOMEM; - } - - /* We split the high-order page into order-0 ones and manage their - * reference counter to minimize the memory held by small skb fragments - */ - split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_add(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq)); - wi->skbs_frags[i] = 0; - } - - wi->consumed_strides = 0; - wi->dma_pre_sync = mlx5e_dma_pre_sync_linear_mpwqe; - wi->add_skb_frag = mlx5e_add_skb_frag_linear_mpwqe; - wi->copy_skb_header = mlx5e_copy_skb_header_linear_mpwqe; - wi->free_wqe = mlx5e_free_rx_linear_mpwqe; - wqe->data.lkey = rq->mkey_be; - wqe->data.addr = cpu_to_be64(wi->dma_info.addr); - - return 0; -} - -void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq, - struct mlx5e_mpw_info *wi) -{ - int i; - - dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz, - PCI_DMA_FROMDEVICE); - for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) { - page_ref_sub(&wi->dma_info.page[i], - mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i]); - put_page(&wi->dma_info.page[i]); - } -} - int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix) { int err; - err = mlx5e_alloc_rx_linear_mpwqe(rq, wqe, ix); - if (unlikely(err)) { - err = mlx5e_alloc_rx_fragmented_mpwqe(rq, wqe, ix); - if (unlikely(err)) - return err; - set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); - mlx5e_post_umr_wqe(rq, ix); - return -EBUSY; - } - - return 0; + err = mlx5e_alloc_rx_umr_mpwqe(rq, wqe, ix); + if (unlikely(err)) + return err; + set_bit(MLX5E_RQ_STATE_UMR_WQE_IN_PROGRESS, &rq->state); + mlx5e_post_umr_wqe(rq, ix); + return -EBUSY; } void mlx5e_dealloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix) { - struct mlx5e_mpw_info *wi = &rq->wqe_info[ix]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix]; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); } #define RQ_CANNOT_POST(rq) \ @@ -617,9 +460,10 @@ bool mlx5e_post_rx_wqes(struct mlx5e_rq *rq) int err; err = rq->alloc_wqe(rq, wqe, wq->head); + if (err == -EBUSY) + return true; if (unlikely(err)) { - if (err != -EBUSY) - rq->stats.buff_alloc_err++; + rq->stats.buff_alloc_err++; break; } @@ -786,11 +630,185 @@ static inline void mlx5e_complete_rx_cqe(struct mlx5e_rq *rq, rq->stats.packets++; rq->stats.bytes += cqe_bcnt; mlx5e_build_rx_skb(cqe, cqe_bcnt, rq, skb); - napi_gro_receive(rq->cq.napi, skb); +} + +static inline void mlx5e_xmit_xdp_doorbell(struct mlx5e_sq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + struct mlx5e_tx_wqe *wqe; + u16 pi = (sq->pc - MLX5E_XDP_TX_WQEBBS) & wq->sz_m1; /* last pi */ + + wqe = mlx5_wq_cyc_get_wqe(wq, pi); + + wqe->ctrl.fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE; + mlx5e_tx_notify_hw(sq, &wqe->ctrl, 0); +} + +static inline void mlx5e_xmit_xdp_frame(struct mlx5e_rq *rq, + struct mlx5e_dma_info *di, + unsigned int data_offset, + int len) +{ + struct mlx5e_sq *sq = &rq->channel->xdp_sq; + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = sq->pc & wq->sz_m1; + struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); + struct mlx5e_sq_wqe_info *wi = &sq->db.xdp.wqe_info[pi]; + + struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; + struct mlx5_wqe_eth_seg *eseg = &wqe->eth; + struct mlx5_wqe_data_seg *dseg; + + dma_addr_t dma_addr = di->addr + data_offset + MLX5E_XDP_MIN_INLINE; + unsigned int dma_len = len - MLX5E_XDP_MIN_INLINE; + void *data = page_address(di->page) + data_offset; + + if (unlikely(!mlx5e_sq_has_room_for(sq, MLX5E_XDP_TX_WQEBBS))) { + if (sq->db.xdp.doorbell) { + /* SQ is full, ring doorbell */ + mlx5e_xmit_xdp_doorbell(sq); + sq->db.xdp.doorbell = false; + } + rq->stats.xdp_tx_full++; + mlx5e_page_release(rq, di, true); + return; + } + + dma_sync_single_for_device(sq->pdev, dma_addr, dma_len, + PCI_DMA_TODEVICE); + + memset(wqe, 0, sizeof(*wqe)); + + /* copy the inline part */ + memcpy(eseg->inline_hdr_start, data, MLX5E_XDP_MIN_INLINE); + eseg->inline_hdr_sz = cpu_to_be16(MLX5E_XDP_MIN_INLINE); + + dseg = (struct mlx5_wqe_data_seg *)cseg + (MLX5E_XDP_TX_DS_COUNT - 1); + + /* write the dma part */ + dseg->addr = cpu_to_be64(dma_addr); + dseg->byte_count = cpu_to_be32(dma_len); + dseg->lkey = sq->mkey_be; + + cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_SEND); + cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | MLX5E_XDP_TX_DS_COUNT); + + sq->db.xdp.di[pi] = *di; + wi->opcode = MLX5_OPCODE_SEND; + wi->num_wqebbs = MLX5E_XDP_TX_WQEBBS; + sq->pc += MLX5E_XDP_TX_WQEBBS; + + sq->db.xdp.doorbell = true; + rq->stats.xdp_tx++; +} + +/* returns true if packet was consumed by xdp */ +static inline bool mlx5e_xdp_handle(struct mlx5e_rq *rq, + const struct bpf_prog *prog, + struct mlx5e_dma_info *di, + void *data, u16 len) +{ + struct xdp_buff xdp; + u32 act; + + if (!prog) + return false; + + xdp.data = data; + xdp.data_end = xdp.data + len; + act = bpf_prog_run_xdp(prog, &xdp); + switch (act) { + case XDP_PASS: + return false; + case XDP_TX: + mlx5e_xmit_xdp_frame(rq, di, MLX5_RX_HEADROOM, len); + return true; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + rq->stats.xdp_drop++; + mlx5e_page_release(rq, di, true); + return true; + } +} + +static inline +struct sk_buff *skb_from_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe, + u16 wqe_counter, u32 cqe_bcnt) +{ + struct bpf_prog *xdp_prog = READ_ONCE(rq->xdp_prog); + struct mlx5e_dma_info *di; + struct sk_buff *skb; + void *va, *data; + + di = &rq->dma_info[wqe_counter]; + va = page_address(di->page); + data = va + MLX5_RX_HEADROOM; + + dma_sync_single_range_for_cpu(rq->pdev, + di->addr, + MLX5_RX_HEADROOM, + rq->buff.wqe_sz, + DMA_FROM_DEVICE); + prefetch(data); + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { + rq->stats.wqe_err++; + mlx5e_page_release(rq, di, true); + return NULL; + } + + if (mlx5e_xdp_handle(rq, xdp_prog, di, data, cqe_bcnt)) + return NULL; /* page/packet was consumed by XDP */ + + skb = build_skb(va, RQ_PAGE_SIZE(rq)); + if (unlikely(!skb)) { + rq->stats.buff_alloc_err++; + mlx5e_page_release(rq, di, true); + return NULL; + } + + /* queue up for recycling ..*/ + page_ref_inc(di->page); + mlx5e_page_release(rq, di, true); + + skb_reserve(skb, MLX5_RX_HEADROOM); + skb_put(skb, cqe_bcnt); + + return skb; } void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { + struct mlx5e_rx_wqe *wqe; + __be16 wqe_counter_be; + struct sk_buff *skb; + u16 wqe_counter; + u32 cqe_bcnt; + + wqe_counter_be = cqe->wqe_counter; + wqe_counter = be16_to_cpu(wqe_counter_be); + wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); + + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) + goto wq_ll_pop; + + mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); + +wq_ll_pop: + mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, + &wqe->next.next_wqe_index); +} + +void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) +{ + struct net_device *netdev = rq->netdev; + struct mlx5e_priv *priv = netdev_priv(netdev); + struct mlx5_eswitch_rep *rep = priv->ppriv; struct mlx5e_rx_wqe *wqe; struct sk_buff *skb; __be16 wqe_counter_be; @@ -800,26 +818,19 @@ void mlx5e_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) wqe_counter_be = cqe->wqe_counter; wqe_counter = be16_to_cpu(wqe_counter_be); wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_counter); - skb = rq->skb[wqe_counter]; - prefetch(skb->data); - rq->skb[wqe_counter] = NULL; + cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - dma_unmap_single(rq->pdev, - *((dma_addr_t *)skb->cb), - rq->wqe_sz, - DMA_FROM_DEVICE); - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_RESP_SEND)) { - rq->stats.wqe_err++; - dev_kfree_skb(skb); + skb = skb_from_cqe(rq, cqe, wqe_counter, cqe_bcnt); + if (!skb) goto wq_ll_pop; - } - - cqe_bcnt = be32_to_cpu(cqe->byte_cnt); - skb_put(skb, cqe_bcnt); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + if (rep->vlan && skb_vlan_tag_present(skb)) + skb_vlan_pop(skb); + + napi_gro_receive(rq->cq.napi, skb); + wq_ll_pop: mlx5_wq_ll_pop(&rq->wq, wqe_counter_be, &wqe->next.next_wqe_index); @@ -831,7 +842,6 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, u32 cqe_bcnt, struct sk_buff *skb) { - u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz); u16 stride_ix = mpwrq_get_cqe_stride_index(cqe); u32 wqe_offset = stride_ix * rq->mpwqe_stride_sz; u32 head_offset = wqe_offset & (PAGE_SIZE - 1); @@ -845,21 +855,20 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq, page_idx++; frag_offset -= PAGE_SIZE; } - wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes); while (byte_cnt) { u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset, byte_cnt); - wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset, - pg_consumed_bytes); + mlx5e_add_skb_frag_mpwqe(rq, skb, wi, page_idx, frag_offset, + pg_consumed_bytes); byte_cnt -= pg_consumed_bytes; frag_offset = 0; page_idx++; } /* copy header */ - wi->copy_skb_header(rq->pdev, skb, wi, head_page_idx, head_offset, - headlen); + mlx5e_copy_skb_header_mpwqe(rq->pdev, skb, wi, head_page_idx, + head_offset, headlen); /* skb linear part was allocated with headlen and aligned to long */ skb->tail += headlen; skb->len += headlen; @@ -869,7 +878,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) { u16 cstrides = mpwrq_get_cqe_consumed_strides(cqe); u16 wqe_id = be16_to_cpu(cqe->wqe_id); - struct mlx5e_mpw_info *wi = &rq->wqe_info[wqe_id]; + struct mlx5e_mpw_info *wi = &rq->mpwqe.info[wqe_id]; struct mlx5e_rx_wqe *wqe = mlx5_wq_ll_get_wqe(&rq->wq, wqe_id); struct sk_buff *skb; u16 cqe_bcnt; @@ -899,18 +908,20 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe) mlx5e_mpwqe_fill_rx_skb(rq, cqe, wi, cqe_bcnt, skb); mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb); + napi_gro_receive(rq->cq.napi, skb); mpwrq_cqe_out: if (likely(wi->consumed_strides < rq->mpwqe_num_strides)) return; - wi->free_wqe(rq, wi); + mlx5e_free_rx_mpwqe(rq, wi); mlx5_wq_ll_pop(&rq->wq, cqe->wqe_id, &wqe->next.next_wqe_index); } int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) { struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq); + struct mlx5e_sq *xdp_sq = &rq->channel->xdp_sq; int work_done = 0; if (unlikely(test_bit(MLX5E_RQ_STATE_FLUSH, &rq->state))) @@ -937,6 +948,11 @@ int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget) rq->handle_rx_cqe(rq, cqe); } + if (xdp_sq->db.xdp.doorbell) { + mlx5e_xmit_xdp_doorbell(xdp_sq); + xdp_sq->db.xdp.doorbell = false; + } + mlx5_cqwq_update_db_record(&cq->wq); /* ensure cq space is freed before enabling more cqes */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 499487ce3b53..57452fdc5154 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -65,6 +65,9 @@ struct mlx5e_sw_stats { u64 rx_csum_none; u64 rx_csum_complete; u64 rx_csum_unnecessary_inner; + u64 rx_xdp_drop; + u64 rx_xdp_tx; + u64 rx_xdp_tx_full; u64 tx_csum_partial; u64 tx_csum_partial_inner; u64 tx_queue_stopped; @@ -73,10 +76,13 @@ struct mlx5e_sw_stats { u64 tx_xmit_more; u64 rx_wqe_err; u64 rx_mpwqe_filler; - u64 rx_mpwqe_frag; u64 rx_buff_alloc_err; u64 rx_cqe_compress_blks; u64 rx_cqe_compress_pkts; + u64 rx_cache_reuse; + u64 rx_cache_full; + u64 rx_cache_empty; + u64 rx_cache_busy; /* Special handling counters */ u64 link_down_events_phy; @@ -97,6 +103,9 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_drop) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_xdp_tx_full) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, @@ -105,10 +114,13 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_xmit_more) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_wqe_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_reuse) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_full) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_empty) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cache_busy) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; @@ -272,12 +284,18 @@ struct mlx5e_rq_stats { u64 csum_none; u64 lro_packets; u64 lro_bytes; + u64 xdp_drop; + u64 xdp_tx; + u64 xdp_tx_full; u64 wqe_err; u64 mpwqe_filler; - u64 mpwqe_frag; u64 buff_alloc_err; u64 cqe_compress_blks; u64 cqe_compress_pkts; + u64 cache_reuse; + u64 cache_full; + u64 cache_empty; + u64 cache_busy; }; static const struct counter_desc rq_stats_desc[] = { @@ -286,14 +304,20 @@ static const struct counter_desc rq_stats_desc[] = { { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_drop) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, xdp_tx_full) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_reuse) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_full) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_empty) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cache_busy) }, }; struct mlx5e_sq_stats { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index 22cfc4ac1837..ce8c54d18906 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -39,6 +39,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -47,6 +48,7 @@ struct mlx5e_tc_flow { struct rhash_head node; u64 cookie; struct mlx5_flow_rule *rule; + struct mlx5_esw_flow_attr *attr; }; #define MLX5E_TC_TABLE_NUM_ENTRIES 1024 @@ -114,27 +116,30 @@ err_create_ft: static struct mlx5_flow_rule *mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec, - u32 action, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; - struct mlx5_eswitch_rep *rep = priv->ppriv; - u32 src_vport; + int err; - if (rep->vport) /* set source vport for the flow */ - src_vport = rep->vport; - else - src_vport = FDB_UPLINK_VPORT; + err = mlx5_eswitch_add_vlan_action(esw, attr); + if (err) + return ERR_PTR(err); - return mlx5_eswitch_add_offloaded_rule(esw, spec, action, src_vport, dst_vport); + return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); } static void mlx5e_tc_del_flow(struct mlx5e_priv *priv, - struct mlx5_flow_rule *rule) + struct mlx5_flow_rule *rule, + struct mlx5_esw_flow_attr *attr) { + struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; struct mlx5_fc *counter = NULL; counter = mlx5_flow_rule_counter(rule); + if (esw && esw->mode == SRIOV_OFFLOADS) + mlx5_eswitch_del_vlan_action(esw, attr); + mlx5_del_flow_rule(rule); mlx5_fc_destroy(priv->mdev, counter); @@ -159,6 +164,7 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | BIT(FLOW_DISSECTOR_KEY_BASIC) | BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | BIT(FLOW_DISSECTOR_KEY_PORTS))) { @@ -222,6 +228,24 @@ static int parse_cls_flower(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec key->src); } + if (dissector_uses_key(f->dissector, FLOW_DISSECTOR_KEY_VLAN)) { + struct flow_dissector_key_vlan *key = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->key); + struct flow_dissector_key_vlan *mask = + skb_flow_dissector_target(f->dissector, + FLOW_DISSECTOR_KEY_VLAN, + f->mask); + if (mask->vlan_id) { + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); + + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, mask->vlan_id); + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, key->vlan_id); + } + } + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { struct flow_dissector_key_ipv4_addrs *key = skb_flow_dissector_target(f->dissector, @@ -361,7 +385,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, } static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, - u32 *action, u32 *dest_vport) + struct mlx5_esw_flow_attr *attr) { const struct tc_action *a; LIST_HEAD(actions); @@ -369,17 +393,14 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tc_no_actions(exts)) return -EINVAL; - *action = 0; + memset(attr, 0, sizeof(*attr)); + attr->in_rep = priv->ppriv; tcf_exts_to_list(exts, &actions); list_for_each_entry(a, &actions, list) { - /* Only support a single action per rule */ - if (*action) - return -EINVAL; - if (is_tcf_gact_shot(a)) { - *action = MLX5_FLOW_CONTEXT_ACTION_DROP | - MLX5_FLOW_CONTEXT_ACTION_COUNT; + attr->action |= MLX5_FLOW_CONTEXT_ACTION_DROP | + MLX5_FLOW_CONTEXT_ACTION_COUNT; continue; } @@ -387,7 +408,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, int ifindex = tcf_mirred_ifindex(a); struct net_device *out_dev; struct mlx5e_priv *out_priv; - struct mlx5_eswitch_rep *out_rep; out_dev = __dev_get_by_index(dev_net(priv->netdev), ifindex); @@ -397,13 +417,22 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, return -EINVAL; } + attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; out_priv = netdev_priv(out_dev); - out_rep = out_priv->ppriv; - if (out_rep->vport == 0) - *dest_vport = FDB_UPLINK_VPORT; - else - *dest_vport = out_rep->vport; - *action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; + attr->out_rep = out_priv->ppriv; + continue; + } + + if (is_tcf_vlan(a)) { + if (tcf_vlan_action(a) == VLAN_F_POP) { + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; + } else if (tcf_vlan_action(a) == VLAN_F_PUSH) { + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + return -EOPNOTSUPP; + + attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; + attr->vlan = tcf_vlan_push_vid(a); + } continue; } @@ -417,18 +446,29 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, { struct mlx5e_tc_table *tc = &priv->fs.tc; int err = 0; - u32 flow_tag, action, dest_vport = 0; + bool fdb_flow = false; + u32 flow_tag, action; struct mlx5e_tc_flow *flow; struct mlx5_flow_spec *spec; struct mlx5_flow_rule *old = NULL; + struct mlx5_esw_flow_attr *old_attr = NULL; struct mlx5_eswitch *esw = priv->mdev->priv.eswitch; + if (esw && esw->mode == SRIOV_OFFLOADS) + fdb_flow = true; + flow = rhashtable_lookup_fast(&tc->ht, &f->cookie, tc->ht_params); - if (flow) + if (flow) { old = flow->rule; - else - flow = kzalloc(sizeof(*flow), GFP_KERNEL); + old_attr = flow->attr; + } else { + if (fdb_flow) + flow = kzalloc(sizeof(*flow) + sizeof(struct mlx5_esw_flow_attr), + GFP_KERNEL); + else + flow = kzalloc(sizeof(*flow), GFP_KERNEL); + } spec = mlx5_vzalloc(sizeof(*spec)); if (!spec || !flow) { @@ -442,11 +482,12 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, if (err < 0) goto err_free; - if (esw && esw->mode == SRIOV_OFFLOADS) { - err = parse_tc_fdb_actions(priv, f->exts, &action, &dest_vport); + if (fdb_flow) { + flow->attr = (struct mlx5_esw_flow_attr *)(flow + 1); + err = parse_tc_fdb_actions(priv, f->exts, flow->attr); if (err < 0) goto err_free; - flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, action, dest_vport); + flow->rule = mlx5e_tc_add_fdb_flow(priv, spec, flow->attr); } else { err = parse_tc_nic_actions(priv, f->exts, &action, &flow_tag); if (err < 0) @@ -465,7 +506,7 @@ int mlx5e_configure_flower(struct mlx5e_priv *priv, __be16 protocol, goto err_del_rule; if (old) - mlx5e_tc_del_flow(priv, old); + mlx5e_tc_del_flow(priv, old, old_attr); goto out; @@ -493,7 +534,7 @@ int mlx5e_delete_flower(struct mlx5e_priv *priv, rhashtable_remove_fast(&tc->ht, &flow->node, tc->ht_params); - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); @@ -550,7 +591,7 @@ static void _mlx5e_tc_del_flow(void *ptr, void *arg) struct mlx5e_tc_flow *flow = ptr; struct mlx5e_priv *priv = arg; - mlx5e_tc_del_flow(priv, flow->rule); + mlx5e_tc_del_flow(priv, flow->rule, flow->attr); kfree(flow); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index eb0e72537f10..70a717382357 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -52,7 +52,6 @@ void mlx5e_send_nop(struct mlx5e_sq *sq, bool notify_hw) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_NOP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | 0x01); - sq->skb[pi] = NULL; sq->pc++; sq->stats.nop++; @@ -82,15 +81,17 @@ static inline void mlx5e_dma_push(struct mlx5e_sq *sq, u32 size, enum mlx5e_dma_map_type map_type) { - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].addr = addr; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].size = size; - sq->dma_fifo[sq->dma_fifo_pc & sq->dma_fifo_mask].type = map_type; + u32 i = sq->dma_fifo_pc & sq->dma_fifo_mask; + + sq->db.txq.dma_fifo[i].addr = addr; + sq->db.txq.dma_fifo[i].size = size; + sq->db.txq.dma_fifo[i].type = map_type; sq->dma_fifo_pc++; } static inline struct mlx5e_sq_dma *mlx5e_dma_get(struct mlx5e_sq *sq, u32 i) { - return &sq->dma_fifo[i & sq->dma_fifo_mask]; + return &sq->db.txq.dma_fifo[i & sq->dma_fifo_mask]; } static void mlx5e_dma_unmap_wqe_err(struct mlx5e_sq *sq, u8 num_dma) @@ -221,7 +222,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) u16 pi = sq->pc & wq->sz_m1; struct mlx5e_tx_wqe *wqe = mlx5_wq_cyc_get_wqe(wq, pi); - struct mlx5e_tx_wqe_info *wi = &sq->wqe_info[pi]; + struct mlx5e_tx_wqe_info *wi = &sq->db.txq.wqe_info[pi]; struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl; struct mlx5_wqe_eth_seg *eseg = &wqe->eth; @@ -341,7 +342,7 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | opcode); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); - sq->skb[pi] = skb; + sq->db.txq.skb[pi] = skb; wi->num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS); sq->pc += wi->num_wqebbs; @@ -368,8 +369,10 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) } /* fill sq edge with nops to avoid wqe wrap around */ - while ((sq->pc & wq->sz_m1) > sq->edge) + while ((pi = (sq->pc & wq->sz_m1)) > sq->edge) { + sq->db.txq.skb[pi] = NULL; mlx5e_send_nop(sq, false); + } if (bf) sq->bf_budget--; @@ -442,8 +445,8 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) last_wqe = (sqcc == wqe_counter); ci = sqcc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (unlikely(!skb)) { /* nop */ sqcc++; @@ -492,7 +495,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget) return (i == MLX5E_TX_CQ_POLL_BUDGET); } -void mlx5e_free_tx_descs(struct mlx5e_sq *sq) +static void mlx5e_free_txq_sq_descs(struct mlx5e_sq *sq) { struct mlx5e_tx_wqe_info *wi; struct sk_buff *skb; @@ -501,8 +504,8 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) while (sq->cc != sq->pc) { ci = sq->cc & sq->wq.sz_m1; - skb = sq->skb[ci]; - wi = &sq->wqe_info[ci]; + skb = sq->db.txq.skb[ci]; + wi = &sq->db.txq.wqe_info[ci]; if (!skb) { /* nop */ sq->cc++; @@ -520,3 +523,37 @@ void mlx5e_free_tx_descs(struct mlx5e_sq *sq) sq->cc += wi->num_wqebbs; } } + +static void mlx5e_free_xdp_sq_descs(struct mlx5e_sq *sq) +{ + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + while (sq->cc != sq->pc) { + ci = sq->cc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (wi->opcode == MLX5_OPCODE_NOP) { + sq->cc++; + continue; + } + + sq->cc += wi->num_wqebbs; + + mlx5e_page_release(&sq->channel->rq, di, false); + } +} + +void mlx5e_free_sq_descs(struct mlx5e_sq *sq) +{ + switch (sq->type) { + case MLX5E_SQ_TXQ: + mlx5e_free_txq_sq_descs(sq); + break; + case MLX5E_SQ_XDP: + mlx5e_free_xdp_sq_descs(sq); + break; + } +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 9bf33bb69210..5703f19a6a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -72,7 +72,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) do { u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_ico_wqe_info *icowi = &sq->ico_wqe_info[ci]; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; mlx5_cqwq_pop(&cq->wq); sqcc += icowi->num_wqebbs; @@ -87,7 +87,7 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) case MLX5_OPCODE_NOP: break; case MLX5_OPCODE_UMR: - mlx5e_post_rx_fragmented_mpwqe(&sq->channel->rq); + mlx5e_post_rx_mpwqe(&sq->channel->rq); break; default: WARN_ONCE(true, @@ -105,6 +105,66 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) sq->cc = sqcc; } +static inline bool mlx5e_poll_xdp_tx_cq(struct mlx5e_cq *cq) +{ + struct mlx5e_sq *sq; + u16 sqcc; + int i; + + sq = container_of(cq, struct mlx5e_sq, cq); + + if (unlikely(test_bit(MLX5E_SQ_STATE_FLUSH, &sq->state))) + return false; + + /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), + * otherwise a cq overrun may occur + */ + sqcc = sq->cc; + + for (i = 0; i < MLX5E_TX_CQ_POLL_BUDGET; i++) { + struct mlx5_cqe64 *cqe; + u16 wqe_counter; + bool last_wqe; + + cqe = mlx5e_get_cqe(cq); + if (!cqe) + break; + + mlx5_cqwq_pop(&cq->wq); + + wqe_counter = be16_to_cpu(cqe->wqe_counter); + + do { + struct mlx5e_sq_wqe_info *wi; + struct mlx5e_dma_info *di; + u16 ci; + + last_wqe = (sqcc == wqe_counter); + + ci = sqcc & sq->wq.sz_m1; + di = &sq->db.xdp.di[ci]; + wi = &sq->db.xdp.wqe_info[ci]; + + if (unlikely(wi->opcode == MLX5_OPCODE_NOP)) { + sqcc++; + continue; + } + + sqcc += wi->num_wqebbs; + /* Recycle RX page */ + mlx5e_page_release(&sq->channel->rq, di, true); + } while (!last_wqe); + } + + mlx5_cqwq_update_db_record(&cq->wq); + + /* ensure cq space is freed before enabling more cqes */ + wmb(); + + sq->cc = sqcc; + return (i == MLX5E_TX_CQ_POLL_BUDGET); +} + int mlx5e_napi_poll(struct napi_struct *napi, int budget) { struct mlx5e_channel *c = container_of(napi, struct mlx5e_channel, @@ -121,6 +181,9 @@ int mlx5e_napi_poll(struct napi_struct *napi, int budget) work_done = mlx5e_poll_rx_cq(&c->rq.cq, budget); busy |= work_done == budget; + if (c->xdp) + busy |= mlx5e_poll_xdp_tx_cq(&c->xdp_sq.cq); + mlx5e_poll_ico_cq(&c->icosq.cq); busy |= mlx5e_post_rx_wqes(&c->rq); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c index 0e30602ef76d..aaca09002ca6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c @@ -86,23 +86,12 @@ struct cre_des_eq { static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn) { - struct mlx5_destroy_eq_mbox_in in; - struct mlx5_destroy_eq_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_EQ); - in.eqn = eqn; - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (!err) - goto ex; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - -ex: - return err; + MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ); + MLX5_SET(destroy_eq_in, in, eq_number, eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static struct mlx5_eqe *get_eqe(struct mlx5_eq *eq, u32 entry) @@ -351,11 +340,13 @@ static void init_eq_buf(struct mlx5_eq *eq) int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar) { + u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0}; struct mlx5_priv *priv = &dev->priv; - struct mlx5_create_eq_mbox_in *in; - struct mlx5_create_eq_mbox_out out; - int err; + __be64 *pas; + void *eqc; int inlen; + u32 *in; + int err; eq->nent = roundup_pow_of_two(nent + MLX5_NUM_SPARE_EQE); eq->cons_index = 0; @@ -365,35 +356,36 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, init_eq_buf(eq); - inlen = sizeof(*in) + sizeof(in->pas[0]) * eq->buf.npages; + inlen = MLX5_ST_SZ_BYTES(create_eq_in) + + MLX5_FLD_SZ_BYTES(create_eq_in, pas[0]) * eq->buf.npages; + in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; goto err_buf; } - memset(&out, 0, sizeof(out)); - mlx5_fill_page_array(&eq->buf, in->pas); + pas = (__be64 *)MLX5_ADDR_OF(create_eq_in, in, pas); + mlx5_fill_page_array(&eq->buf, pas); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_EQ); - in->ctx.log_sz_usr_page = cpu_to_be32(ilog2(eq->nent) << 24 | uar->index); - in->ctx.intr = vecidx; - in->ctx.log_page_size = eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT; - in->events_mask = cpu_to_be64(mask); + MLX5_SET(create_eq_in, in, opcode, MLX5_CMD_OP_CREATE_EQ); + MLX5_SET64(create_eq_in, in, event_bitmask, mask); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry); + MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent)); + MLX5_SET(eqc, eqc, uar_page, uar->index); + MLX5_SET(eqc, eqc, intr, vecidx); + MLX5_SET(eqc, eqc, log_page_size, + eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) goto err_in; - if (out.hdr.status) { - err = mlx5_cmd_status_to_err(&out.hdr); - goto err_in; - } - snprintf(priv->irq_info[vecidx].name, MLX5_MAX_IRQ_NAME, "%s@pci:%s", name, pci_name(dev->pdev)); - eq->eqn = out.eq_number; + eq->eqn = MLX5_GET(create_eq_out, out, eq_number); eq->irqn = priv->msix_arr[vecidx].vector; eq->dev = dev; eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET; @@ -547,22 +539,12 @@ int mlx5_stop_eqs(struct mlx5_core_dev *dev) } int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - struct mlx5_query_eq_mbox_out *out, int outlen) + u32 *out, int outlen) { - struct mlx5_query_eq_mbox_in in; - int err; + u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(out, 0, outlen); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_EQ); - in.eqn = eq->eqn; - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); - if (err) - return err; - - if (out->hdr.status) - err = mlx5_cmd_status_to_err(&out->hdr); - - return err; + MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ); + MLX5_SET(query_eq_in, in, eq_number, eq->eqn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL_GPL(mlx5_core_eq_query); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index b247949df135..abbf2c369923 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -81,19 +81,12 @@ enum { MC_ADDR_CHANGE | \ PROMISC_CHANGE) -int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); -void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); - static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, u32 events_mask) { - int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]; - int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0}; + int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0}; void *nic_vport_ctx; - int err; - - memset(out, 0, sizeof(out)); - memset(in, 0, sizeof(in)); MLX5_SET(modify_nic_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); @@ -116,113 +109,44 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport, MLX5_SET(nic_vport_context, nic_vport_ctx, event_on_promisc_change, 1); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - goto ex; - err = mlx5_cmd_status_to_err_v2(out); - if (err) - goto ex; - return 0; -ex: - return err; + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } /* E-Switch vport context HW commands */ -static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport, - u32 *out, int outlen) -{ - u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)]; - - memset(in, 0, sizeof(in)); - - MLX5_SET(query_nic_vport_context_in, in, opcode, - MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT); - - MLX5_SET(query_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(query_esw_vport_context_in, in, other_vport, 1); - - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); -} - -static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 *vlan, u8 *qos) -{ - u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)]; - int err; - bool cvlan_strip; - bool cvlan_insert; - - memset(out, 0, sizeof(out)); - - *vlan = 0; - *qos = 0; - - if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || - !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) - return -ENOTSUPP; - - err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out)); - if (err) - goto out; - - cvlan_strip = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_strip); - - cvlan_insert = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.vport_cvlan_insert); - - if (cvlan_strip || cvlan_insert) { - *vlan = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_id); - *qos = MLX5_GET(query_esw_vport_context_out, out, - esw_vport_context.cvlan_pcp); - } - - esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n", - vport, *vlan, *qos); -out: - return err; -} - static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport, void *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)]; - - memset(out, 0, sizeof(out)); - - MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); - if (vport) - MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0}; MLX5_SET(modify_esw_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT); - - return mlx5_cmd_exec_check_status(dev, in, inlen, - out, sizeof(out)); + MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport); + if (vport) + MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, - u16 vlan, u8 qos, bool set) + u16 vlan, u8 qos, u8 set_flags) { - u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0}; if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) || !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist)) return -ENOTSUPP; - esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n", - vport, vlan, qos, set); + esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%x\n", + vport, vlan, qos, set_flags); - if (set) { + if (set_flags & SET_VLAN_STRIP) MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_strip, 1); + + if (set_flags & SET_VLAN_INSERT) { /* insert only if no vlan in packet */ MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.vport_cvlan_insert, 1); + MLX5_SET(modify_esw_vport_context_in, in, esw_vport_context.cvlan_pcp, qos); MLX5_SET(modify_esw_vport_context_in, in, @@ -241,13 +165,10 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport, static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac, u8 vlan_valid, u16 vlan) { - u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]; - u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)]; + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0}; u8 *in_mac_addr; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); MLX5_SET(set_l2_table_entry_in, in, table_index, index); @@ -257,23 +178,18 @@ static int set_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index, in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); ether_addr_copy(&in_mac_addr[2], mac); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int del_l2_table_entry_cmd(struct mlx5_core_dev *dev, u32 index) { - u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]; - u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0}; MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); MLX5_SET(delete_l2_table_entry_in, in, table_index, index); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int alloc_l2_table_index(struct mlx5_l2_table *l2_table, u32 *ix) @@ -340,7 +256,7 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { - pr_warn("FDB: Failed to alloc match parameters\n"); + esw_warn(esw->dev, "FDB: Failed to alloc match parameters\n"); return NULL; } dmac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, @@ -374,8 +290,8 @@ __esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u32 vport, bool rx_rule, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, 0, &dest); if (IS_ERR(flow_rule)) { - pr_warn( - "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", + esw_warn(esw->dev, + "FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule)); flow_rule = NULL; } @@ -955,7 +871,7 @@ static void esw_update_vport_rx_mode(struct mlx5_eswitch *esw, u32 vport_num) esw_debug(esw->dev, "vport[%d] context update rx mode promisc_all=%d, all_multi=%d\n", vport_num, promisc_all, promisc_mc); - if (!vport->trusted || !vport->enabled) { + if (!vport->info.trusted || !vport->enabled) { promisc_uc = 0; promisc_mc = 0; promisc_all = 0; @@ -1291,30 +1207,20 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, struct mlx5_vport *vport) { struct mlx5_flow_spec *spec; - u8 smac[ETH_ALEN]; int err = 0; u8 *smac_v; - if (vport->spoofchk) { - err = mlx5_query_nic_vport_mac_address(esw->dev, vport->vport, smac); - if (err) { - esw_warn(esw->dev, - "vport[%d] configure ingress rules failed, query smac failed, err(%d)\n", - vport->vport, err); - return err; - } + if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { + mlx5_core_warn(esw->dev, + "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", + vport->vport); + return -EPERM; - if (!is_valid_ether_addr(smac)) { - mlx5_core_warn(esw->dev, - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", - vport->vport); - return -EPERM; - } } esw_vport_cleanup_ingress_rules(esw, vport); - if (!vport->vlan && !vport->qos && !vport->spoofchk) { + if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { esw_vport_disable_ingress_acl(esw, vport); return 0; } @@ -1323,7 +1229,7 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1333,16 +1239,16 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, goto out; } - if (vport->vlan || vport->qos) + if (vport->info.vlan || vport->info.qos) MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); - if (vport->spoofchk) { + if (vport->info.spoofchk) { MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0); smac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers.smac_47_16); - ether_addr_copy(smac_v, smac); + ether_addr_copy(smac_v, vport->info.mac); } spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; @@ -1352,8 +1258,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->ingress.allow_rule)) { err = PTR_ERR(vport->ingress.allow_rule); - pr_warn("vport[%d] configure ingress allow rule, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure ingress allow rule, err(%d)\n", + vport->vport, err); vport->ingress.allow_rule = NULL; goto out; } @@ -1365,8 +1272,9 @@ static int esw_vport_ingress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->ingress.drop_rule)) { err = PTR_ERR(vport->ingress.drop_rule); - pr_warn("vport[%d] configure ingress drop rule, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure ingress drop rule, err(%d)\n", + vport->vport, err); vport->ingress.drop_rule = NULL; goto out; } @@ -1386,7 +1294,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_vport_cleanup_egress_rules(esw, vport); - if (!vport->vlan && !vport->qos) { + if (!vport->info.vlan && !vport->info.qos) { esw_vport_disable_egress_acl(esw, vport); return 0; } @@ -1395,7 +1303,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, esw_debug(esw->dev, "vport[%d] configure egress rules, vlan(%d) qos(%d)\n", - vport->vport, vport->vlan, vport->qos); + vport->vport, vport->info.vlan, vport->info.qos); spec = mlx5_vzalloc(sizeof(*spec)); if (!spec) { @@ -1409,7 +1317,7 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.vlan_tag); MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid); - MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->vlan); + MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vport->info.vlan); spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; vport->egress.allowed_vlan = @@ -1418,8 +1326,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->egress.allowed_vlan)) { err = PTR_ERR(vport->egress.allowed_vlan); - pr_warn("vport[%d] configure egress allowed vlan rule failed, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure egress allowed vlan rule failed, err(%d)\n", + vport->vport, err); vport->egress.allowed_vlan = NULL; goto out; } @@ -1432,8 +1341,9 @@ static int esw_vport_egress_config(struct mlx5_eswitch *esw, 0, NULL); if (IS_ERR(vport->egress.drop_rule)) { err = PTR_ERR(vport->egress.drop_rule); - pr_warn("vport[%d] configure egress drop rule failed, err(%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "vport[%d] configure egress drop rule failed, err(%d)\n", + vport->vport, err); vport->egress.drop_rule = NULL; } out: @@ -1441,6 +1351,41 @@ out: return err; } +static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) +{ + ((u8 *)node_guid)[7] = mac[0]; + ((u8 *)node_guid)[6] = mac[1]; + ((u8 *)node_guid)[5] = mac[2]; + ((u8 *)node_guid)[4] = 0xff; + ((u8 *)node_guid)[3] = 0xfe; + ((u8 *)node_guid)[2] = mac[3]; + ((u8 *)node_guid)[1] = mac[4]; + ((u8 *)node_guid)[0] = mac[5]; +} + +static void esw_apply_vport_conf(struct mlx5_eswitch *esw, + struct mlx5_vport *vport) +{ + int vport_num = vport->vport; + + if (!vport_num) + return; + + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + vport->info.link_state); + mlx5_modify_nic_vport_mac_address(esw->dev, vport_num, vport->info.mac); + mlx5_modify_nic_vport_node_guid(esw->dev, vport_num, vport->info.node_guid); + modify_esw_vport_cvlan(esw->dev, vport_num, vport->info.vlan, vport->info.qos, + (vport->info.vlan || vport->info.qos)); + + /* Only legacy mode needs ACLs */ + if (esw->mode == SRIOV_LEGACY) { + esw_vport_ingress_config(esw, vport); + esw_vport_egress_config(esw, vport); + } +} static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, int enable_events) { @@ -1451,23 +1396,17 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); - /* Only VFs need ACLs for VST and spoofchk filtering */ - if (vport_num && esw->mode == SRIOV_LEGACY) { - esw_vport_ingress_config(esw, vport); - esw_vport_egress_config(esw, vport); - } - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_AUTO); + /* Restore old vport configuration */ + esw_apply_vport_conf(esw, vport); /* Sync with current vport context */ vport->enabled_events = enable_events; vport->enabled = true; /* only PF is trusted by default */ - vport->trusted = (vport_num) ? false : true; + if (!vport_num) + vport->info.trusted = true; + esw_vport_change_handle_locked(vport); esw->enabled_vports++; @@ -1487,11 +1426,6 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) vport->enabled = false; synchronize_irq(mlx5_get_msix_vec(esw->dev, MLX5_EQ_VEC_ASYNC)); - - mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport_num, - MLX5_ESW_VPORT_ADMIN_STATE_DOWN); /* Wait for current already scheduled events to complete */ flush_workqueue(esw->work_queue); /* Disable events from this vport */ @@ -1503,7 +1437,12 @@ static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num) */ esw_vport_change_handle_locked(vport); vport->enabled_events = 0; + if (vport_num && esw->mode == SRIOV_LEGACY) { + mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport_num, + MLX5_ESW_VPORT_ADMIN_STATE_DOWN); esw_vport_disable_egress_acl(esw, vport); esw_vport_disable_ingress_acl(esw, vport); } @@ -1590,6 +1529,25 @@ void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw) esw_enable_vport(esw, 0, UC_ADDR_CHANGE); } +void mlx5_eswitch_attach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_enable_vport(esw, 0, UC_ADDR_CHANGE); + /* VF Vports will be enabled when SRIOV is enabled */ +} + +void mlx5_eswitch_detach(struct mlx5_eswitch *esw) +{ + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) || + MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) + return; + + esw_disable_vport(esw, 0); +} + int mlx5_eswitch_init(struct mlx5_core_dev *dev) { int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table); @@ -1657,6 +1615,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) struct mlx5_vport *vport = &esw->vports[vport_num]; vport->vport = vport_num; + vport->info.link_state = MLX5_ESW_VPORT_ADMIN_STATE_AUTO; vport->dev = dev; INIT_WORK(&vport->vport_change_handler, esw_vport_change_handler); @@ -1667,8 +1626,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) esw->mode = SRIOV_NONE; dev->priv.eswitch = esw; - esw_enable_vport(esw, 0, UC_ADDR_CHANGE); - /* VF Vports will be enabled when SRIOV is enabled */ return 0; abort: if (esw->work_queue) @@ -1687,7 +1644,6 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) return; esw_info(esw->dev, "cleanup\n"); - esw_disable_vport(esw, 0); esw->dev->priv.eswitch = NULL; destroy_workqueue(esw->work_queue); @@ -1720,18 +1676,6 @@ void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe) (esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev)) #define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports) -static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN]) -{ - ((u8 *)node_guid)[7] = mac[0]; - ((u8 *)node_guid)[6] = mac[1]; - ((u8 *)node_guid)[5] = mac[2]; - ((u8 *)node_guid)[4] = 0xff; - ((u8 *)node_guid)[3] = 0xfe; - ((u8 *)node_guid)[2] = mac[3]; - ((u8 *)node_guid)[1] = mac[4]; - ((u8 *)node_guid)[0] = mac[5]; -} - int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int vport, u8 mac[ETH_ALEN]) { @@ -1744,13 +1688,15 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - if (evport->spoofchk && !is_valid_ether_addr(mac)) { + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { mlx5_core_warn(esw->dev, "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", vport); - return -EPERM; + err = -EPERM; + goto unlock; } err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); @@ -1758,7 +1704,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, mlx5_core_warn(esw->dev, "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n", vport, err); - return err; + goto unlock; } node_guid_gen_from_mac(&node_guid, mac); @@ -1768,9 +1714,12 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, "Failed to set vport %d node guid, err = %d. RDMA_CM will not function properly for this VF.\n", vport, err); - mutex_lock(&esw->state_lock); + ether_addr_copy(evport->info.mac, mac); + evport->info.node_guid = node_guid; if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); + +unlock: mutex_unlock(&esw->state_lock); return err; } @@ -1778,22 +1727,38 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw, int vport, int link_state) { + struct mlx5_vport *evport; + int err = 0; + if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - return mlx5_modify_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport, link_state); + mutex_lock(&esw->state_lock); + evport = &esw->vports[vport]; + + err = mlx5_modify_vport_admin_state(esw->dev, + MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, + vport, link_state); + if (err) { + mlx5_core_warn(esw->dev, + "Failed to set vport %d link state, err = %d", + vport, err); + goto unlock; + } + + evport->info.link_state = link_state; + +unlock: + mutex_unlock(&esw->state_lock); + return 0; } int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, int vport, struct ifla_vf_info *ivi) { struct mlx5_vport *evport; - u16 vlan; - u8 qos; if (!ESW_ALLOWED(esw)) return -EPERM; @@ -1805,54 +1770,61 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, memset(ivi, 0, sizeof(*ivi)); ivi->vf = vport - 1; - mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac); - ivi->linkstate = mlx5_query_vport_admin_state(esw->dev, - MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT, - vport); - query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos); - ivi->vlan = vlan; - ivi->qos = qos; - ivi->spoofchk = evport->spoofchk; + mutex_lock(&esw->state_lock); + ether_addr_copy(ivi->mac, evport->info.mac); + ivi->linkstate = evport->info.link_state; + ivi->vlan = evport->info.vlan; + ivi->qos = evport->info.qos; + ivi->spoofchk = evport->info.spoofchk; + ivi->trusted = evport->info.trusted; + mutex_unlock(&esw->state_lock); return 0; } -int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, - int vport, u16 vlan, u8 qos) +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags) { struct mlx5_vport *evport; int err = 0; - int set = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7)) return -EINVAL; - if (vlan || qos) - set = 1; - + mutex_lock(&esw->state_lock); evport = &esw->vports[vport]; - err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set); + err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set_flags); if (err) - return err; + goto unlock; - mutex_lock(&esw->state_lock); - evport->vlan = vlan; - evport->qos = qos; + evport->info.vlan = vlan; + evport->info.qos = qos; if (evport->enabled && esw->mode == SRIOV_LEGACY) { err = esw_vport_ingress_config(esw, evport); if (err) - goto out; + goto unlock; err = esw_vport_egress_config(esw, evport); } -out: +unlock: mutex_unlock(&esw->state_lock); return err; } +int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos) +{ + u8 set_flags = 0; + + if (vlan || qos) + set_flags = SET_VLAN_STRIP | SET_VLAN_INSERT; + + return __mlx5_eswitch_set_vport_vlan(esw, vport, vlan, qos, set_flags); +} + int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, int vport, bool spoofchk) { @@ -1865,16 +1837,14 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - pschk = evport->spoofchk; - evport->spoofchk = spoofchk; - if (evport->enabled && esw->mode == SRIOV_LEGACY) { + evport = &esw->vports[vport]; + pschk = evport->info.spoofchk; + evport->info.spoofchk = spoofchk; + if (evport->enabled && esw->mode == SRIOV_LEGACY) err = esw_vport_ingress_config(esw, evport); - if (err) - evport->spoofchk = pschk; - } + if (err) + evport->info.spoofchk = pschk; mutex_unlock(&esw->state_lock); return err; @@ -1890,10 +1860,9 @@ int mlx5_eswitch_set_vport_trust(struct mlx5_eswitch *esw, if (!LEGAL_VPORT(esw, vport)) return -EINVAL; - evport = &esw->vports[vport]; - mutex_lock(&esw->state_lock); - evport->trusted = setting; + evport = &esw->vports[vport]; + evport->info.trusted = setting; if (evport->enabled) esw_vport_change_handle_locked(evport); mutex_unlock(&esw->state_lock); @@ -1906,7 +1875,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats) { int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out); - u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)]; + u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0}; int err = 0; u32 *out; @@ -1919,8 +1888,6 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, if (!out) return -ENOMEM; - memset(in, 0, sizeof(in)); - MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER); MLX5_SET(query_vport_counter_in, in, op_mod, 0); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h index a96140971d77..2e2938e08cda 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h @@ -109,6 +109,16 @@ struct vport_egress { struct mlx5_flow_rule *drop_rule; }; +struct mlx5_vport_info { + u8 mac[ETH_ALEN]; + u16 vlan; + u8 qos; + u64 node_guid; + int link_state; + bool spoofchk; + bool trusted; +}; + struct mlx5_vport { struct mlx5_core_dev *dev; int vport; @@ -121,10 +131,8 @@ struct mlx5_vport { struct vport_ingress ingress; struct vport_egress egress; - u16 vlan; - u8 qos; - bool spoofchk; - bool trusted; + struct mlx5_vport_info info; + bool enabled; u16 enabled_events; }; @@ -149,6 +157,7 @@ struct mlx5_eswitch_fdb { struct mlx5_flow_group *send_to_vport_grp; struct mlx5_flow_group *miss_grp; struct mlx5_flow_rule *miss_rule; + int vlan_push_pop_refcount; } offloads; }; }; @@ -170,11 +179,14 @@ struct mlx5_eswitch_rep { void (*unload)(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep); u16 vport; - struct mlx5_flow_rule *vport_rx_rule; - void *priv_data; - struct list_head vport_sqs_list; - bool valid; u8 hw_id[ETH_ALEN]; + void *priv_data; + + struct mlx5_flow_rule *vport_rx_rule; + struct list_head vport_sqs_list; + u16 vlan; + u32 vlan_refcount; + bool valid; }; struct mlx5_esw_offload { @@ -201,9 +213,14 @@ struct mlx5_eswitch { int mode; }; +void esw_offloads_cleanup(struct mlx5_eswitch *esw, int nvports); +int esw_offloads_init(struct mlx5_eswitch *esw, int nvports); + /* E-Switch API */ int mlx5_eswitch_init(struct mlx5_core_dev *dev); void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw); +void mlx5_eswitch_attach(struct mlx5_eswitch *esw); +void mlx5_eswitch_detach(struct mlx5_eswitch *esw); void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe); int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs, int mode); void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw); @@ -224,14 +241,32 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw, struct ifla_vf_stats *vf_stats); struct mlx5_flow_spec; +struct mlx5_esw_flow_attr; struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport); + struct mlx5_esw_flow_attr *attr); struct mlx5_flow_rule * mlx5_eswitch_create_vport_rx_rule(struct mlx5_eswitch *esw, int vport, u32 tirn); +enum { + SET_VLAN_STRIP = BIT(0), + SET_VLAN_INSERT = BIT(1) +}; + +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_POP 0x40 +#define MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH 0x80 + +struct mlx5_esw_flow_attr { + struct mlx5_eswitch_rep *in_rep; + struct mlx5_eswitch_rep *out_rep; + + int action; + u16 vlan; + bool vlan_handled; +}; + int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, struct mlx5_eswitch_rep *rep, u16 *sqns_array, int sqns_num); @@ -241,9 +276,17 @@ void mlx5_eswitch_sqs2vport_stop(struct mlx5_eswitch *esw, int mlx5_devlink_eswitch_mode_set(struct devlink *devlink, u16 mode); int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode); void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, + int vport_index, struct mlx5_eswitch_rep *rep); void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport); + int vport_index); + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr); +int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw, + int vport, u16 vlan, u8 qos, u8 set_flags); #define MLX5_DEBUG_ESWITCH_MASK BIT(3) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c index 7de40e6b0c25..c55ad8d00c05 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c @@ -46,19 +46,22 @@ enum { struct mlx5_flow_rule * mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, struct mlx5_flow_spec *spec, - u32 action, u32 src_vport, u32 dst_vport) + struct mlx5_esw_flow_attr *attr) { struct mlx5_flow_destination dest = { 0 }; struct mlx5_fc *counter = NULL; struct mlx5_flow_rule *rule; void *misc; + int action; if (esw->mode != SRIOV_OFFLOADS) return ERR_PTR(-EOPNOTSUPP); + action = attr->action; + if (action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) { dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; - dest.vport_num = dst_vport; + dest.vport_num = attr->out_rep->vport; action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; } else if (action & MLX5_FLOW_CONTEXT_ACTION_COUNT) { counter = mlx5_fc_create(esw->dev, true); @@ -69,7 +72,7 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, } misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, misc_parameters); - MLX5_SET(fte_match_set_misc, misc, source_port, src_vport); + MLX5_SET(fte_match_set_misc, misc, source_port, attr->in_rep->vport); misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, misc_parameters); MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); @@ -86,6 +89,186 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw, return rule; } +static int esw_set_global_vlan_pop(struct mlx5_eswitch *esw, u8 val) +{ + struct mlx5_eswitch_rep *rep; + int vf_vport, err = 0; + + esw_debug(esw->dev, "%s applying global %s policy\n", __func__, val ? "pop" : "none"); + for (vf_vport = 1; vf_vport < esw->enabled_vports; vf_vport++) { + rep = &esw->offloads.vport_reps[vf_vport]; + if (!rep->valid) + continue; + + err = __mlx5_eswitch_set_vport_vlan(esw, rep->vport, 0, 0, val); + if (err) + goto out; + } + +out: + return err; +} + +static struct mlx5_eswitch_rep * +esw_vlan_action_get_vport(struct mlx5_esw_flow_attr *attr, bool push, bool pop) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep, *vport = NULL; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push) + vport = in_rep; + else if (pop) + vport = out_rep; + else + vport = in_rep; + + return vport; +} + +static int esw_add_vlan_action_check(struct mlx5_esw_flow_attr *attr, + bool push, bool pop, bool fwd) +{ + struct mlx5_eswitch_rep *in_rep, *out_rep; + + if ((push || pop) && !fwd) + goto out_notsupp; + + in_rep = attr->in_rep; + out_rep = attr->out_rep; + + if (push && in_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + if (pop && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* vport has vlan push configured, can't offload VF --> wire rules w.o it */ + if (!push && !pop && fwd) + if (in_rep->vlan && out_rep->vport == FDB_UPLINK_VPORT) + goto out_notsupp; + + /* protects against (1) setting rules with different vlans to push and + * (2) setting rules w.o vlans (attr->vlan = 0) && w. vlans to push (!= 0) + */ + if (push && in_rep->vlan_refcount && (in_rep->vlan != attr->vlan)) + goto out_notsupp; + + return 0; + +out_notsupp: + return -ENOTSUPP; +} + +int mlx5_eswitch_add_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + err = esw_add_vlan_action_check(attr, push, pop, fwd); + if (err) + return err; + + attr->vlan_handled = false; + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) { + vport->vlan_refcount++; + attr->vlan_handled = true; + } + + return 0; + } + + if (!push && !pop) + return 0; + + if (!(offloads->vlan_push_pop_refcount)) { + /* it's the 1st vlan rule, apply global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, SET_VLAN_STRIP); + if (err) + goto out; + } + offloads->vlan_push_pop_refcount++; + + if (push) { + if (vport->vlan_refcount) + goto skip_set_push; + + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, attr->vlan, 0, + SET_VLAN_INSERT | SET_VLAN_STRIP); + if (err) + goto out; + vport->vlan = attr->vlan; +skip_set_push: + vport->vlan_refcount++; + } +out: + if (!err) + attr->vlan_handled = true; + return err; +} + +int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw, + struct mlx5_esw_flow_attr *attr) +{ + struct offloads_fdb *offloads = &esw->fdb_table.offloads; + struct mlx5_eswitch_rep *vport = NULL; + bool push, pop, fwd; + int err = 0; + + if (!attr->vlan_handled) + return 0; + + push = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH); + pop = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP); + fwd = !!(attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST); + + vport = esw_vlan_action_get_vport(attr, push, pop); + + if (!push && !pop && fwd) { + /* tracks VF --> wire rules without vlan push action */ + if (attr->out_rep->vport == FDB_UPLINK_VPORT) + vport->vlan_refcount--; + + return 0; + } + + if (push) { + vport->vlan_refcount--; + if (vport->vlan_refcount) + goto skip_unset_push; + + vport->vlan = 0; + err = __mlx5_eswitch_set_vport_vlan(esw, vport->vport, + 0, 0, SET_VLAN_STRIP); + if (err) + goto out; + } + +skip_unset_push: + offloads->vlan_push_pop_refcount--; + if (offloads->vlan_push_pop_refcount) + return 0; + + /* no more vlan rules, stop global vlan pop policy */ + err = esw_set_global_vlan_pop(esw, 0); + +out: + return err; +} + static struct mlx5_flow_rule * mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw, int vport, u32 sqn) { @@ -144,16 +327,12 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, { struct mlx5_flow_rule *flow_rule; struct mlx5_esw_sq *esw_sq; - int vport; int err; int i; if (esw->mode != SRIOV_OFFLOADS) return 0; - vport = rep->vport == 0 ? - FDB_UPLINK_VPORT : rep->vport; - for (i = 0; i < sqns_num; i++) { esw_sq = kzalloc(sizeof(*esw_sq), GFP_KERNEL); if (!esw_sq) { @@ -163,7 +342,7 @@ int mlx5_eswitch_sqs2vport_start(struct mlx5_eswitch *esw, /* Add re-inject rule to the PF/representor sqs */ flow_rule = mlx5_eswitch_add_send_to_vport_rule(esw, - vport, + rep->vport, sqns_array[i]); if (IS_ERR(flow_rule)) { err = PTR_ERR(flow_rule); @@ -620,27 +799,36 @@ int mlx5_devlink_eswitch_mode_get(struct devlink *devlink, u16 *mode) } void mlx5_eswitch_register_vport_rep(struct mlx5_eswitch *esw, - struct mlx5_eswitch_rep *rep) -{ - struct mlx5_esw_offload *offloads = &esw->offloads; - - memcpy(&offloads->vport_reps[rep->vport], rep, - sizeof(struct mlx5_eswitch_rep)); - - INIT_LIST_HEAD(&offloads->vport_reps[rep->vport].vport_sqs_list); - offloads->vport_reps[rep->vport].valid = true; -} - -void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, - int vport) + int vport_index, + struct mlx5_eswitch_rep *__rep) { struct mlx5_esw_offload *offloads = &esw->offloads; struct mlx5_eswitch_rep *rep; - rep = &offloads->vport_reps[vport]; + rep = &offloads->vport_reps[vport_index]; - if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport].enabled) + memset(rep, 0, sizeof(*rep)); + + rep->load = __rep->load; + rep->unload = __rep->unload; + rep->vport = __rep->vport; + rep->priv_data = __rep->priv_data; + ether_addr_copy(rep->hw_id, __rep->hw_id); + + INIT_LIST_HEAD(&rep->vport_sqs_list); + rep->valid = true; +} + +void mlx5_eswitch_unregister_vport_rep(struct mlx5_eswitch *esw, + int vport_index) +{ + struct mlx5_esw_offload *offloads = &esw->offloads; + struct mlx5_eswitch_rep *rep; + + rep = &offloads->vport_reps[vport_index]; + + if (esw->mode == SRIOV_OFFLOADS && esw->vports[vport_index].enabled) rep->unload(esw, rep); - offloads->vport_reps[vport].valid = false; + rep->valid = false; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 287ade151ec8..113c32326333 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -41,10 +41,8 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft) { - u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]; - u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); @@ -55,30 +53,23 @@ int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, MLX5_SET(set_flow_table_root_in, in, other_vport, 1); } - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, u16 vport, + enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id) { - u32 out[MLX5_ST_SZ_DW(create_flow_table_out)]; - u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]; + u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {0}; int err; - memset(in, 0, sizeof(in)); - MLX5_SET(create_flow_table_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_TABLE); - if (next_ft) { - MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); - MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); - } MLX5_SET(create_flow_table_in, in, table_type, type); MLX5_SET(create_flow_table_in, in, level, level); MLX5_SET(create_flow_table_in, in, log_size, log_size); @@ -87,10 +78,23 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, MLX5_SET(create_flow_table_in, in, other_vport, 1); } - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + switch (op_mod) { + case FS_FT_OP_MOD_NORMAL: + if (next_ft) { + MLX5_SET(create_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(create_flow_table_in, in, table_miss_id, next_ft->id); + } + break; + case FS_FT_OP_MOD_LAG_DEMUX: + MLX5_SET(create_flow_table_in, in, op_mod, 0x1); + if (next_ft) + MLX5_SET(create_flow_table_in, in, lag_master_next_table_id, + next_ft->id); + break; + } + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *table_id = MLX5_GET(create_flow_table_out, out, table_id); @@ -100,11 +104,8 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft) { - u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0}; MLX5_SET(destroy_flow_table_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_TABLE); @@ -115,39 +116,49 @@ int mlx5_cmd_destroy_flow_table(struct mlx5_core_dev *dev, MLX5_SET(destroy_flow_table_in, in, other_vport, 1); } - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_modify_flow_table(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, struct mlx5_flow_table *next_ft) { - u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]; - u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0}; MLX5_SET(modify_flow_table_in, in, opcode, MLX5_CMD_OP_MODIFY_FLOW_TABLE); MLX5_SET(modify_flow_table_in, in, table_type, ft->type); MLX5_SET(modify_flow_table_in, in, table_id, ft->id); - if (ft->vport) { - MLX5_SET(modify_flow_table_in, in, vport_number, ft->vport); - MLX5_SET(modify_flow_table_in, in, other_vport, 1); - } - MLX5_SET(modify_flow_table_in, in, modify_field_select, - MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); - if (next_ft) { - MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); - MLX5_SET(modify_flow_table_in, in, table_miss_id, next_ft->id); + + if (ft->op_mod == FS_FT_OP_MOD_LAG_DEMUX) { + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, + lag_master_next_table_id, next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, + lag_master_next_table_id, 0); + } } else { - MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + if (ft->vport) { + MLX5_SET(modify_flow_table_in, in, vport_number, + ft->vport); + MLX5_SET(modify_flow_table_in, in, other_vport, 1); + } + MLX5_SET(modify_flow_table_in, in, modify_field_select, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID); + if (next_ft) { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 1); + MLX5_SET(modify_flow_table_in, in, table_miss_id, + next_ft->id); + } else { + MLX5_SET(modify_flow_table_in, in, table_miss_mode, 0); + } } - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, @@ -155,12 +166,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, u32 *in, unsigned int *group_id) { + u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0}; int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); - u32 out[MLX5_ST_SZ_DW(create_flow_group_out)]; int err; - memset(out, 0, sizeof(out)); - MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_CREATE_FLOW_GROUP); MLX5_SET(create_flow_group_in, in, table_type, ft->type); @@ -170,13 +179,10 @@ int mlx5_cmd_create_flow_group(struct mlx5_core_dev *dev, MLX5_SET(create_flow_group_in, in, other_vport, 1); } - err = mlx5_cmd_exec_check_status(dev, in, - inlen, out, - sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *group_id = MLX5_GET(create_flow_group_out, out, group_id); - return err; } @@ -184,11 +190,8 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned int group_id) { - u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)]; - u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {0}; MLX5_SET(destroy_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP); @@ -200,8 +203,7 @@ int mlx5_cmd_destroy_flow_group(struct mlx5_core_dev *dev, MLX5_SET(destroy_flow_group_in, in, other_vport, 1); } - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, @@ -212,7 +214,7 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, { unsigned int inlen = MLX5_ST_SZ_BYTES(set_fte_in) + fte->dests_size * MLX5_ST_SZ_BYTES(dest_format_struct); - u32 out[MLX5_ST_SZ_DW(set_fte_out)]; + u32 out[MLX5_ST_SZ_DW(set_fte_out)] = {0}; struct mlx5_flow_rule *dst; void *in_flow_context; void *in_match_value; @@ -290,11 +292,8 @@ static int mlx5_cmd_set_fte(struct mlx5_core_dev *dev, list_size); } - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, - sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); kvfree(in); - return err; } @@ -303,7 +302,7 @@ int mlx5_cmd_create_fte(struct mlx5_core_dev *dev, unsigned group_id, struct fs_fte *fte) { - return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); + return mlx5_cmd_set_fte(dev, 0, 0, ft, group_id, fte); } int mlx5_cmd_update_fte(struct mlx5_core_dev *dev, @@ -327,12 +326,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, struct mlx5_flow_table *ft, unsigned int index) { - u32 out[MLX5_ST_SZ_DW(delete_fte_out)]; - u32 in[MLX5_ST_SZ_DW(delete_fte_in)]; - int err; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {0}; MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY); MLX5_SET(delete_fte_in, in, table_type, ft->type); @@ -343,74 +338,55 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, MLX5_SET(delete_fte_in, in, other_vport, 1); } - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); - - return err; + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id) { - u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]; - u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(alloc_flow_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_FLOW_COUNTER); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); - if (err) - return err; - - *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); - - return 0; + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id); + return err; } int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id) { - u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0}; MLX5_SET(dealloc_flow_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_FLOW_COUNTER); MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u16 id, u64 *packets, u64 *bytes) { u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) + - MLX5_ST_SZ_BYTES(traffic_counter)]; - u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; + MLX5_ST_SZ_BYTES(traffic_counter)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; void *stats; int err = 0; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(query_flow_counter_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_COUNTER); MLX5_SET(query_flow_counter_in, in, op_mod, 0); MLX5_SET(query_flow_counter_in, in, flow_counter_id, id); - - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; stats = MLX5_ADDR_OF(query_flow_counter_out, out, flow_statistics); *packets = MLX5_GET64(traffic_counter, stats, packets); *bytes = MLX5_GET64(traffic_counter, stats, octets); - return 0; } @@ -448,18 +424,14 @@ void mlx5_cmd_fc_bulk_free(struct mlx5_cmd_fc_bulk *b) int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b) { - u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0}; MLX5_SET(query_flow_counter_in, in, opcode, MLX5_CMD_OP_QUERY_FLOW_COUNTER); MLX5_SET(query_flow_counter_in, in, op_mod, 0); MLX5_SET(query_flow_counter_in, in, flow_counter_id, b->id); MLX5_SET(query_flow_counter_in, in, num_of_counters, b->num); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - b->out, b->outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), b->out, b->outlen); } void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, @@ -480,3 +452,51 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, *packets = MLX5_GET64(traffic_counter, stats, packets); *bytes = MLX5_GET64(traffic_counter, stats, octets); } + +#define MAX_ENCAP_SIZE (128) + +int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id) +{ + u32 out[MLX5_ST_SZ_DW(alloc_encap_header_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_encap_header_in) + + (MAX_ENCAP_SIZE / sizeof(u32))]; + void *encap_header_in = MLX5_ADDR_OF(alloc_encap_header_in, in, + encap_header); + void *header = MLX5_ADDR_OF(encap_header_in, encap_header_in, + encap_header); + int inlen = header - (void *)in + size; + int err; + + if (size > MAX_ENCAP_SIZE) + return -EINVAL; + + memset(in, 0, inlen); + MLX5_SET(alloc_encap_header_in, in, opcode, + MLX5_CMD_OP_ALLOC_ENCAP_HEADER); + MLX5_SET(encap_header_in, encap_header_in, encap_header_size, size); + MLX5_SET(encap_header_in, encap_header_in, header_type, header_type); + memcpy(header, encap_header, size); + + memset(out, 0, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + + *encap_id = MLX5_GET(alloc_encap_header_out, out, encap_id); + return err; +} + +void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id) +{ + u32 in[MLX5_ST_SZ_DW(dealloc_encap_header_in)]; + u32 out[MLX5_ST_SZ_DW(dealloc_encap_header_out)]; + + memset(in, 0, sizeof(in)); + MLX5_SET(dealloc_encap_header_in, in, opcode, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER); + MLX5_SET(dealloc_encap_header_in, in, encap_id, encap_id); + + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 158844cef82b..c5bc4686c832 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -35,6 +35,7 @@ int mlx5_cmd_create_flow_table(struct mlx5_core_dev *dev, u16 vport, + enum fs_flow_table_op_mod op_mod, enum fs_flow_table_type type, unsigned int level, unsigned int log_size, struct mlx5_flow_table *next_ft, unsigned int *table_id); @@ -88,4 +89,11 @@ void mlx5_cmd_fc_bulk_get(struct mlx5_core_dev *dev, struct mlx5_cmd_fc_bulk *b, u16 id, u64 *packets, u64 *bytes); +int mlx5_cmd_alloc_encap(struct mlx5_core_dev *dev, + int header_type, + size_t size, + void *encap_header, + u32 *encap_id); +void mlx5_cmd_dealloc_encap(struct mlx5_core_dev *dev, u32 encap_id); + #endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 3d6c1f65e586..5da2cc878582 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -96,6 +96,10 @@ #define OFFLOADS_NUM_PRIOS 1 #define OFFLOADS_MIN_LEVEL (ANCHOR_MIN_LEVEL + 1) +#define LAG_PRIO_NUM_LEVELS 1 +#define LAG_NUM_PRIOS 1 +#define LAG_MIN_LEVEL (OFFLOADS_MIN_LEVEL + 1) + struct node_caps { size_t arr_sz; long *caps; @@ -111,12 +115,16 @@ static struct init_tree_node { int num_levels; } root_fs = { .type = FS_TYPE_NAMESPACE, - .ar_size = 6, + .ar_size = 7, .children = (struct init_tree_node[]) { ADD_PRIO(0, BY_PASS_MIN_LEVEL, 0, FS_CHAINING_CAPS, ADD_NS(ADD_MULTIPLE_PRIO(MLX5_BY_PASS_NUM_PRIOS, BY_PASS_PRIO_NUM_LEVELS))), + ADD_PRIO(0, LAG_MIN_LEVEL, 0, + FS_CHAINING_CAPS, + ADD_NS(ADD_MULTIPLE_PRIO(LAG_NUM_PRIOS, + LAG_PRIO_NUM_LEVELS))), ADD_PRIO(0, OFFLOADS_MIN_LEVEL, 0, {}, ADD_NS(ADD_MULTIPLE_PRIO(OFFLOADS_NUM_PRIOS, OFFLOADS_MAX_FT))), ADD_PRIO(0, ETHTOOL_MIN_LEVEL, 0, @@ -345,7 +353,7 @@ static void del_flow_table(struct fs_node *node) err = mlx5_cmd_destroy_flow_table(dev, ft); if (err) - pr_warn("flow steering can't destroy ft\n"); + mlx5_core_warn(dev, "flow steering can't destroy ft\n"); fs_get_obj(prio, ft->node.parent); prio->num_ft--; } @@ -364,7 +372,7 @@ static void del_rule(struct fs_node *node) match_value = mlx5_vzalloc(match_len); if (!match_value) { - pr_warn("failed to allocate inbox\n"); + mlx5_core_warn(dev, "failed to allocate inbox\n"); return; } @@ -387,8 +395,9 @@ static void del_rule(struct fs_node *node) modify_mask, fte); if (err) - pr_warn("%s can't del rule fg id=%d fte_index=%d\n", - __func__, fg->id, fte->index); + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); } kvfree(match_value); } @@ -409,8 +418,9 @@ static void del_fte(struct fs_node *node) err = mlx5_cmd_delete_fte(dev, ft, fte->index); if (err) - pr_warn("flow steering can't delete fte in index %d of flow group id %d\n", - fte->index, fg->id); + mlx5_core_warn(dev, + "flow steering can't delete fte in index %d of flow group id %d\n", + fte->index, fg->id); fte->status = 0; fg->num_ftes--; @@ -427,8 +437,8 @@ static void del_flow_group(struct fs_node *node) dev = get_dev(&ft->node); if (mlx5_cmd_destroy_flow_group(dev, ft, fg->id)) - pr_warn("flow steering can't destroy fg %d of ft %d\n", - fg->id, ft->id); + mlx5_core_warn(dev, "flow steering can't destroy fg %d of ft %d\n", + fg->id, ft->id); } static struct fs_fte *alloc_fte(u8 action, @@ -475,7 +485,8 @@ static struct mlx5_flow_group *alloc_flow_group(u32 *create_fg_in) } static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_fte, - enum fs_flow_table_type table_type) + enum fs_flow_table_type table_type, + enum fs_flow_table_op_mod op_mod) { struct mlx5_flow_table *ft; @@ -485,6 +496,7 @@ static struct mlx5_flow_table *alloc_flow_table(int level, u16 vport, int max_ft ft->level = level; ft->node.type = FS_TYPE_FLOW_TABLE; + ft->op_mod = op_mod; ft->type = table_type; ft->vport = vport; ft->max_fte = max_fte; @@ -722,6 +734,7 @@ static void list_add_flow_table(struct mlx5_flow_table *ft, } static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespace *ns, + enum fs_flow_table_op_mod op_mod, u16 vport, int prio, int max_fte, u32 level) { @@ -754,18 +767,19 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa level += fs_prio->start_level; ft = alloc_flow_table(level, vport, - roundup_pow_of_two(max_fte), - root->table_type); + max_fte ? roundup_pow_of_two(max_fte) : 0, + root->table_type, + op_mod); if (!ft) { err = -ENOMEM; goto unlock_root; } tree_init_node(&ft->node, 1, del_flow_table); - log_table_sz = ilog2(ft->max_fte); + log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); - err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->type, ft->level, - log_table_sz, next_ft, &ft->id); + err = mlx5_cmd_create_flow_table(root->dev, ft->vport, ft->op_mod, ft->type, + ft->level, log_table_sz, next_ft, &ft->id); if (err) goto free_ft; @@ -792,16 +806,27 @@ struct mlx5_flow_table *mlx5_create_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, u32 level) { - return __mlx5_create_flow_table(ns, 0, prio, max_fte, level); + return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, 0, prio, + max_fte, level); } struct mlx5_flow_table *mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int max_fte, u32 level, u16 vport) { - return __mlx5_create_flow_table(ns, vport, prio, max_fte, level); + return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_NORMAL, vport, prio, + max_fte, level); } +struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( + struct mlx5_flow_namespace *ns, + int prio, u32 level) +{ + return __mlx5_create_flow_table(ns, FS_FT_OP_MOD_LAG_DEMUX, 0, prio, 0, + level); +} +EXPORT_SYMBOL(mlx5_create_lag_demux_flow_table); + struct mlx5_flow_table *mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, @@ -1379,6 +1404,7 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, switch (type) { case MLX5_FLOW_NAMESPACE_BYPASS: + case MLX5_FLOW_NAMESPACE_LAG: case MLX5_FLOW_NAMESPACE_OFFLOADS: case MLX5_FLOW_NAMESPACE_ETHTOOL: case MLX5_FLOW_NAMESPACE_KERNEL: @@ -1401,6 +1427,16 @@ struct mlx5_flow_namespace *mlx5_get_flow_namespace(struct mlx5_core_dev *dev, return &steering->esw_ingress_root_ns->ns; else return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_RX: + if (steering->sniffer_rx_root_ns) + return &steering->sniffer_rx_root_ns->ns; + else + return NULL; + case MLX5_FLOW_NAMESPACE_SNIFFER_TX: + if (steering->sniffer_tx_root_ns) + return &steering->sniffer_tx_root_ns->ns; + else + return NULL; default: return NULL; } @@ -1700,10 +1736,46 @@ void mlx5_cleanup_fs(struct mlx5_core_dev *dev) cleanup_root_ns(steering->esw_egress_root_ns); cleanup_root_ns(steering->esw_ingress_root_ns); cleanup_root_ns(steering->fdb_root_ns); + cleanup_root_ns(steering->sniffer_rx_root_ns); + cleanup_root_ns(steering->sniffer_tx_root_ns); mlx5_cleanup_fc_stats(dev); kfree(steering); } +static int init_sniffer_tx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_tx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_TX); + if (!steering->sniffer_tx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_tx_root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + cleanup_root_ns(steering->sniffer_tx_root_ns); + return PTR_ERR(prio); + } + return 0; +} + +static int init_sniffer_rx_root_ns(struct mlx5_flow_steering *steering) +{ + struct fs_prio *prio; + + steering->sniffer_rx_root_ns = create_root_ns(steering, FS_FT_SNIFFER_RX); + if (!steering->sniffer_rx_root_ns) + return -ENOMEM; + + /* Create single prio */ + prio = fs_create_prio(&steering->sniffer_rx_root_ns->ns, 0, 1); + if (IS_ERR(prio)) { + cleanup_root_ns(steering->sniffer_rx_root_ns); + return PTR_ERR(prio); + } + return 0; +} + static int init_fdb_root_ns(struct mlx5_flow_steering *steering) { struct fs_prio *prio; @@ -1800,6 +1872,18 @@ int mlx5_init_fs(struct mlx5_core_dev *dev) } } + if (MLX5_CAP_FLOWTABLE_SNIFFER_RX(dev, ft_support)) { + err = init_sniffer_rx_root_ns(steering); + if (err) + goto err; + } + + if (MLX5_CAP_FLOWTABLE_SNIFFER_TX(dev, ft_support)) { + err = init_sniffer_tx_root_ns(steering); + if (err) + goto err; + } + return 0; err: mlx5_cleanup_fs(dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 9cffb6aeb4e9..71ff03bceabb 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -49,6 +49,13 @@ enum fs_flow_table_type { FS_FT_ESW_EGRESS_ACL = 0x2, FS_FT_ESW_INGRESS_ACL = 0x3, FS_FT_FDB = 0X4, + FS_FT_SNIFFER_RX = 0X5, + FS_FT_SNIFFER_TX = 0X6, +}; + +enum fs_flow_table_op_mod { + FS_FT_OP_MOD_NORMAL, + FS_FT_OP_MOD_LAG_DEMUX, }; enum fs_fte_status { @@ -61,6 +68,8 @@ struct mlx5_flow_steering { struct mlx5_flow_root_namespace *fdb_root_ns; struct mlx5_flow_root_namespace *esw_egress_root_ns; struct mlx5_flow_root_namespace *esw_ingress_root_ns; + struct mlx5_flow_root_namespace *sniffer_tx_root_ns; + struct mlx5_flow_root_namespace *sniffer_rx_root_ns; }; struct fs_node { @@ -93,6 +102,7 @@ struct mlx5_flow_table { unsigned int max_fte; unsigned int level; enum fs_flow_table_type type; + enum fs_flow_table_op_mod op_mod; struct { bool active; unsigned int required_groups; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c index 77fc1aa26114..5718aada6605 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c @@ -38,13 +38,10 @@ static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_adapter_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0}; MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } int mlx5_query_board_id(struct mlx5_core_dev *dev) @@ -162,38 +159,18 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev) int mlx5_cmd_init_hca(struct mlx5_core_dev *dev) { - struct mlx5_cmd_init_hca_mbox_in in; - struct mlx5_cmd_init_hca_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_INIT_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev) { - struct mlx5_cmd_teardown_hca_mbox_in in; - struct mlx5_cmd_teardown_hca_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_TEARDOWN_HCA); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c new file mode 100644 index 000000000000..55957246c0e8 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -0,0 +1,588 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include "mlx5_core.h" + +enum { + MLX5_LAG_FLAG_BONDED = 1 << 0, +}; + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + bool is_bonded; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. + */ +struct mlx5_lag { + u8 flags; + u8 v2p_map[MLX5_MAX_PORTS]; + struct lag_func pf[MLX5_MAX_PORTS]; + struct lag_tracker tracker; + struct delayed_work bond_work; + struct notifier_block nb; +}; + +/* General purpose, use for short periods of time. + * Beware of lock dependencies (preferably, no locks should be acquired + * under it). + */ +static DEFINE_MUTEX(lag_mutex); + +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2) +{ + u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_lag_out)] = {0}; + void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); + + MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_lag_out)] = {0}; + void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x1); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0}; + + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} + +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0}; + + MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); + +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0}; + + MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); + + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); +} +EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); + +static struct mlx5_lag *mlx5_lag_dev_get(struct mlx5_core_dev *dev) +{ + return dev->priv.lag; +} + +static int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].netdev == ndev) + return i; + + return -1; +} + +static bool mlx5_lag_is_bonded(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_BONDED); +} + +static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, + u8 *port1, u8 *port2) +{ + if (tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + if (tracker->netdev_state[0].tx_enabled) { + *port1 = 1; + *port2 = 1; + } else { + *port1 = 2; + *port2 = 2; + } + } else { + *port1 = 1; + *port2 = 2; + if (!tracker->netdev_state[0].link_up) + *port1 = 2; + else if (!tracker->netdev_state[1].link_up) + *port2 = 1; + } +} + +static void mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + ldev->flags |= MLX5_LAG_FLAG_BONDED; + + mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[0], + &ldev->v2p_map[1]); + + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[0], ldev->v2p_map[1]); + if (err) + mlx5_core_err(dev0, + "Failed to create LAG (%d)\n", + err); +} + +static void mlx5_deactivate_lag(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + int err; + + ldev->flags &= ~MLX5_LAG_FLAG_BONDED; + + err = mlx5_cmd_destroy_lag(dev0); + if (err) + mlx5_core_err(dev0, + "Failed to destroy LAG (%d)\n", + err); +} + +static void mlx5_do_bond(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[0].dev; + struct mlx5_core_dev *dev1 = ldev->pf[1].dev; + struct lag_tracker tracker; + u8 v2p_port1, v2p_port2; + int i, err; + + if (!dev0 || !dev1) + return; + + mutex_lock(&lag_mutex); + tracker = ldev->tracker; + mutex_unlock(&lag_mutex); + + if (tracker.is_bonded && !mlx5_lag_is_bonded(ldev)) { + if (mlx5_sriov_is_enabled(dev0) || + mlx5_sriov_is_enabled(dev1)) { + mlx5_core_warn(dev0, "LAG is not supported with SRIOV"); + return; + } + + for (i = 0; i < MLX5_MAX_PORTS; i++) + mlx5_remove_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); + + mlx5_activate_lag(ldev, &tracker); + + mlx5_add_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_enable_roce(dev1); + } else if (tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + mlx5_infer_tx_affinity_mapping(&tracker, &v2p_port1, + &v2p_port2); + + if ((v2p_port1 != ldev->v2p_map[0]) || + (v2p_port2 != ldev->v2p_map[1])) { + ldev->v2p_map[0] = v2p_port1; + ldev->v2p_map[1] = v2p_port2; + + err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + if (err) + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + } + } else if (!tracker.is_bonded && mlx5_lag_is_bonded(ldev)) { + mlx5_remove_dev_by_protocol(dev0, MLX5_INTERFACE_PROTOCOL_IB); + mlx5_nic_vport_disable_roce(dev1); + + mlx5_deactivate_lag(ldev); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + mlx5_add_dev_by_protocol(ldev->pf[i].dev, + MLX5_INTERFACE_PROTOCOL_IB); + } +} + +static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) +{ + schedule_delayed_work(&ldev->bond_work, delay); +} + +static void mlx5_do_bond_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, + bond_work); + int status; + + status = mlx5_dev_list_trylock(); + if (!status) { + /* 1 sec delay. */ + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_do_bond(ldev); + mlx5_dev_list_unlock(); +} + +static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *ndev_tmp; + struct netdev_lag_upper_info *lag_upper_info; + bool is_bonded; + int bond_status = 0; + int num_slaves = 0; + int idx; + + if (!netif_is_lag_master(upper)) + return 0; + + lag_upper_info = info->upper_info; + + /* The event may still be of interest if the slave does not belong to + * us, but is enslaved to a master which has one or more of our netdevs + * as slaves (e.g., if a new slave is added to a master that bonds two + * of our netdevs, we should unbond). + */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx > -1) + bond_status |= (1 << idx); + + num_slaves++; + } + rcu_read_unlock(); + + /* None of this lagdev's netdevs are slaves of this master. */ + if (!(bond_status & 0x3)) + return 0; + + if (lag_upper_info) + tracker->tx_type = lag_upper_info->tx_type; + + /* Determine bonding status: + * A device is considered bonded if both its physical ports are slaves + * of the same lag master, and only them. + * Lag mode must be activebackup or hash. + */ + is_bonded = (num_slaves == MLX5_MAX_PORTS) && + (bond_status == 0x3) && + ((tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) || + (tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH)); + + if (tracker->is_bonded != is_bonded) { + tracker->is_bonded = is_bonded; + return 1; + } + + return 0; +} + +static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + int idx; + + if (!netif_is_lag_port(ndev)) + return 0; + + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); + if (idx == -1) + return 0; + + /* This information is used to determine virtual to physical + * port mapping. + */ + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return 0; + + tracker->netdev_state[idx] = *lag_lower_info; + + return 1; +} + +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lag_tracker tracker; + struct mlx5_lag *ldev; + int changed = 0; + + if (!net_eq(dev_net(ndev), &init_net)) + return NOTIFY_DONE; + + if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) + return NOTIFY_DONE; + + ldev = container_of(this, struct mlx5_lag, nb); + tracker = ldev->tracker; + + switch (event) { + case NETDEV_CHANGEUPPER: + changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev, + ptr); + break; + case NETDEV_CHANGELOWERSTATE: + changed = mlx5_handle_changelowerstate_event(ldev, &tracker, + ndev, ptr); + break; + } + + mutex_lock(&lag_mutex); + ldev->tracker = tracker; + mutex_unlock(&lag_mutex); + + if (changed) + mlx5_queue_bond_work(ldev, 0); + + return NOTIFY_DONE; +} + +static struct mlx5_lag *mlx5_lag_dev_alloc(void) +{ + struct mlx5_lag *ldev; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return NULL; + + INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + + return ldev; +} + +static void mlx5_lag_dev_free(struct mlx5_lag *ldev) +{ + kfree(ldev); +} + +static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + unsigned int fn = PCI_FUNC(dev->pdev->devfn); + + if (fn >= MLX5_MAX_PORTS) + return; + + mutex_lock(&lag_mutex); + ldev->pf[fn].dev = dev; + ldev->pf[fn].netdev = netdev; + ldev->tracker.netdev_state[fn].link_up = 0; + ldev->tracker.netdev_state[fn].tx_enabled = 0; + + dev->priv.lag = ldev; + mutex_unlock(&lag_mutex); +} + +static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev == dev) + break; + + if (i == MLX5_MAX_PORTS) + return; + + mutex_lock(&lag_mutex); + memset(&ldev->pf[i], 0, sizeof(*ldev->pf)); + + dev->priv.lag = NULL; + mutex_unlock(&lag_mutex); +} + + +/* Must be called with intf_mutex held */ +void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) +{ + struct mlx5_lag *ldev = NULL; + struct mlx5_core_dev *tmp_dev; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + (MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS)) + return; + + tmp_dev = mlx5_get_next_phys_dev(dev); + if (tmp_dev) + ldev = tmp_dev->priv.lag; + + if (!ldev) { + ldev = mlx5_lag_dev_alloc(); + if (!ldev) { + mlx5_core_err(dev, "Failed to alloc lag dev\n"); + return; + } + } + + mlx5_lag_dev_add_pf(ldev, dev, netdev); + + if (!ldev->nb.notifier_call) { + ldev->nb.notifier_call = mlx5_lag_netdev_event; + if (register_netdevice_notifier(&ldev->nb)) { + ldev->nb.notifier_call = NULL; + mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); + } + } +} + +/* Must be called with intf_mutex held */ +void mlx5_lag_remove(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + int i; + + ldev = mlx5_lag_dev_get(dev); + if (!ldev) + return; + + if (mlx5_lag_is_bonded(ldev)) + mlx5_deactivate_lag(ldev); + + mlx5_lag_dev_remove_pf(ldev, dev); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev) + break; + + if (i == MLX5_MAX_PORTS) { + if (ldev->nb.notifier_call) + unregister_netdevice_notifier(&ldev->nb); + cancel_delayed_work_sync(&ldev->bond_work); + mlx5_lag_dev_free(ldev); + } +} + +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + res = ldev && mlx5_lag_is_bonded(ldev); + mutex_unlock(&lag_mutex); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_active); + +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) +{ + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; + + mutex_lock(&lag_mutex); + ldev = mlx5_lag_dev_get(dev); + + if (!(ldev && mlx5_lag_is_bonded(ldev))) + goto unlock; + + if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + ndev = ldev->tracker.netdev_state[0].tx_enabled ? + ldev->pf[0].netdev : ldev->pf[1].netdev; + } else { + ndev = ldev->pf[0].netdev; + } + if (ndev) + dev_hold(ndev); + +unlock: + mutex_unlock(&lag_mutex); + + return ndev; +} +EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); + +bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv) +{ + struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, + priv); + struct mlx5_lag *ldev; + + if (intf->protocol != MLX5_INTERFACE_PROTOCOL_IB) + return true; + + ldev = mlx5_lag_dev_get(dev); + if (!ldev || !mlx5_lag_is_bonded(ldev) || ldev->pf[0].dev == dev) + return true; + + /* If bonded, we do not add an IB device for PF1. */ + return false; +} + diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mad.c b/drivers/net/ethernet/mellanox/mlx5/core/mad.c index 1368dac00da0..3a3b0005fd2b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mad.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mad.c @@ -39,36 +39,33 @@ int mlx5_core_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb, u16 opmod, u8 port) { - struct mlx5_mad_ifc_mbox_in *in = NULL; - struct mlx5_mad_ifc_mbox_out *out = NULL; - int err; + int outlen = MLX5_ST_SZ_BYTES(mad_ifc_out); + int inlen = MLX5_ST_SZ_BYTES(mad_ifc_in); + int err = -ENOMEM; + void *data; + void *resp; + u32 *out; + u32 *in; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return -ENOMEM; - - out = kzalloc(sizeof(*out), GFP_KERNEL); - if (!out) { - err = -ENOMEM; + in = kzalloc(inlen, GFP_KERNEL); + out = kzalloc(outlen, GFP_KERNEL); + if (!in || !out) goto out; - } - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MAD_IFC); - in->hdr.opmod = cpu_to_be16(opmod); - in->port = port; + MLX5_SET(mad_ifc_in, in, opcode, MLX5_CMD_OP_MAD_IFC); + MLX5_SET(mad_ifc_in, in, op_mod, opmod); + MLX5_SET(mad_ifc_in, in, port, port); - memcpy(in->data, inb, sizeof(in->data)); + data = MLX5_ADDR_OF(mad_ifc_in, in, mad); + memcpy(data, inb, MLX5_FLD_SZ_BYTES(mad_ifc_in, mad)); - err = mlx5_cmd_exec(dev, in, sizeof(*in), out, sizeof(*out)); + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) goto out; - if (out->hdr.status) { - err = mlx5_cmd_status_to_err(&out->hdr); - goto out; - } - - memcpy(outb, out->data, sizeof(out->data)); + resp = MLX5_ADDR_OF(mad_ifc_out, out, response_mad_packet); + memcpy(outb, resp, + MLX5_FLD_SZ_BYTES(mad_ifc_out, response_mad_packet)); out: kfree(out); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2385bae92672..d9c3c70b29e4 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -72,16 +72,6 @@ static int prof_sel = MLX5_DEFAULT_PROF; module_param_named(prof_sel, prof_sel, int, 0444); MODULE_PARM_DESC(prof_sel, "profile selector. Valid range 0 - 2"); -static LIST_HEAD(intf_list); -static LIST_HEAD(dev_list); -static DEFINE_MUTEX(intf_mutex); - -struct mlx5_device_context { - struct list_head list; - struct mlx5_interface *intf; - void *context; -}; - enum { MLX5_ATOMIC_REQ_MODE_BE = 0x0, MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS = 0x1, @@ -324,7 +314,7 @@ enum { MLX5_DEV_CAP_FLAG_DCT, }; -static u16 to_fw_pkey_sz(u32 size) +static u16 to_fw_pkey_sz(struct mlx5_core_dev *dev, u32 size) { switch (size) { case 128: @@ -340,7 +330,7 @@ static u16 to_fw_pkey_sz(u32 size) case 4096: return 5; default: - pr_warn("invalid pkey table size %d\n", size); + mlx5_core_warn(dev, "invalid pkey table size %d\n", size); return 0; } } @@ -363,10 +353,6 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev, MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP); MLX5_SET(query_hca_cap_in, in, op_mod, opmod); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); - if (err) - goto query_ex; - - err = mlx5_cmd_status_to_err_v2(out); if (err) { mlx5_core_warn(dev, "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n", @@ -409,20 +395,11 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type) static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod) { - u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)]; - int err; - - memset(out, 0, sizeof(out)); + u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0}; MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP); MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1); - err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); - if (err) - return err; - - err = mlx5_cmd_status_to_err_v2(out); - - return err; + return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); } static int handle_hca_cap_atomic(struct mlx5_core_dev *dev) @@ -490,7 +467,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) 128); /* we limit the size of the pkey table to 128 entries for now */ MLX5_SET(cmd_hca_cap, set_hca_cap, pkey_table_size, - to_fw_pkey_sz(128)); + to_fw_pkey_sz(dev, 128)); if (prof->mask & MLX5_PROF_MASK_QP_SIZE) MLX5_SET(cmd_hca_cap, set_hca_cap, log_max_qp, @@ -528,37 +505,22 @@ static int set_hca_ctrl(struct mlx5_core_dev *dev) int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id) { - u32 out[MLX5_ST_SZ_DW(enable_hca_out)]; - u32 in[MLX5_ST_SZ_DW(enable_hca_in)]; - int err; + u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA); MLX5_SET(enable_hca_in, in, function_id, func_id); - memset(out, 0, sizeof(out)); - - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - return mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); } int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id) { - u32 out[MLX5_ST_SZ_DW(disable_hca_out)]; - u32 in[MLX5_ST_SZ_DW(disable_hca_in)]; - int err; + u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA); MLX5_SET(disable_hca_in, in, function_id, func_id); - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - return mlx5_cmd_status_to_err_v2(out); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } cycle_t mlx5_read_internal_timer(struct mlx5_core_dev *dev) @@ -758,44 +720,40 @@ clean: static int mlx5_core_set_issi(struct mlx5_core_dev *dev) { - u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]; - u32 query_out[MLX5_ST_SZ_DW(query_issi_out)]; - u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]; - u32 set_out[MLX5_ST_SZ_DW(set_issi_out)]; - int err; + u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {0}; + u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0}; u32 sup_issi; - - memset(query_in, 0, sizeof(query_in)); - memset(query_out, 0, sizeof(query_out)); + int err; MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI); - - err = mlx5_cmd_exec_check_status(dev, query_in, sizeof(query_in), - query_out, sizeof(query_out)); + err = mlx5_cmd_exec(dev, query_in, sizeof(query_in), + query_out, sizeof(query_out)); if (err) { - if (((struct mlx5_outbox_hdr *)query_out)->status == - MLX5_CMD_STAT_BAD_OP_ERR) { + u32 syndrome; + u8 status; + + mlx5_cmd_mbox_status(query_out, &status, &syndrome); + if (status == MLX5_CMD_STAT_BAD_OP_ERR) { pr_debug("Only ISSI 0 is supported\n"); return 0; } - pr_err("failed to query ISSI\n"); + pr_err("failed to query ISSI err(%d)\n", err); return err; } sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0); if (sup_issi & (1 << 1)) { - memset(set_in, 0, sizeof(set_in)); - memset(set_out, 0, sizeof(set_out)); + u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {0}; + u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0}; MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI); MLX5_SET(set_issi_in, set_in, current_issi, 1); - - err = mlx5_cmd_exec_check_status(dev, set_in, sizeof(set_in), - set_out, sizeof(set_out)); + err = mlx5_cmd_exec(dev, set_in, sizeof(set_in), + set_out, sizeof(set_out)); if (err) { - pr_err("failed to set ISSI=1\n"); + pr_err("failed to set ISSI=1 err(%d)\n", err); return err; } @@ -809,120 +767,6 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev) return -ENOTSUPP; } -static void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - dev_ctx = kmalloc(sizeof(*dev_ctx), GFP_KERNEL); - if (!dev_ctx) - return; - - dev_ctx->intf = intf; - dev_ctx->context = intf->add(dev); - - if (dev_ctx->context) { - spin_lock_irq(&priv->ctx_lock); - list_add_tail(&dev_ctx->list, &priv->ctx_list); - spin_unlock_irq(&priv->ctx_lock); - } else { - kfree(dev_ctx); - } -} - -static void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv) -{ - struct mlx5_device_context *dev_ctx; - struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev, priv); - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf == intf) { - spin_lock_irq(&priv->ctx_lock); - list_del(&dev_ctx->list); - spin_unlock_irq(&priv->ctx_lock); - - intf->remove(dev, dev_ctx->context); - kfree(dev_ctx); - return; - } -} - -static int mlx5_register_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&intf_mutex); - list_add_tail(&priv->dev_list, &dev_list); - list_for_each_entry(intf, &intf_list, list) - mlx5_add_device(intf, priv); - mutex_unlock(&intf_mutex); - - return 0; -} - -static void mlx5_unregister_device(struct mlx5_core_dev *dev) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_interface *intf; - - mutex_lock(&intf_mutex); - list_for_each_entry(intf, &intf_list, list) - mlx5_remove_device(intf, priv); - list_del(&priv->dev_list); - mutex_unlock(&intf_mutex); -} - -int mlx5_register_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - if (!intf->add || !intf->remove) - return -EINVAL; - - mutex_lock(&intf_mutex); - list_add_tail(&intf->list, &intf_list); - list_for_each_entry(priv, &dev_list, dev_list) - mlx5_add_device(intf, priv); - mutex_unlock(&intf_mutex); - - return 0; -} -EXPORT_SYMBOL(mlx5_register_interface); - -void mlx5_unregister_interface(struct mlx5_interface *intf) -{ - struct mlx5_priv *priv; - - mutex_lock(&intf_mutex); - list_for_each_entry(priv, &dev_list, dev_list) - mlx5_remove_device(intf, priv); - list_del(&intf->list); - mutex_unlock(&intf_mutex); -} -EXPORT_SYMBOL(mlx5_unregister_interface); - -void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol) -{ - struct mlx5_priv *priv = &mdev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - void *result = NULL; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &mdev->priv.ctx_list, list) - if ((dev_ctx->intf->protocol == protocol) && - dev_ctx->intf->get_dev) { - result = dev_ctx->intf->get_dev(dev_ctx->context); - break; - } - - spin_unlock_irqrestore(&priv->ctx_lock, flags); - - return result; -} -EXPORT_SYMBOL(mlx5_get_protocol_dev); static int mlx5_pci_init(struct mlx5_core_dev *dev, struct mlx5_priv *priv) { @@ -995,8 +839,102 @@ static void mlx5_pci_close(struct mlx5_core_dev *dev, struct mlx5_priv *priv) debugfs_remove(priv->dbg_root); } -#define MLX5_IB_MOD "mlx5_ib" -static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +{ + struct pci_dev *pdev = dev->pdev; + int err; + + err = mlx5_query_hca_caps(dev); + if (err) { + dev_err(&pdev->dev, "query hca failed\n"); + goto out; + } + + err = mlx5_query_board_id(dev); + if (err) { + dev_err(&pdev->dev, "query board id failed\n"); + goto out; + } + + err = mlx5_eq_init(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize eq\n"); + goto out; + } + + MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); + + err = mlx5_init_cq_table(dev); + if (err) { + dev_err(&pdev->dev, "failed to initialize cq table\n"); + goto err_eq_cleanup; + } + + mlx5_init_qp_table(dev); + + mlx5_init_srq_table(dev); + + mlx5_init_mkey_table(dev); + + err = mlx5_init_rl_table(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init rate limiting\n"); + goto err_tables_cleanup; + } + +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init eswitch %d\n", err); + goto err_rl_cleanup; + } +#endif + + err = mlx5_sriov_init(dev); + if (err) { + dev_err(&pdev->dev, "Failed to init sriov %d\n", err); + goto err_eswitch_cleanup; + } + + return 0; + +err_eswitch_cleanup: +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); + +err_rl_cleanup: +#endif + mlx5_cleanup_rl_table(dev); + +err_tables_cleanup: + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + +err_eq_cleanup: + mlx5_eq_cleanup(dev); + +out: + return err; +} + +static void mlx5_cleanup_once(struct mlx5_core_dev *dev) +{ + mlx5_sriov_cleanup(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_cleanup(dev->priv.eswitch); +#endif + mlx5_cleanup_rl_table(dev); + mlx5_cleanup_mkey_table(dev); + mlx5_cleanup_srq_table(dev); + mlx5_cleanup_qp_table(dev); + mlx5_cleanup_cq_table(dev); + mlx5_eq_cleanup(dev); +} + +static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool boot) { struct pci_dev *pdev = dev->pdev; int err; @@ -1029,12 +967,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto out_err; } - mlx5_pagealloc_init(dev); - err = mlx5_core_enable_hca(dev, 0); if (err) { dev_err(&pdev->dev, "enable hca failed\n"); - goto err_pagealloc_cleanup; + goto err_cmd_cleanup; } err = mlx5_core_set_issi(dev); @@ -1087,34 +1023,21 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_start_health_poll(dev); - err = mlx5_query_hca_caps(dev); - if (err) { - dev_err(&pdev->dev, "query hca failed\n"); - goto err_stop_poll; - } - - err = mlx5_query_board_id(dev); - if (err) { - dev_err(&pdev->dev, "query board id failed\n"); + if (boot && mlx5_init_once(dev, priv)) { + dev_err(&pdev->dev, "sw objs init failed\n"); goto err_stop_poll; } err = mlx5_enable_msix(dev); if (err) { dev_err(&pdev->dev, "enable msix failed\n"); - goto err_stop_poll; - } - - err = mlx5_eq_init(dev); - if (err) { - dev_err(&pdev->dev, "failed to initialize eq\n"); - goto disable_msix; + goto err_cleanup_once; } err = mlx5_alloc_uuars(dev, &priv->uuari); if (err) { dev_err(&pdev->dev, "Failed allocating uar, aborting\n"); - goto err_eq_cleanup; + goto err_disable_msix; } err = mlx5_start_eqs(dev); @@ -1130,15 +1053,10 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) } err = mlx5_irq_set_affinity_hints(dev); - if (err) + if (err) { dev_err(&pdev->dev, "Failed to alloc affinity hint cpumask\n"); - - MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock); - - mlx5_init_cq_table(dev); - mlx5_init_qp_table(dev); - mlx5_init_srq_table(dev); - mlx5_init_mkey_table(dev); + goto err_affinity_hints; + } err = mlx5_init_fs(dev); if (err) { @@ -1146,36 +1064,26 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) goto err_fs; } - err = mlx5_init_rl_table(dev); - if (err) { - dev_err(&pdev->dev, "Failed to init rate limiting\n"); - goto err_rl; - } - #ifdef CONFIG_MLX5_CORE_EN - err = mlx5_eswitch_init(dev); - if (err) { - dev_err(&pdev->dev, "eswitch init failed %d\n", err); - goto err_reg_dev; - } + mlx5_eswitch_attach(dev->priv.eswitch); #endif - err = mlx5_sriov_init(dev); + err = mlx5_sriov_attach(dev); if (err) { dev_err(&pdev->dev, "sriov init failed %d\n", err); goto err_sriov; } - err = mlx5_register_device(dev); - if (err) { - dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); - goto err_reg_dev; + if (mlx5_device_registered(dev)) { + mlx5_attach_device(dev); + } else { + err = mlx5_register_device(dev); + if (err) { + dev_err(&pdev->dev, "mlx5_register_device failed %d\n", err); + goto err_reg_dev; + } } - err = request_module_nowait(MLX5_IB_MOD); - if (err) - pr_info("failed request module on %s\n", MLX5_IB_MOD); - clear_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state); set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state); out: @@ -1183,23 +1091,19 @@ out: return 0; -err_sriov: - if (mlx5_sriov_cleanup(dev)) - dev_err(&dev->pdev->dev, "sriov cleanup failed\n"); - -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); -#endif err_reg_dev: - mlx5_cleanup_rl_table(dev); -err_rl: + mlx5_sriov_detach(dev); + +err_sriov: +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_detach(dev->priv.eswitch); +#endif mlx5_cleanup_fs(dev); + err_fs: - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); + +err_affinity_hints: free_comp_eqs(dev); err_stop_eqs: @@ -1208,12 +1112,13 @@ err_stop_eqs: err_free_uar: mlx5_free_uuars(dev, &priv->uuari); -err_eq_cleanup: - mlx5_eq_cleanup(dev); - -disable_msix: +err_disable_msix: mlx5_disable_msix(dev); +err_cleanup_once: + if (boot) + mlx5_cleanup_once(dev); + err_stop_poll: mlx5_stop_health_poll(dev); if (mlx5_cmd_teardown_hca(dev)) { @@ -1230,8 +1135,7 @@ reclaim_boot_pages: err_disable_hca: mlx5_core_disable_hca(dev, 0); -err_pagealloc_cleanup: - mlx5_pagealloc_cleanup(dev); +err_cmd_cleanup: mlx5_cmd_cleanup(dev); out_err: @@ -1241,40 +1145,35 @@ out_err: return err; } -static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) +static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, + bool cleanup) { int err = 0; - err = mlx5_sriov_cleanup(dev); - if (err) { - dev_warn(&dev->pdev->dev, "%s: sriov cleanup failed - abort\n", - __func__); - return err; - } - mutex_lock(&dev->intf_state_mutex); if (test_bit(MLX5_INTERFACE_STATE_DOWN, &dev->intf_state)) { dev_warn(&dev->pdev->dev, "%s: interface is down, NOP\n", __func__); + if (cleanup) + mlx5_cleanup_once(dev); goto out; } - mlx5_unregister_device(dev); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_cleanup(dev->priv.eswitch); -#endif - mlx5_cleanup_rl_table(dev); + if (mlx5_device_registered(dev)) + mlx5_detach_device(dev); + + mlx5_sriov_detach(dev); +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_detach(dev->priv.eswitch); +#endif mlx5_cleanup_fs(dev); - mlx5_cleanup_mkey_table(dev); - mlx5_cleanup_srq_table(dev); - mlx5_cleanup_qp_table(dev); - mlx5_cleanup_cq_table(dev); mlx5_irq_clear_affinity_hints(dev); free_comp_eqs(dev); mlx5_stop_eqs(dev); mlx5_free_uuars(dev, &priv->uuari); - mlx5_eq_cleanup(dev); mlx5_disable_msix(dev); + if (cleanup) + mlx5_cleanup_once(dev); mlx5_stop_health_poll(dev); err = mlx5_cmd_teardown_hca(dev); if (err) { @@ -1284,7 +1183,6 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv) mlx5_pagealloc_stop(dev); mlx5_reclaim_startup_pages(dev); mlx5_core_disable_hca(dev, 0); - mlx5_pagealloc_cleanup(dev); mlx5_cmd_cleanup(dev); out: @@ -1294,22 +1192,6 @@ out: return err; } -void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, - unsigned long param) -{ - struct mlx5_priv *priv = &dev->priv; - struct mlx5_device_context *dev_ctx; - unsigned long flags; - - spin_lock_irqsave(&priv->ctx_lock, flags); - - list_for_each_entry(dev_ctx, &priv->ctx_list, list) - if (dev_ctx->intf->event) - dev_ctx->intf->event(dev, dev_ctx->context, event, param); - - spin_unlock_irqrestore(&priv->ctx_lock, flags); -} - struct mlx5_core_event_handler { void (*event)(struct mlx5_core_dev *dev, enum mlx5_dev_event event, @@ -1323,6 +1205,7 @@ static const struct devlink_ops mlx5_devlink_ops = { #endif }; +#define MLX5_IB_MOD "mlx5_ib" static int init_one(struct pci_dev *pdev, const struct pci_device_id *id) { @@ -1344,8 +1227,9 @@ static int init_one(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); if (prof_sel < 0 || prof_sel >= ARRAY_SIZE(profile)) { - pr_warn("selected profile out of range, selecting default (%d)\n", - MLX5_DEFAULT_PROF); + mlx5_core_warn(dev, + "selected profile out of range, selecting default (%d)\n", + MLX5_DEFAULT_PROF); prof_sel = MLX5_DEFAULT_PROF; } dev->profile = &profile[prof_sel]; @@ -1368,12 +1252,18 @@ static int init_one(struct pci_dev *pdev, goto close_pci; } - err = mlx5_load_one(dev, priv); + mlx5_pagealloc_init(dev); + + err = mlx5_load_one(dev, priv, true); if (err) { dev_err(&pdev->dev, "mlx5_load_one failed with error code %d\n", err); goto clean_health; } + err = request_module_nowait(MLX5_IB_MOD); + if (err) + pr_info("failed request module on %s\n", MLX5_IB_MOD); + err = devlink_register(devlink, &pdev->dev); if (err) goto clean_load; @@ -1381,8 +1271,9 @@ static int init_one(struct pci_dev *pdev, return 0; clean_load: - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, true); clean_health: + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); close_pci: mlx5_pci_close(dev, priv); @@ -1400,11 +1291,15 @@ static void remove_one(struct pci_dev *pdev) struct mlx5_priv *priv = &dev->priv; devlink_unregister(devlink); - if (mlx5_unload_one(dev, priv)) { + mlx5_unregister_device(dev); + + if (mlx5_unload_one(dev, priv, true)) { dev_err(&dev->pdev->dev, "mlx5_unload_one failed\n"); mlx5_health_cleanup(dev); return; } + + mlx5_pagealloc_cleanup(dev); mlx5_health_cleanup(dev); mlx5_pci_close(dev, priv); pci_set_drvdata(pdev, NULL); @@ -1419,7 +1314,7 @@ static pci_ers_result_t mlx5_pci_err_detected(struct pci_dev *pdev, dev_info(&pdev->dev, "%s was called\n", __func__); mlx5_enter_error_state(dev); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); pci_save_state(pdev); mlx5_pci_disable_device(dev); return state == pci_channel_io_perm_failure ? @@ -1491,7 +1386,7 @@ static void mlx5_pci_resume(struct pci_dev *pdev) dev_info(&pdev->dev, "%s was called\n", __func__); - err = mlx5_load_one(dev, priv); + err = mlx5_load_one(dev, priv, false); if (err) dev_err(&pdev->dev, "%s: mlx5_load_one failed with error code: %d\n" , __func__, err); @@ -1513,7 +1408,7 @@ static void shutdown(struct pci_dev *pdev) dev_info(&pdev->dev, "Shutdown was called\n"); /* Notify mlx5 clients that the kernel is being shut down */ set_bit(MLX5_INTERFACE_STATE_SHUTDOWN, &dev->intf_state); - mlx5_unload_one(dev, priv); + mlx5_unload_one(dev, priv, false); mlx5_pci_disable_device(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c index d5a0c2d61a18..ba2b09cc192f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c @@ -37,70 +37,30 @@ #include #include "mlx5_core.h" -struct mlx5_attach_mcg_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - __be32 rsvd; - u8 gid[16]; -}; - -struct mlx5_attach_mcg_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvf[8]; -}; - -struct mlx5_detach_mcg_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - __be32 rsvd; - u8 gid[16]; -}; - -struct mlx5_detach_mcg_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvf[8]; -}; - int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) { - struct mlx5_attach_mcg_mbox_in in; - struct mlx5_attach_mcg_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {0}; + void *gid; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ATTACH_TO_MCG); - memcpy(in.gid, mgid, sizeof(*mgid)); - in.qpn = cpu_to_be32(qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG); + MLX5_SET(attach_to_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_attach_mcg); int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn) { - struct mlx5_detach_mcg_mbox_in in; - struct mlx5_detach_mcg_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {0}; + void *gid; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DETTACH_FROM_MCG); - memcpy(in.gid, mgid, sizeof(*mgid)); - in.qpn = cpu_to_be32(qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG); + MLX5_SET(detach_from_mcg_in, in, qpn, qpn); + gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid); + memcpy(gid, mgid, sizeof(*mgid)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_detach_mcg); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 2f86ec6fcf25..3d0cfb9f18f9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -58,8 +58,8 @@ do { \ } while (0) #define mlx5_core_err(__dev, format, ...) \ - dev_err(&(__dev)->pdev->dev, "%s:%s:%d:(pid %d): " format, \ - (__dev)->priv.name, __func__, __LINE__, current->pid, \ + dev_err(&(__dev)->pdev->dev, "%s:%d:(pid %d): " format, \ + __func__, __LINE__, current->pid, \ ##__VA_ARGS__) #define mlx5_core_warn(__dev, format, ...) \ @@ -75,19 +75,6 @@ enum { MLX5_CMD_TIME, /* print command execution time */ }; -static inline int mlx5_cmd_exec_check_status(struct mlx5_core_dev *dev, u32 *in, - int in_size, u32 *out, - int out_size) -{ - int err; - - err = mlx5_cmd_exec(dev, in, in_size, out, out_size); - if (err) - return err; - - return mlx5_cmd_status_to_err((struct mlx5_outbox_hdr *)out); -} - int mlx5_query_hca_caps(struct mlx5_core_dev *dev); int mlx5_query_board_id(struct mlx5_core_dev *dev); int mlx5_cmd_init_hca(struct mlx5_core_dev *dev); @@ -96,7 +83,12 @@ void mlx5_core_event(struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); void mlx5_enter_error_state(struct mlx5_core_dev *dev); void mlx5_disable_device(struct mlx5_core_dev *dev); +int mlx5_sriov_init(struct mlx5_core_dev *dev); +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev); +int mlx5_sriov_attach(struct mlx5_core_dev *dev); +void mlx5_sriov_detach(struct mlx5_core_dev *dev); int mlx5_core_sriov_configure(struct pci_dev *dev, int num_vfs); +bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev); int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id); int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev); @@ -105,7 +97,38 @@ u32 mlx5_get_msix_vec(struct mlx5_core_dev *dev, int vecidx); struct mlx5_eq *mlx5_eqn2eq(struct mlx5_core_dev *dev, int eqn); void mlx5_cq_tasklet_cb(unsigned long data); +void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev); +void mlx5_lag_remove(struct mlx5_core_dev *dev); + +void mlx5_add_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_remove_device(struct mlx5_interface *intf, struct mlx5_priv *priv); +void mlx5_attach_device(struct mlx5_core_dev *dev); +void mlx5_detach_device(struct mlx5_core_dev *dev); +bool mlx5_device_registered(struct mlx5_core_dev *dev); +int mlx5_register_device(struct mlx5_core_dev *dev); +void mlx5_unregister_device(struct mlx5_core_dev *dev); +void mlx5_add_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +void mlx5_remove_dev_by_protocol(struct mlx5_core_dev *dev, int protocol); +struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev); +void mlx5_dev_list_lock(void); +void mlx5_dev_list_unlock(void); +int mlx5_dev_list_trylock(void); + +bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv); + void mlx5e_init(void); void mlx5e_cleanup(void); +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). + */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + #endif /* __MLX5_CORE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 77a7293921d5..b9736f505bdf 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -49,48 +49,43 @@ void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev) { } -int mlx5_core_create_mkey(struct mlx5_core_dev *dev, - struct mlx5_core_mkey *mkey, - struct mlx5_create_mkey_mbox_in *in, int inlen, - mlx5_cmd_cbk_t callback, void *context, - struct mlx5_create_mkey_mbox_out *out) +int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen, + u32 *out, int outlen, + mlx5_cmd_cbk_t callback, void *context) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; - struct mlx5_create_mkey_mbox_out lout; + u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0}; + u32 mkey_index; + void *mkc; int err; u8 key; - memset(&lout, 0, sizeof(lout)); spin_lock_irq(&dev->priv.mkey_lock); key = dev->priv.mkey_key++; spin_unlock_irq(&dev->priv.mkey_lock); - in->seg.qpn_mkey7_0 |= cpu_to_be32(key); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_MKEY); - if (callback) { - err = mlx5_cmd_exec_cb(dev, in, inlen, out, sizeof(*out), - callback, context); + mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); + + MLX5_SET(create_mkey_in, in, opcode, MLX5_CMD_OP_CREATE_MKEY); + MLX5_SET(mkc, mkc, mkey_7_0, key); + + if (callback) + return mlx5_cmd_exec_cb(dev, in, inlen, out, outlen, + callback, context); + + err = mlx5_cmd_exec(dev, in, inlen, lout, sizeof(lout)); + if (err) return err; - } else { - err = mlx5_cmd_exec(dev, in, inlen, &lout, sizeof(lout)); - } - if (err) { - mlx5_core_dbg(dev, "cmd exec failed %d\n", err); - return err; - } - - if (lout.hdr.status) { - mlx5_core_dbg(dev, "status %d\n", lout.hdr.status); - return mlx5_cmd_status_to_err(&lout.hdr); - } - - mkey->iova = be64_to_cpu(in->seg.start_addr); - mkey->size = be64_to_cpu(in->seg.len); - mkey->key = mlx5_idx_to_mkey(be32_to_cpu(lout.mkey) & 0xffffff) | key; - mkey->pd = be32_to_cpu(in->seg.flags_pd) & 0xffffff; + mkey_index = MLX5_GET(create_mkey_out, lout, mkey_index); + mkey->iova = MLX5_GET64(mkc, mkc, start_addr); + mkey->size = MLX5_GET64(mkc, mkc, len); + mkey->key = mlx5_idx_to_mkey(mkey_index) | key; + mkey->pd = MLX5_GET(mkc, mkc, pd); mlx5_core_dbg(dev, "out 0x%x, key 0x%x, mkey 0x%x\n", - be32_to_cpu(lout.mkey), key, mkey->key); + mkey_index, key, mkey->key); /* connect to mkey tree */ write_lock_irq(&table->lock); @@ -104,20 +99,25 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev, return err; } +EXPORT_SYMBOL(mlx5_core_create_mkey_cb); + +int mlx5_core_create_mkey(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen) +{ + return mlx5_core_create_mkey_cb(dev, mkey, in, inlen, + NULL, 0, NULL, NULL); +} EXPORT_SYMBOL(mlx5_core_create_mkey); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey) { struct mlx5_mkey_table *table = &dev->priv.mkey_table; - struct mlx5_destroy_mkey_mbox_in in; - struct mlx5_destroy_mkey_mbox_out out; + u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {0}; struct mlx5_core_mkey *deleted_mkey; unsigned long flags; - int err; - - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); write_lock_irqsave(&table->lock, flags); deleted_mkey = radix_tree_delete(&table->tree, mlx5_base_mkey(mkey->key)); @@ -128,94 +128,71 @@ int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, return -ENOENT; } - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY); + MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_mkey); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_query_mkey_mbox_out *out, int outlen) + u32 *out, int outlen) { - struct mlx5_query_mkey_mbox_in in; - int err; + u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0}; - memset(&in, 0, sizeof(in)); memset(out, 0, outlen); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_MKEY); - in.mkey = cpu_to_be32(mlx5_mkey_to_idx(mkey->key)); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY); + MLX5_SET(query_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_mkey); int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey) { - struct mlx5_query_special_ctxs_mbox_in in; - struct mlx5_query_special_ctxs_mbox_out out; + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - *mkey = be32_to_cpu(out.dump_fill_mkey); - + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *mkey = MLX5_GET(query_special_contexts_out, out, + dump_fill_mkey); return err; } EXPORT_SYMBOL(mlx5_core_dump_fill_mkey); +static inline u32 mlx5_get_psv(u32 *out, int psv_index) +{ + switch (psv_index) { + case 1: return MLX5_GET(create_psv_out, out, psv1_index); + case 2: return MLX5_GET(create_psv_out, out, psv2_index); + case 3: return MLX5_GET(create_psv_out, out, psv3_index); + default: return MLX5_GET(create_psv_out, out, psv0_index); + } +} + int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index) { - struct mlx5_allocate_psv_in in; - struct mlx5_allocate_psv_out out; + u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {0}; int i, err; if (npsvs > MLX5_MAX_PSVS) return -EINVAL; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); + MLX5_SET(create_psv_in, in, opcode, MLX5_CMD_OP_CREATE_PSV); + MLX5_SET(create_psv_in, in, pd, pdn); + MLX5_SET(create_psv_in, in, num_psv, npsvs); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_PSV); - in.npsv_pd = cpu_to_be32((npsvs << 28) | pdn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) { - mlx5_core_err(dev, "cmd exec failed %d\n", err); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err) return err; - } - - if (out.hdr.status) { - mlx5_core_err(dev, "create_psv bad status %d\n", - out.hdr.status); - return mlx5_cmd_status_to_err(&out.hdr); - } for (i = 0; i < npsvs; i++) - sig_index[i] = be32_to_cpu(out.psv_idx[i]) & 0xffffff; + sig_index[i] = mlx5_get_psv(out, i); return err; } @@ -223,29 +200,11 @@ EXPORT_SYMBOL(mlx5_core_create_psv); int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) { - struct mlx5_destroy_psv_in in; - struct mlx5_destroy_psv_out out; - int err; + u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - - in.psv_number = cpu_to_be32(psv_num); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_PSV); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) { - mlx5_core_err(dev, "destroy_psv cmd exec failed %d\n", err); - goto out; - } - - if (out.hdr.status) { - mlx5_core_err(dev, "destroy_psv bad status %d\n", - out.hdr.status); - err = mlx5_cmd_status_to_err(&out.hdr); - goto out; - } - -out: - return err; + MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV); + MLX5_SET(destroy_psv_in, in, psvn, psv_num); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_psv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c index 32dea3524cee..d4585154151d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c @@ -44,12 +44,6 @@ enum { MLX5_PAGES_TAKE = 2 }; -enum { - MLX5_BOOT_PAGES = 1, - MLX5_INIT_PAGES = 2, - MLX5_POST_INIT_PAGES = 3 -}; - struct mlx5_pages_req { struct mlx5_core_dev *dev; u16 func_id; @@ -67,33 +61,6 @@ struct fw_page { unsigned free_count; }; -struct mlx5_query_pages_inbox { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_pages_outbox { - struct mlx5_outbox_hdr hdr; - __be16 rsvd; - __be16 func_id; - __be32 num_pages; -}; - -struct mlx5_manage_pages_inbox { - struct mlx5_inbox_hdr hdr; - __be16 rsvd; - __be16 func_id; - __be32 num_entries; - __be64 pas[0]; -}; - -struct mlx5_manage_pages_outbox { - struct mlx5_outbox_hdr hdr; - __be32 num_entries; - u8 rsvd[4]; - __be64 pas[0]; -}; - enum { MAX_RECLAIM_TIME_MSECS = 5000, MAX_RECLAIM_VFS_PAGES_TIME_MSECS = 2 * 1000 * 60, @@ -167,24 +134,21 @@ static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr) static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id, s32 *npages, int boot) { - struct mlx5_query_pages_inbox in; - struct mlx5_query_pages_outbox out; + u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_PAGES); - in.hdr.opmod = boot ? cpu_to_be16(MLX5_BOOT_PAGES) : cpu_to_be16(MLX5_INIT_PAGES); + MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES); + MLX5_SET(query_pages_in, in, op_mod, boot ? + MLX5_QUERY_PAGES_IN_OP_MOD_BOOT_PAGES : + MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - *npages = be32_to_cpu(out.num_pages); - *func_id = be16_to_cpu(out.func_id); + *npages = MLX5_GET(query_pages_out, out, num_pages); + *func_id = MLX5_GET(query_pages_out, out, function_id); return err; } @@ -280,46 +244,37 @@ out_alloc: static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id) { - struct mlx5_manage_pages_inbox *in; - struct mlx5_manage_pages_outbox out; + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; int err; - in = kzalloc(sizeof(*in), GFP_KERNEL); - if (!in) - return; - - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - in->hdr.opmod = cpu_to_be16(MLX5_PAGES_CANT_GIVE); - in->func_id = cpu_to_be16(func_id); - err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); - if (!err) - err = mlx5_cmd_status_to_err(&out.hdr); + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_CANT_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) - mlx5_core_warn(dev, "page notify failed\n"); - - kfree(in); + mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n", + func_id, err); } static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages, int notify_fail) { - struct mlx5_manage_pages_inbox *in; - struct mlx5_manage_pages_outbox out; - int inlen; + u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(manage_pages_in); u64 addr; int err; + u32 *in; int i; - inlen = sizeof(*in) + npages * sizeof(in->pas[0]); + inlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_in, pas[0]); in = mlx5_vzalloc(inlen); if (!in) { err = -ENOMEM; mlx5_core_warn(dev, "vzalloc failed %d\n", inlen); goto out_free; } - memset(&out, 0, sizeof(out)); for (i = 0; i < npages; i++) { retry: @@ -332,27 +287,21 @@ retry: goto retry; } - in->pas[i] = cpu_to_be64(addr); + MLX5_SET64(manage_pages_in, in, pas[i], addr); } - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - in->hdr.opmod = cpu_to_be16(MLX5_PAGES_GIVE); - in->func_id = cpu_to_be16(func_id); - in->num_entries = cpu_to_be32(npages); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_GIVE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (err) { mlx5_core_warn(dev, "func_id 0x%x, npages %d, err %d\n", func_id, npages, err); goto out_4k; } - err = mlx5_cmd_status_to_err(&out.hdr); - if (err) { - mlx5_core_warn(dev, "func_id 0x%x, npages %d, status %d\n", - func_id, npages, out.hdr.status); - goto out_4k; - } - dev->priv.fw_pages += npages; if (func_id) dev->priv.vfs_pages += npages; @@ -364,7 +313,7 @@ retry: out_4k: for (i--; i >= 0; i--) - free_4k(dev, be64_to_cpu(in->pas[i])); + free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i])); out_free: kvfree(in); if (notify_fail) @@ -373,64 +322,67 @@ out_free: } static int reclaim_pages_cmd(struct mlx5_core_dev *dev, - struct mlx5_manage_pages_inbox *in, int in_size, - struct mlx5_manage_pages_outbox *out, int out_size) + u32 *in, int in_size, u32 *out, int out_size) { struct fw_page *fwp; struct rb_node *p; + u32 func_id; u32 npages; u32 i = 0; if (dev->state != MLX5_DEVICE_STATE_INTERNAL_ERROR) - return mlx5_cmd_exec_check_status(dev, (u32 *)in, in_size, - (u32 *)out, out_size); + return mlx5_cmd_exec(dev, in, in_size, out, out_size); - npages = be32_to_cpu(in->num_entries); + /* No hard feelings, we want our pages back! */ + npages = MLX5_GET(manage_pages_in, in, input_num_entries); + func_id = MLX5_GET(manage_pages_in, in, function_id); p = rb_first(&dev->priv.page_root); while (p && i < npages) { fwp = rb_entry(p, struct fw_page, rb_node); - out->pas[i] = cpu_to_be64(fwp->addr); p = rb_next(p); + if (fwp->func_id != func_id) + continue; + + MLX5_SET64(manage_pages_out, out, pas[i], fwp->addr); i++; } - out->num_entries = cpu_to_be32(i); + MLX5_SET(manage_pages_out, out, output_num_entries, i); return 0; } static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, int *nclaimed) { - struct mlx5_manage_pages_inbox in; - struct mlx5_manage_pages_outbox *out; + int outlen = MLX5_ST_SZ_BYTES(manage_pages_out); + u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0}; int num_claimed; - int outlen; - u64 addr; + u32 *out; int err; int i; if (nclaimed) *nclaimed = 0; - memset(&in, 0, sizeof(in)); - outlen = sizeof(*out) + npages * sizeof(out->pas[0]); + outlen += npages * MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); out = mlx5_vzalloc(outlen); if (!out) return -ENOMEM; - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_MANAGE_PAGES); - in.hdr.opmod = cpu_to_be16(MLX5_PAGES_TAKE); - in.func_id = cpu_to_be16(func_id); - in.num_entries = cpu_to_be32(npages); + MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES); + MLX5_SET(manage_pages_in, in, op_mod, MLX5_PAGES_TAKE); + MLX5_SET(manage_pages_in, in, function_id, func_id); + MLX5_SET(manage_pages_in, in, input_num_entries, npages); + mlx5_core_dbg(dev, "npages %d, outlen %d\n", npages, outlen); - err = reclaim_pages_cmd(dev, &in, sizeof(in), out, outlen); + err = reclaim_pages_cmd(dev, in, sizeof(in), out, outlen); if (err) { mlx5_core_err(dev, "failed reclaiming pages: err %d\n", err); goto out_free; } - num_claimed = be32_to_cpu(out->num_entries); + num_claimed = MLX5_GET(manage_pages_out, out, output_num_entries); if (num_claimed > npages) { mlx5_core_warn(dev, "fw returned %d, driver asked %d => corruption\n", num_claimed, npages); @@ -438,10 +390,9 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages, goto out_free; } - for (i = 0; i < num_claimed; i++) { - addr = be64_to_cpu(out->pas[i]); - free_4k(dev, addr); - } + for (i = 0; i < num_claimed; i++) + free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i])); + if (nclaimed) *nclaimed = num_claimed; @@ -518,8 +469,8 @@ static int optimal_reclaimed_pages(void) int ret; ret = (sizeof(lay->out) + MLX5_BLKS_FOR_RECLAIM_PAGES * sizeof(block->data) - - sizeof(struct mlx5_manage_pages_outbox)) / - FIELD_SIZEOF(struct mlx5_manage_pages_outbox, pas[0]); + MLX5_ST_SZ_BYTES(manage_pages_out)) / + MLX5_FLD_SZ_BYTES(manage_pages_out, pas[0]); return ret; } @@ -594,6 +545,12 @@ int mlx5_wait_for_vf_pages(struct mlx5_core_dev *dev) unsigned long end = jiffies + msecs_to_jiffies(MAX_RECLAIM_VFS_PAGES_TIME_MSECS); int prev_vfs_pages = dev->priv.vfs_pages; + /* In case of internal error we will free the pages manually later */ + if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) { + mlx5_core_warn(dev, "Skipping wait for vf pages stage"); + return 0; + } + mlx5_core_dbg(dev, "Waiting for %d pages from %s\n", prev_vfs_pages, dev->priv.name); while (dev->priv.vfs_pages) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c index f2d3aee909e8..bd830d8d6c5f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c @@ -36,66 +36,27 @@ #include #include "mlx5_core.h" -struct mlx5_alloc_pd_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_pd_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 pdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_pd_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 pdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_pd_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn) { - struct mlx5_alloc_pd_mbox_in in; - struct mlx5_alloc_pd_mbox_out out; + u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_PD); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - *pdn = be32_to_cpu(out.pdn) & 0xffffff; + MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *pdn = MLX5_GET(alloc_pd_out, out, pd); return err; } EXPORT_SYMBOL(mlx5_core_alloc_pd); int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn) { - struct mlx5_dealloc_pd_mbox_in in; - struct mlx5_dealloc_pd_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_PD); - in.pdn = cpu_to_be32(pdn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD); + MLX5_SET(dealloc_pd_in, in, pd, pdn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_dealloc_pd); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c index 752c08127138..34e7184e23c9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c @@ -38,45 +38,42 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int size_in, void *data_out, int size_out, - u16 reg_num, int arg, int write) + u16 reg_id, int arg, int write) { - struct mlx5_access_reg_mbox_in *in = NULL; - struct mlx5_access_reg_mbox_out *out = NULL; + int outlen = MLX5_ST_SZ_BYTES(access_register_out) + size_out; + int inlen = MLX5_ST_SZ_BYTES(access_register_in) + size_in; int err = -ENOMEM; + u32 *out = NULL; + u32 *in = NULL; + void *data; - in = mlx5_vzalloc(sizeof(*in) + size_in); - if (!in) - return -ENOMEM; + in = mlx5_vzalloc(inlen); + out = mlx5_vzalloc(outlen); + if (!in || !out) + goto out; - out = mlx5_vzalloc(sizeof(*out) + size_out); - if (!out) - goto ex1; + data = MLX5_ADDR_OF(access_register_in, in, register_data); + memcpy(data, data_in, size_in); - memcpy(in->data, data_in, size_in); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ACCESS_REG); - in->hdr.opmod = cpu_to_be16(!write); - in->arg = cpu_to_be32(arg); - in->register_id = cpu_to_be16(reg_num); - err = mlx5_cmd_exec(dev, in, sizeof(*in) + size_in, out, - sizeof(*out) + size_out); + MLX5_SET(access_register_in, in, opcode, MLX5_CMD_OP_ACCESS_REG); + MLX5_SET(access_register_in, in, op_mod, !write); + MLX5_SET(access_register_in, in, argument, arg); + MLX5_SET(access_register_in, in, register_id, reg_id); + + err = mlx5_cmd_exec(dev, in, inlen, out, outlen); if (err) - goto ex2; + goto out; - if (out->hdr.status) - err = mlx5_cmd_status_to_err(&out->hdr); + data = MLX5_ADDR_OF(access_register_out, out, register_data); + memcpy(data_out, data, size_out); - if (!err) - memcpy(data_out, out->data, size_out); - -ex2: +out: kvfree(out); -ex1: kvfree(in); return err; } EXPORT_SYMBOL_GPL(mlx5_core_access_reg); - struct mlx5_reg_pcap { u8 rsvd0; u8 port_num; @@ -104,12 +101,10 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port) { - u32 in[MLX5_ST_SZ_DW(ptys_reg)]; + u32 in[MLX5_ST_SZ_DW(ptys_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(ptys_reg, in, local_port, local_port); MLX5_SET(ptys_reg, in, proto_mask, proto_mask); - return mlx5_core_access_reg(dev, in, sizeof(in), ptys, ptys_size, MLX5_REG_PTYS, 0, 0); } @@ -117,13 +112,11 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ptys); int mlx5_set_port_beacon(struct mlx5_core_dev *dev, u16 beacon_duration) { + u32 in[MLX5_ST_SZ_DW(mlcr_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(mlcr_reg)]; - u32 in[MLX5_ST_SZ_DW(mlcr_reg)]; - memset(in, 0, sizeof(in)); MLX5_SET(mlcr_reg, in, local_port, 1); MLX5_SET(mlcr_reg, in, beacon_duration, beacon_duration); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_MLCR, 0, 1); } @@ -182,25 +175,39 @@ int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, } EXPORT_SYMBOL_GPL(mlx5_query_port_link_width_oper); -int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, int proto_mask, - u8 local_port) +int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, + u32 *proto_oper, u8 local_port) { u32 out[MLX5_ST_SZ_DW(ptys_reg)]; int err; - err = mlx5_query_port_ptys(dev, out, sizeof(out), proto_mask, local_port); + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_EN, + local_port); if (err) return err; - if (proto_mask == MLX5_PTYS_EN) - *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); - else - *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + *proto_oper = MLX5_GET(ptys_reg, out, eth_proto_oper); return 0; } -EXPORT_SYMBOL_GPL(mlx5_query_port_proto_oper); +EXPORT_SYMBOL(mlx5_query_port_eth_proto_oper); + +int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, u8 local_port) +{ + u32 out[MLX5_ST_SZ_DW(ptys_reg)]; + int err; + + err = mlx5_query_port_ptys(dev, out, sizeof(out), MLX5_PTYS_IB, + local_port); + if (err) + return err; + + *proto_oper = MLX5_GET(ptys_reg, out, ib_proto_oper); + + return 0; +} +EXPORT_SYMBOL(mlx5_query_port_ib_proto_oper); int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, u32 proto_admin, int proto_mask) @@ -246,15 +253,12 @@ EXPORT_SYMBOL_GPL(mlx5_toggle_port_link); int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status status) { - u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(paos_reg)]; - memset(in, 0, sizeof(in)); - MLX5_SET(paos_reg, in, local_port, 1); MLX5_SET(paos_reg, in, admin_status, status); MLX5_SET(paos_reg, in, ase, 1); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 1); } @@ -263,19 +267,15 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_admin_status); int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status) { - u32 in[MLX5_ST_SZ_DW(paos_reg)]; + u32 in[MLX5_ST_SZ_DW(paos_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(paos_reg)]; int err; - memset(in, 0, sizeof(in)); - MLX5_SET(paos_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PAOS, 0, 0); if (err) return err; - *status = MLX5_GET(paos_reg, out, admin_status); return 0; } @@ -284,13 +284,10 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_admin_status); static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, u16 *max_mtu, u16 *oper_mtu, u8 port) { - u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; - memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, local_port, port); - mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 0); @@ -304,14 +301,11 @@ static void mlx5_query_port_mtu(struct mlx5_core_dev *dev, u16 *admin_mtu, int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port) { - u32 in[MLX5_ST_SZ_DW(pmtu_reg)]; + u32 in[MLX5_ST_SZ_DW(pmtu_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmtu_reg)]; - memset(in, 0, sizeof(in)); - MLX5_SET(pmtu_reg, in, admin_mtu, mtu); MLX5_SET(pmtu_reg, in, local_port, port); - return mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMTU, 0, 1); } @@ -333,15 +327,12 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_oper_mtu); static int mlx5_query_module_num(struct mlx5_core_dev *dev, int *module_num) { + u32 in[MLX5_ST_SZ_DW(pmlp_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pmlp_reg)]; - u32 in[MLX5_ST_SZ_DW(pmlp_reg)]; int module_mapping; int err; - memset(in, 0, sizeof(in)); - MLX5_SET(pmlp_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PMLP, 0, 0); if (err) @@ -410,11 +401,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_module_eeprom); static int mlx5_query_port_pvlc(struct mlx5_core_dev *dev, u32 *pvlc, int pvlc_size, u8 local_port) { - u32 in[MLX5_ST_SZ_DW(pvlc_reg)]; + u32 in[MLX5_ST_SZ_DW(pvlc_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(pvlc_reg, in, local_port, local_port); - return mlx5_core_access_reg(dev, in, sizeof(in), pvlc, pvlc_size, MLX5_REG_PVLC, 0, 0); } @@ -460,10 +449,9 @@ EXPORT_SYMBOL_GPL(mlx5_core_query_ib_ppcnt); int mlx5_set_port_pause(struct mlx5_core_dev *dev, u32 rx_pause, u32 tx_pause) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); MLX5_SET(pfcc_reg, in, pptx, tx_pause); MLX5_SET(pfcc_reg, in, pprx, rx_pause); @@ -476,13 +464,11 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pause); int mlx5_query_port_pause(struct mlx5_core_dev *dev, u32 *rx_pause, u32 *tx_pause) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; int err; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PFCC, 0, 0); if (err) @@ -500,10 +486,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_pause); int mlx5_set_port_pfc(struct mlx5_core_dev *dev, u8 pfc_en_tx, u8 pfc_en_rx) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); MLX5_SET(pfcc_reg, in, pfctx, pfc_en_tx); MLX5_SET(pfcc_reg, in, pfcrx, pfc_en_rx); @@ -517,13 +502,11 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_pfc); int mlx5_query_port_pfc(struct mlx5_core_dev *dev, u8 *pfc_en_tx, u8 *pfc_en_rx) { - u32 in[MLX5_ST_SZ_DW(pfcc_reg)]; + u32 in[MLX5_ST_SZ_DW(pfcc_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(pfcc_reg)]; int err; - memset(in, 0, sizeof(in)); MLX5_SET(pfcc_reg, in, local_port, 1); - err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), MLX5_REG_PFCC, 0, 0); if (err) @@ -567,12 +550,11 @@ int mlx5_max_tc(struct mlx5_core_dev *mdev) int mlx5_set_port_prio_tc(struct mlx5_core_dev *mdev, u8 *prio_tc) { - u32 in[MLX5_ST_SZ_DW(qtct_reg)]; + u32 in[MLX5_ST_SZ_DW(qtct_reg)] = {0}; u32 out[MLX5_ST_SZ_DW(qtct_reg)]; int err; int i; - memset(in, 0, sizeof(in)); for (i = 0; i < 8; i++) { if (prio_tc[i] > mlx5_max_tc(mdev)) return -EINVAL; @@ -617,11 +599,9 @@ static int mlx5_query_port_qetcr_reg(struct mlx5_core_dev *mdev, u32 *out, int mlx5_set_port_tc_group(struct mlx5_core_dev *mdev, u8 *tc_group) { - u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; int i; - memset(in, 0, sizeof(in)); - for (i = 0; i <= mlx5_max_tc(mdev); i++) { MLX5_SET(qetc_reg, in, tc_configuration[i].g, 1); MLX5_SET(qetc_reg, in, tc_configuration[i].group, tc_group[i]); @@ -633,11 +613,9 @@ EXPORT_SYMBOL_GPL(mlx5_set_port_tc_group); int mlx5_set_port_tc_bw_alloc(struct mlx5_core_dev *mdev, u8 *tc_bw) { - u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; int i; - memset(in, 0, sizeof(in)); - for (i = 0; i <= mlx5_max_tc(mdev); i++) { MLX5_SET(qetc_reg, in, tc_configuration[i].b, 1); MLX5_SET(qetc_reg, in, tc_configuration[i].bw_allocation, tc_bw[i]); @@ -651,12 +629,10 @@ int mlx5_modify_port_ets_rate_limit(struct mlx5_core_dev *mdev, u8 *max_bw_value, u8 *max_bw_units) { - u32 in[MLX5_ST_SZ_DW(qetc_reg)]; + u32 in[MLX5_ST_SZ_DW(qetc_reg)] = {0}; void *ets_tcn_conf; int i; - memset(in, 0, sizeof(in)); - MLX5_SET(qetc_reg, in, port_number, 1); for (i = 0; i <= mlx5_max_tc(mdev); i++) { @@ -701,35 +677,24 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit); int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode) { - u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]; - u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0}; MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL); MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1); MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode); - - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_set_port_wol); int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode) { - u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)]; - u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)]; + u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL); - - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), - out, sizeof(out)); - + err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); if (!err) *wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode); @@ -740,11 +705,9 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_wol); static int mlx5_query_ports_check(struct mlx5_core_dev *mdev, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(pcmr_reg, in, local_port, 1); - return mlx5_core_access_reg(mdev, in, sizeof(in), out, outlen, MLX5_REG_PCMR, 0, 0); } @@ -759,12 +722,10 @@ static int mlx5_set_ports_check(struct mlx5_core_dev *mdev, u32 *in, int inlen) int mlx5_set_port_fcs(struct mlx5_core_dev *mdev, u8 enable) { - u32 in[MLX5_ST_SZ_DW(pcmr_reg)]; + u32 in[MLX5_ST_SZ_DW(pcmr_reg)] = {0}; - memset(in, 0, sizeof(in)); MLX5_SET(pcmr_reg, in, local_port, 1); MLX5_SET(pcmr_reg, in, fcs_chk, enable); - return mlx5_set_ports_check(mdev, in, sizeof(in)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c index b82d65802d96..d0a4005fe63a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c @@ -271,30 +271,20 @@ static void destroy_qprqsq_common(struct mlx5_core_dev *dev, int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_create_qp_mbox_in *in, - int inlen) + u32 *in, int inlen) { - struct mlx5_create_qp_mbox_out out; - struct mlx5_destroy_qp_mbox_in din; - struct mlx5_destroy_qp_mbox_out dout; + u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0}; + u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)]; + u32 din[MLX5_ST_SZ_DW(destroy_qp_in)]; int err; - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(MLX5_CMD_OP_CREATE_QP); + MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP); - err = mlx5_cmd_exec(dev, in, inlen, &out, sizeof(out)); - if (err) { - mlx5_core_warn(dev, "ret %d\n", err); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); + if (err) return err; - } - if (out.hdr.status) { - mlx5_core_warn(dev, "current num of QPs 0x%x\n", - atomic_read(&dev->num_qps)); - return mlx5_cmd_status_to_err(&out.hdr); - } - - qp->qpn = be32_to_cpu(out.qpn) & 0xffffff; + qp->qpn = MLX5_GET(create_qp_out, out, qpn); mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn); err = create_qprqsq_common(dev, qp, MLX5_RES_QP); @@ -311,12 +301,11 @@ int mlx5_core_create_qp(struct mlx5_core_dev *dev, return 0; err_cmd: - memset(&din, 0, sizeof(din)); - memset(&dout, 0, sizeof(dout)); - din.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); - din.qpn = cpu_to_be32(qp->qpn); - mlx5_cmd_exec(dev, &din, sizeof(din), &out, sizeof(dout)); - + memset(din, 0, sizeof(din)); + memset(dout, 0, sizeof(dout)); + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout)); return err; } EXPORT_SYMBOL_GPL(mlx5_core_create_qp); @@ -324,45 +313,145 @@ EXPORT_SYMBOL_GPL(mlx5_core_create_qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp) { - struct mlx5_destroy_qp_mbox_in in; - struct mlx5_destroy_qp_mbox_out out; + u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {0}; int err; mlx5_debug_qp_remove(dev, qp); destroy_qprqsq_common(dev, qp); - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DESTROY_QP); - in.qpn = cpu_to_be32(qp->qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); + MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP); + MLX5_SET(destroy_qp_in, in, qpn, qp->qpn); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (err) return err; - if (out.hdr.status) - return mlx5_cmd_status_to_err(&out.hdr); - atomic_dec(&dev->num_qps); return 0; } EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, - struct mlx5_modify_qp_mbox_in *in, int sqd_event, +struct mbox_info { + u32 *in; + u32 *out; + int inlen; + int outlen; +}; + +static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen) +{ + mbox->inlen = inlen; + mbox->outlen = outlen; + mbox->in = kzalloc(mbox->inlen, GFP_KERNEL); + mbox->out = kzalloc(mbox->outlen, GFP_KERNEL); + if (!mbox->in || !mbox->out) { + kfree(mbox->in); + kfree(mbox->out); + return -ENOMEM; + } + + return 0; +} + +static void mbox_free(struct mbox_info *mbox) +{ + kfree(mbox->in); + kfree(mbox->out); +} + +static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn, + u32 opt_param_mask, void *qpc, + struct mbox_info *mbox) +{ + mbox->out = NULL; + mbox->in = NULL; + +#define MBOX_ALLOC(mbox, typ) \ + mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out)) + +#define MOD_QP_IN_SET(typ, in, _opcode, _qpn) \ + MLX5_SET(typ##_in, in, opcode, _opcode); \ + MLX5_SET(typ##_in, in, qpn, _qpn) + +#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc) \ + MOD_QP_IN_SET(typ, in, _opcode, _qpn); \ + MLX5_SET(typ##_in, in, opt_param_mask, _opt_p); \ + memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc, MLX5_ST_SZ_BYTES(qpc)) + + switch (opcode) { + /* 2RST & 2ERR */ + case MLX5_CMD_OP_2RST_QP: + if (MBOX_ALLOC(mbox, qp_2rst)) + return -ENOMEM; + MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn); + break; + case MLX5_CMD_OP_2ERR_QP: + if (MBOX_ALLOC(mbox, qp_2err)) + return -ENOMEM; + MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn); + break; + + /* MODIFY with QPC */ + case MLX5_CMD_OP_RST2INIT_QP: + if (MBOX_ALLOC(mbox, rst2init_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_INIT2RTR_QP: + if (MBOX_ALLOC(mbox, init2rtr_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_RTR2RTS_QP: + if (MBOX_ALLOC(mbox, rtr2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_RTS2RTS_QP: + if (MBOX_ALLOC(mbox, rts2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_SQERR2RTS_QP: + if (MBOX_ALLOC(mbox, sqerr2rts_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + case MLX5_CMD_OP_INIT2INIT_QP: + if (MBOX_ALLOC(mbox, init2init_qp)) + return -ENOMEM; + MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn, + opt_param_mask, qpc); + break; + default: + mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n", + opcode, qpn); + return -EINVAL; + } + return 0; +} + +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, + u32 opt_param_mask, void *qpc, struct mlx5_core_qp *qp) { - struct mlx5_modify_qp_mbox_out out; - int err = 0; + struct mbox_info mbox; + int err; - memset(&out, 0, sizeof(out)); - in->hdr.opcode = cpu_to_be16(operation); - in->qpn = cpu_to_be32(qp->qpn); - err = mlx5_cmd_exec(dev, in, sizeof(*in), &out, sizeof(out)); + err = modify_qp_mbox_alloc(dev, opcode, qp->qpn, + opt_param_mask, qpc, &mbox); if (err) return err; - return mlx5_cmd_status_to_err(&out.hdr); + err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen); + mbox_free(&mbox); + return err; } EXPORT_SYMBOL_GPL(mlx5_core_qp_modify); @@ -382,66 +471,38 @@ void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev) } int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_query_qp_mbox_out *out, int outlen) + u32 *out, int outlen) { - struct mlx5_query_qp_mbox_in in; - int err; + u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(out, 0, outlen); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_QUERY_QP); - in.qpn = cpu_to_be32(qp->qpn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), out, outlen); - if (err) - return err; - - if (out->hdr.status) - return mlx5_cmd_status_to_err(&out->hdr); - - return err; + MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP); + MLX5_SET(query_qp_in, in, qpn, qp->qpn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL_GPL(mlx5_core_qp_query); int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn) { - struct mlx5_alloc_xrcd_mbox_in in; - struct mlx5_alloc_xrcd_mbox_out out; + u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_XRCD); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - else - *xrcdn = be32_to_cpu(out.xrcdn) & 0xffffff; - + MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd); return err; } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn) { - struct mlx5_dealloc_xrcd_mbox_in in; - struct mlx5_dealloc_xrcd_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_XRCD); - in.xrcdn = cpu_to_be32(xrcdn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - - return err; + MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD); + MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); @@ -449,28 +510,23 @@ EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc); int mlx5_core_page_fault_resume(struct mlx5_core_dev *dev, u32 qpn, u8 flags, int error) { - struct mlx5_page_fault_resume_mbox_in in; - struct mlx5_page_fault_resume_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(page_fault_resume_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(page_fault_resume_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_PAGE_FAULT_RESUME); - in.hdr.opmod = 0; - flags &= (MLX5_PAGE_FAULT_RESUME_REQUESTOR | - MLX5_PAGE_FAULT_RESUME_WRITE | - MLX5_PAGE_FAULT_RESUME_RDMA); - flags |= (error ? MLX5_PAGE_FAULT_RESUME_ERROR : 0); - in.flags_qpn = cpu_to_be32((qpn & MLX5_QPN_MASK) | - (flags << MLX5_QPN_BITS)); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - return err; + MLX5_SET(page_fault_resume_in, in, opcode, + MLX5_CMD_OP_PAGE_FAULT_RESUME); + MLX5_SET(page_fault_resume_in, in, qpn, qpn); - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); + if (flags & MLX5_PAGE_FAULT_RESUME_REQUESTOR) + MLX5_SET(page_fault_resume_in, in, req_res, 1); + if (flags & MLX5_PAGE_FAULT_RESUME_WRITE) + MLX5_SET(page_fault_resume_in, in, read_write, 1); + if (flags & MLX5_PAGE_FAULT_RESUME_RDMA) + MLX5_SET(page_fault_resume_in, in, rdma, 1); + if (error) + MLX5_SET(page_fault_resume_in, in, error, 1); - return err; + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_core_page_fault_resume); #endif @@ -541,15 +597,12 @@ EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked); int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id) { - u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]; - u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *counter_id = MLX5_GET(alloc_q_counter_out, out, counter_set_id); @@ -559,31 +612,25 @@ EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter); int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id) { - u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0}; MLX5_SET(dealloc_q_counter_in, in, opcode, MLX5_CMD_OP_DEALLOC_Q_COUNTER); MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter); int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id, int reset, void *out, int out_size) { - u32 in[MLX5_ST_SZ_DW(query_q_counter_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0}; MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER); MLX5_SET(query_q_counter_in, in, clear, reset); MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_size); + return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size); } EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c index c07c28bd3d55..104902a93a0b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c @@ -63,19 +63,14 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table, static int mlx5_set_rate_limit_cmd(struct mlx5_core_dev *dev, u32 rate, u16 index) { - u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)]; - u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(set_rate_limit_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(set_rate_limit_out)] = {0}; MLX5_SET(set_rate_limit_in, in, opcode, MLX5_CMD_OP_SET_RATE_LIMIT); MLX5_SET(set_rate_limit_in, in, rate_limit_index, index); MLX5_SET(set_rate_limit_in, in, rate_limit, rate); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c index b380a6bc1f85..e08627785590 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sriov.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sriov.c @@ -37,198 +37,200 @@ #include "eswitch.h" #endif -static void enable_vfs(struct mlx5_core_dev *dev, int num_vfs) +bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + + return !!sriov->num_vfs; +} + +static int mlx5_device_enable_sriov(struct mlx5_core_dev *dev, int num_vfs) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; int err; int vf; - for (vf = 1; vf <= num_vfs; vf++) { - err = mlx5_core_enable_hca(dev, vf); - if (err) { - mlx5_core_warn(dev, "failed to enable VF %d\n", vf - 1); - } else { - sriov->vfs_ctx[vf - 1].enabled = 1; - mlx5_core_dbg(dev, "successfully enabled VF %d\n", vf - 1); - } + if (sriov->enabled_vfs) { + mlx5_core_warn(dev, + "failed to enable SRIOV on device, already enabled with %d vfs\n", + sriov->enabled_vfs); + return -EBUSY; } -} -static void disable_vfs(struct mlx5_core_dev *dev, int num_vfs) -{ - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int vf; - - for (vf = 1; vf <= num_vfs; vf++) { - if (sriov->vfs_ctx[vf - 1].enabled) { - if (mlx5_core_disable_hca(dev, vf)) - mlx5_core_warn(dev, "failed to disable VF %d\n", vf - 1); - else - sriov->vfs_ctx[vf - 1].enabled = 0; - } - } -} - -static int mlx5_core_create_vfs(struct pci_dev *pdev, int num_vfs) -{ - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - int err; - - if (pci_num_vf(pdev)) - pci_disable_sriov(pdev); - - enable_vfs(dev, num_vfs); - - err = pci_enable_sriov(pdev, num_vfs); +#ifdef CONFIG_MLX5_CORE_EN + err = mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); if (err) { - dev_warn(&pdev->dev, "enable sriov failed %d\n", err); - goto ex; + mlx5_core_warn(dev, + "failed to enable eswitch SRIOV (%d)\n", err); + return err; + } +#endif + + for (vf = 0; vf < num_vfs; vf++) { + err = mlx5_core_enable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to enable VF %d (%d)\n", vf, err); + continue; + } + sriov->vfs_ctx[vf].enabled = 1; + sriov->enabled_vfs++; + mlx5_core_dbg(dev, "successfully enabled VF* %d\n", vf); + } return 0; +} + +static void mlx5_device_disable_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_core_sriov *sriov = &dev->priv.sriov; + int err; + int vf; + + if (!sriov->enabled_vfs) + return; + + for (vf = 0; vf < sriov->num_vfs; vf++) { + if (!sriov->vfs_ctx[vf].enabled) + continue; + err = mlx5_core_disable_hca(dev, vf + 1); + if (err) { + mlx5_core_warn(dev, "failed to disable VF %d\n", vf); + continue; + } + sriov->vfs_ctx[vf].enabled = 0; + sriov->enabled_vfs--; + } + +#ifdef CONFIG_MLX5_CORE_EN + mlx5_eswitch_disable_sriov(dev->priv.eswitch); +#endif + + if (mlx5_wait_for_vf_pages(dev)) + mlx5_core_warn(dev, "timeout reclaiming VFs pages\n"); +} + +static int mlx5_pci_enable_sriov(struct pci_dev *pdev, int num_vfs) +{ + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); + int err = 0; + + if (pci_num_vf(pdev)) { + mlx5_core_warn(dev, "Unable to enable pci sriov, already enabled\n"); + return -EBUSY; + } + + err = pci_enable_sriov(pdev, num_vfs); + if (err) + mlx5_core_warn(dev, "pci_enable_sriov failed : %d\n", err); -ex: - disable_vfs(dev, num_vfs); return err; } -static int mlx5_core_sriov_enable(struct pci_dev *pdev, int num_vfs) +static void mlx5_pci_disable_sriov(struct pci_dev *pdev) +{ + pci_disable_sriov(pdev); +} + +static int mlx5_sriov_enable(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = kcalloc(num_vfs, sizeof(*sriov->vfs_ctx), GFP_ATOMIC); - if (!sriov->vfs_ctx) - return -ENOMEM; - - sriov->enabled_vfs = num_vfs; - err = mlx5_core_create_vfs(pdev, num_vfs); + err = mlx5_device_enable_sriov(dev, num_vfs); if (err) { - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; + mlx5_core_warn(dev, "mlx5_device_enable_sriov failed : %d\n", err); return err; } + err = mlx5_pci_enable_sriov(pdev, num_vfs); + if (err) { + mlx5_core_warn(dev, "mlx5_pci_enable_sriov failed : %d\n", err); + mlx5_device_disable_sriov(dev); + return err; + } + + sriov->num_vfs = num_vfs; + return 0; } -static void mlx5_core_init_vfs(struct mlx5_core_dev *dev, int num_vfs) +static void mlx5_sriov_disable(struct pci_dev *pdev) { + struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - sriov->num_vfs = num_vfs; -} - -static void mlx5_core_cleanup_vfs(struct mlx5_core_dev *dev) -{ - struct mlx5_core_sriov *sriov; - - sriov = &dev->priv.sriov; - disable_vfs(dev, sriov->num_vfs); - - if (mlx5_wait_for_vf_pages(dev)) - mlx5_core_warn(dev, "timeout claiming VFs pages\n"); - + mlx5_pci_disable_sriov(pdev); + mlx5_device_disable_sriov(dev); sriov->num_vfs = 0; } int mlx5_core_sriov_configure(struct pci_dev *pdev, int num_vfs) { struct mlx5_core_dev *dev = pci_get_drvdata(pdev); - struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int err; + int err = 0; mlx5_core_dbg(dev, "requested num_vfs %d\n", num_vfs); if (!mlx5_core_is_pf(dev)) return -EPERM; - mlx5_core_cleanup_vfs(dev); - - if (!num_vfs) { -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_disable_sriov(dev->priv.eswitch); -#endif - kfree(sriov->vfs_ctx); - sriov->vfs_ctx = NULL; - if (!pci_vfs_assigned(pdev)) - pci_disable_sriov(pdev); - else - pr_info("unloading PF driver while leaving orphan VFs\n"); - return 0; + if (num_vfs && mlx5_lag_is_active(dev)) { + mlx5_core_warn(dev, "can't turn sriov on while LAG is active"); + return -EINVAL; } - err = mlx5_core_sriov_enable(pdev, num_vfs); - if (err) { - dev_warn(&pdev->dev, "mlx5_core_sriov_enable failed %d\n", err); - return err; - } + if (num_vfs) + err = mlx5_sriov_enable(pdev, num_vfs); + else + mlx5_sriov_disable(pdev); - mlx5_core_init_vfs(dev, num_vfs); -#ifdef CONFIG_MLX5_CORE_EN - mlx5_eswitch_enable_sriov(dev->priv.eswitch, num_vfs, SRIOV_LEGACY); -#endif - - return num_vfs; + return err ? err : num_vfs; } -static int sync_required(struct pci_dev *pdev) +int mlx5_sriov_attach(struct mlx5_core_dev *dev) { - struct mlx5_core_dev *dev = pci_get_drvdata(pdev); struct mlx5_core_sriov *sriov = &dev->priv.sriov; - int cur_vfs = pci_num_vf(pdev); - if (cur_vfs != sriov->num_vfs) { - pr_info("current VFs %d, registered %d - sync needed\n", cur_vfs, sriov->num_vfs); - return 1; - } + if (!mlx5_core_is_pf(dev) || !sriov->num_vfs) + return 0; - return 0; + /* If sriov VFs exist in PCI level, enable them in device level */ + return mlx5_device_enable_sriov(dev, sriov->num_vfs); +} + +void mlx5_sriov_detach(struct mlx5_core_dev *dev) +{ + if (!mlx5_core_is_pf(dev)) + return; + + mlx5_device_disable_sriov(dev); } int mlx5_sriov_init(struct mlx5_core_dev *dev) { struct mlx5_core_sriov *sriov = &dev->priv.sriov; struct pci_dev *pdev = dev->pdev; - int cur_vfs; + int total_vfs; if (!mlx5_core_is_pf(dev)) return 0; - if (!sync_required(dev->pdev)) - return 0; - - cur_vfs = pci_num_vf(pdev); - sriov->vfs_ctx = kcalloc(cur_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); + total_vfs = pci_sriov_get_totalvfs(pdev); + sriov->num_vfs = pci_num_vf(pdev); + sriov->vfs_ctx = kcalloc(total_vfs, sizeof(*sriov->vfs_ctx), GFP_KERNEL); if (!sriov->vfs_ctx) return -ENOMEM; - sriov->enabled_vfs = cur_vfs; - - mlx5_core_init_vfs(dev, cur_vfs); -#ifdef CONFIG_MLX5_CORE_EN - if (cur_vfs) - mlx5_eswitch_enable_sriov(dev->priv.eswitch, cur_vfs, - SRIOV_LEGACY); -#endif - - enable_vfs(dev, cur_vfs); - return 0; } -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev) +void mlx5_sriov_cleanup(struct mlx5_core_dev *dev) { - struct pci_dev *pdev = dev->pdev; - int err; + struct mlx5_core_sriov *sriov = &dev->priv.sriov; if (!mlx5_core_is_pf(dev)) - return 0; + return; - err = mlx5_core_sriov_configure(pdev, 0); - if (err) - return err; - - return 0; + kfree(sriov->vfs_ctx); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/srq.c b/drivers/net/ethernet/mellanox/mlx5/core/srq.c index c07f4d01b70e..3099630015d7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/srq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/srq.c @@ -175,8 +175,8 @@ static int create_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(create_srq_in, create_in, opcode, MLX5_CMD_OP_CREATE_SRQ); - err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, - sizeof(create_out)); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); kvfree(create_in); if (!err) srq->srqn = MLX5_GET(create_srq_out, create_out, srqn); @@ -194,8 +194,8 @@ static int destroy_srq_cmd(struct mlx5_core_dev *dev, MLX5_CMD_OP_DESTROY_SRQ); MLX5_SET(destroy_srq_in, srq_in, srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, @@ -209,8 +209,8 @@ static int arm_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(arm_xrc_srq_in, srq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, srq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), - srq_out, sizeof(srq_out)); + return mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, sizeof(srq_out)); } static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, @@ -228,9 +228,8 @@ static int query_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, MLX5_SET(query_srq_in, srq_in, opcode, MLX5_CMD_OP_QUERY_SRQ); MLX5_SET(query_srq_in, srq_in, srqn, srq->srqn); - err = mlx5_cmd_exec_check_status(dev, srq_in, sizeof(srq_in), - srq_out, - MLX5_ST_SZ_BYTES(query_srq_out)); + err = mlx5_cmd_exec(dev, srq_in, sizeof(srq_in), + srq_out, MLX5_ST_SZ_BYTES(query_srq_out)); if (err) goto out; @@ -272,8 +271,8 @@ static int create_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_CMD_OP_CREATE_XRC_SRQ); memset(create_out, 0, sizeof(create_out)); - err = mlx5_cmd_exec_check_status(dev, create_in, inlen, create_out, - sizeof(create_out)); + err = mlx5_cmd_exec(dev, create_in, inlen, create_out, + sizeof(create_out)); if (err) goto out; @@ -286,36 +285,30 @@ out: static int destroy_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; - u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; - - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + u32 xrcsrq_in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); } static int arm_xrc_srq_cmd(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm) { - u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; - u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; - - memset(xrcsrq_in, 0, sizeof(xrcsrq_in)); - memset(xrcsrq_out, 0, sizeof(xrcsrq_out)); + u32 xrcsrq_in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 xrcsrq_out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; MLX5_SET(arm_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); MLX5_SET(arm_xrc_srq_in, xrcsrq_in, lwm, lwm); - return mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, sizeof(xrcsrq_out)); + return mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), + xrcsrq_out, sizeof(xrcsrq_out)); } static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, @@ -335,9 +328,9 @@ static int query_xrc_srq_cmd(struct mlx5_core_dev *dev, MLX5_SET(query_xrc_srq_in, xrcsrq_in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); MLX5_SET(query_xrc_srq_in, xrcsrq_in, xrc_srqn, srq->srqn); - err = mlx5_cmd_exec_check_status(dev, xrcsrq_in, sizeof(xrcsrq_in), - xrcsrq_out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + + err = mlx5_cmd_exec(dev, xrcsrq_in, sizeof(xrcsrq_in), xrcsrq_out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); if (err) goto out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c index 28274a6fbafe..a00ff49eec18 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c @@ -36,17 +36,14 @@ int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn) { - u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]; - u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)]; + u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0}; int err; - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); - MLX5_SET(alloc_transport_domain_in, in, opcode, MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); if (!err) *tdn = MLX5_GET(alloc_transport_domain_out, out, transport_domain); @@ -57,29 +54,23 @@ EXPORT_SYMBOL(mlx5_core_alloc_transport_domain); void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn) { - u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]; - u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0}; MLX5_SET(dealloc_transport_domain_in, in, opcode, MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN); MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain); int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn) { - u32 out[MLX5_ST_SZ_DW(create_rq_out)]; + u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0}; int err; MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *rqn = MLX5_GET(create_rq_out, out, rqn); @@ -95,21 +86,18 @@ int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen) MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ); memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_rq); void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn) { - u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_rq_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0}; MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ); MLX5_SET(destroy_rq_in, in, rqn, rqn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_rq); @@ -121,19 +109,17 @@ int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out) MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ); MLX5_SET(query_rq_in, in, rqn, rqn); - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_rq); int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) { - u32 out[MLX5_ST_SZ_DW(create_sq_out)]; + u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0}; int err; MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *sqn = MLX5_GET(create_sq_out, out, sqn); @@ -142,27 +128,22 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn) int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_sq_out)]; + u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0}; MLX5_SET(modify_sq_in, in, sqn, sqn); MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_sq); void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn) { - u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_sq_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0}; MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ); MLX5_SET(destroy_sq_in, in, sqn, sqn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) @@ -172,21 +153,20 @@ int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out) MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ); MLX5_SET(query_sq_in, in, sqn, sqn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } EXPORT_SYMBOL(mlx5_core_query_sq); int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tirn) { - u32 out[MLX5_ST_SZ_DW(create_tir_out)]; + u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {0}; int err; MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR); memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *tirn = MLX5_GET(create_tir_out, out, tirn); @@ -197,39 +177,32 @@ EXPORT_SYMBOL(mlx5_core_create_tir); int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_tir_out)]; + u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0}; MLX5_SET(modify_tir_in, in, tirn, tirn); MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn) { - u32 in[MLX5_ST_SZ_DW(destroy_tir_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_tir_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0}; MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR); MLX5_SET(destroy_tir_in, in, tirn, tirn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_tir); int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *tisn) { - u32 out[MLX5_ST_SZ_DW(create_tis_out)]; + u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0}; int err; MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *tisn = MLX5_GET(create_tis_out, out, tisn); @@ -245,34 +218,29 @@ int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in, MLX5_SET(modify_tis_in, in, tisn, tisn); MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_modify_tis); void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn) { - u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_tis_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0}; MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); MLX5_SET(destroy_tis_in, in, tisn, tisn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_tis); int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rmpn) { - u32 out[MLX5_ST_SZ_DW(create_rmp_out)]; + u32 out[MLX5_ST_SZ_DW(create_rmp_out)] = {0}; int err; MLX5_SET(create_rmp_in, in, opcode, MLX5_CMD_OP_CREATE_RMP); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *rmpn = MLX5_GET(create_rmp_out, out, rmpn); @@ -281,38 +249,31 @@ int mlx5_core_create_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_modify_rmp(struct mlx5_core_dev *dev, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_rmp_out)]; + u32 out[MLX5_ST_SZ_DW(modify_rmp_out)] = {0}; MLX5_SET(modify_rmp_in, in, opcode, MLX5_CMD_OP_MODIFY_RMP); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } int mlx5_core_destroy_rmp(struct mlx5_core_dev *dev, u32 rmpn) { - u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_rmp_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rmp_out)] = {0}; MLX5_SET(destroy_rmp_in, in, opcode, MLX5_CMD_OP_DESTROY_RMP); MLX5_SET(destroy_rmp_in, in, rmpn, rmpn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_query_rmp(struct mlx5_core_dev *dev, u32 rmpn, u32 *out) { - u32 in[MLX5_ST_SZ_DW(query_rmp_in)]; + u32 in[MLX5_ST_SZ_DW(query_rmp_in)] = {0}; int outlen = MLX5_ST_SZ_BYTES(query_rmp_out); - memset(in, 0, sizeof(in)); MLX5_SET(query_rmp_in, in, opcode, MLX5_CMD_OP_QUERY_RMP); MLX5_SET(query_rmp_in, in, rmpn, rmpn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen); } int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) @@ -347,13 +308,11 @@ int mlx5_core_arm_rmp(struct mlx5_core_dev *dev, u32 rmpn, u16 lwm) int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *xsrqn) { - u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)]; + u32 out[MLX5_ST_SZ_DW(create_xrc_srq_out)] = {0}; int err; MLX5_SET(create_xrc_srq_in, in, opcode, MLX5_CMD_OP_CREATE_XRC_SRQ); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *xsrqn = MLX5_GET(create_xrc_srq_out, out, xrc_srqn); @@ -362,33 +321,25 @@ int mlx5_core_create_xsrq(struct mlx5_core_dev *dev, u32 *in, int inlen, int mlx5_core_destroy_xsrq(struct mlx5_core_dev *dev, u32 xsrqn) { - u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(destroy_xrc_srq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_xrc_srq_out)] = {0}; MLX5_SET(destroy_xrc_srq_in, in, opcode, MLX5_CMD_OP_DESTROY_XRC_SRQ); MLX5_SET(destroy_xrc_srq_in, in, xrc_srqn, xsrqn); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) { - u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)]; + u32 in[MLX5_ST_SZ_DW(query_xrc_srq_in)] = {0}; void *srqc; void *xrc_srqc; int err; - memset(in, 0, sizeof(in)); MLX5_SET(query_xrc_srq_in, in, opcode, MLX5_CMD_OP_QUERY_XRC_SRQ); MLX5_SET(query_xrc_srq_in, in, xrc_srqn, xsrqn); - - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), - out, - MLX5_ST_SZ_BYTES(query_xrc_srq_out)); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, + MLX5_ST_SZ_BYTES(query_xrc_srq_out)); if (!err) { xrc_srqc = MLX5_ADDR_OF(query_xrc_srq_out, out, xrc_srq_context_entry); @@ -401,32 +352,25 @@ int mlx5_core_query_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u32 *out) int mlx5_core_arm_xsrq(struct mlx5_core_dev *dev, u32 xsrqn, u16 lwm) { - u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)]; - u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(arm_xrc_srq_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(arm_xrc_srq_out)] = {0}; MLX5_SET(arm_xrc_srq_in, in, opcode, MLX5_CMD_OP_ARM_XRC_SRQ); MLX5_SET(arm_xrc_srq_in, in, xrc_srqn, xsrqn); MLX5_SET(arm_xrc_srq_in, in, lwm, lwm); MLX5_SET(arm_xrc_srq_in, in, op_mod, MLX5_ARM_XRC_SRQ_IN_OP_MOD_XRC_SRQ); - - return mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqtn) { - u32 out[MLX5_ST_SZ_DW(create_rqt_out)]; + u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0}; int err; MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT); - - memset(out, 0, sizeof(out)); - err = mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); if (!err) *rqtn = MLX5_GET(create_rqt_out, out, rqtn); @@ -437,25 +381,20 @@ EXPORT_SYMBOL(mlx5_core_create_rqt); int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_rqt_out)]; + u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0}; MLX5_SET(modify_rqt_in, in, rqtn, rqtn); MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(dev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out)); } void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn) { - u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]; - u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0}; MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT); MLX5_SET(destroy_rqt_in, in, rqtn, rqtn); - - mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, sizeof(out)); + mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_core_destroy_rqt); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c index 5ff8af472bf5..ab0b896621a0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c @@ -42,73 +42,28 @@ enum { NUM_LOW_LAT_UUARS = 4, }; - -struct mlx5_alloc_uar_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_uar_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 uarn; - u8 rsvd[4]; -}; - -struct mlx5_free_uar_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 uarn; - u8 rsvd[4]; -}; - -struct mlx5_free_uar_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn) { - struct mlx5_alloc_uar_mbox_in in; - struct mlx5_alloc_uar_mbox_out out; + u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {0}; int err; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_ALLOC_UAR); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - goto ex; - - if (out.hdr.status) { - err = mlx5_cmd_status_to_err(&out.hdr); - goto ex; - } - - *uarn = be32_to_cpu(out.uarn) & 0xffffff; - -ex: + MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (!err) + *uarn = MLX5_GET(alloc_uar_out, out, uar); return err; } EXPORT_SYMBOL(mlx5_cmd_alloc_uar); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn) { - struct mlx5_free_uar_mbox_in in; - struct mlx5_free_uar_mbox_out out; - int err; + u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0}; + u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {0}; - memset(&in, 0, sizeof(in)); - memset(&out, 0, sizeof(out)); - in.hdr.opcode = cpu_to_be16(MLX5_CMD_OP_DEALLOC_UAR); - in.uarn = cpu_to_be32(uarn); - err = mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out)); - if (err) - goto ex; - - if (out.hdr.status) - err = mlx5_cmd_status_to_err(&out.hdr); - -ex: - return err; + MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR); + MLX5_SET(dealloc_uar_in, in, uar, uarn); + return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL(mlx5_cmd_free_uar); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c index 21365d06982b..525f17af108e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c @@ -39,10 +39,7 @@ static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u32 *out, int outlen) { - int err; - u32 in[MLX5_ST_SZ_DW(query_vport_state_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0}; MLX5_SET(query_vport_state_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_STATE); @@ -51,11 +48,7 @@ static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, if (vport) MLX5_SET(query_vport_state_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); - if (err) - mlx5_core_warn(mdev, "MLX5_CMD_OP_QUERY_VPORT_STATE failed\n"); - - return err; + return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport) @@ -81,58 +74,43 @@ EXPORT_SYMBOL_GPL(mlx5_query_vport_admin_state); int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport, u8 state) { - u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]; - u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)]; - int err; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0}; MLX5_SET(modify_vport_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VPORT_STATE); MLX5_SET(modify_vport_state_in, in, op_mod, opmod); MLX5_SET(modify_vport_state_in, in, vport_number, vport); - if (vport) MLX5_SET(modify_vport_state_in, in, other_vport, 1); - MLX5_SET(modify_vport_state_in, in, admin_state, state); - err = mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); - if (err) - mlx5_core_warn(mdev, "MLX5_CMD_OP_MODIFY_VPORT_STATE failed\n"); - - return err; + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } EXPORT_SYMBOL_GPL(mlx5_modify_vport_admin_state); static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport, u32 *out, int outlen) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; - - memset(in, 0, sizeof(in)); + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; MLX5_SET(query_nic_vport_context_in, in, opcode, MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT); - MLX5_SET(query_nic_vport_context_in, in, vport_number, vport); if (vport) MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, outlen); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen); } static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in, int inlen) { - u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)]; + u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0}; MLX5_SET(modify_nic_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT); - - memset(out, 0, sizeof(out)); - return mlx5_cmd_exec_check_status(mdev, in, inlen, out, sizeof(out)); + return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out)); } void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, @@ -147,6 +125,26 @@ void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, } EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_min_inline); +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline) +{ + u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0}; + int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in); + void *nic_vport_ctx; + + MLX5_SET(modify_nic_vport_context_in, in, + field_select.min_inline, 1); + MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport); + MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1); + + nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in, + in, nic_vport_context); + MLX5_SET(nic_vport_context, nic_vport_ctx, + min_wqe_inline_mode, min_inline); + + return mlx5_modify_nic_vport_context(mdev, in, inlen); +} + int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr) { @@ -254,7 +252,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, u8 addr_list[][ETH_ALEN], int *list_size) { - u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)]; + u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0}; void *nic_vport_ctx; int max_list_size; int req_list_size; @@ -278,7 +276,6 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) + req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout); - memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; @@ -291,7 +288,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev, if (vport) MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) goto out; @@ -361,7 +358,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev, ether_addr_copy(curr_mac, addr_list[i]); } - err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); kfree(in); return err; } @@ -406,7 +403,7 @@ int mlx5_query_nic_vport_vlans(struct mlx5_core_dev *dev, if (vport) MLX5_SET(query_nic_vport_context_in, in, other_vport, 1); - err = mlx5_cmd_exec_check_status(dev, in, sizeof(in), out, out_sz); + err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); if (err) goto out; @@ -473,7 +470,7 @@ int mlx5_modify_nic_vport_vlans(struct mlx5_core_dev *dev, MLX5_SET(vlan_layout, vlan_addr, vlan, vlans[i]); } - err = mlx5_cmd_exec_check_status(dev, in, in_sz, out, sizeof(out)); + err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); kfree(in); return err; } @@ -631,10 +628,6 @@ int mlx5_query_hca_vport_gid(struct mlx5_core_dev *dev, u8 other_vport, if (err) goto out; - err = mlx5_cmd_status_to_err_v2(out); - if (err) - goto out; - tmp = out + MLX5_ST_SZ_BYTES(query_hca_vport_gid_out); gid->global.subnet_prefix = tmp->global.subnet_prefix; gid->global.interface_id = tmp->global.interface_id; @@ -700,10 +693,6 @@ int mlx5_query_hca_vport_pkey(struct mlx5_core_dev *dev, u8 other_vport, if (err) goto out; - err = mlx5_cmd_status_to_err_v2(out); - if (err) - goto out; - pkarr = MLX5_ADDR_OF(query_hca_vport_pkey_out, out, pkey); for (i = 0; i < nout; i++, pkey++, pkarr += MLX5_ST_SZ_BYTES(pkey)) *pkey = MLX5_GET_PR(pkey, pkarr, pkey); @@ -721,7 +710,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, struct mlx5_hca_vport_context *rep) { int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out); - int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)]; + int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0}; int is_group_manager; void *out; void *ctx; @@ -729,7 +718,6 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager); - memset(in, 0, sizeof(in)); out = kzalloc(out_sz, GFP_KERNEL); if (!out) return -ENOMEM; @@ -750,9 +738,6 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev, MLX5_SET(query_hca_vport_context_in, in, port_num, port_num); err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz); - if (err) - goto ex; - err = mlx5_cmd_status_to_err_v2(out); if (err) goto ex; @@ -969,10 +954,6 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport, MLX5_SET(query_vport_counter_in, in, port_num, port_num); err = mlx5_cmd_exec(dev, in, in_sz, out, out_sz); - if (err) - goto free; - err = mlx5_cmd_status_to_err_v2(out); - free: kvfree(in); return err; @@ -1035,11 +1016,6 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev, MLX5_SET(hca_vport_context, ctx, qkey_violation_counter, req->qkey_violation_counter); MLX5_SET(hca_vport_context, ctx, pkey_violation_counter, req->pkey_violation_counter); err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out)); - if (err) - goto ex; - - err = mlx5_cmd_status_to_err_v2(out); - ex: kfree(in); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index e25a73ed2981..07a9ba6cfc70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -46,41 +46,24 @@ void mlx5e_vxlan_init(struct mlx5e_priv *priv) static int mlx5e_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port) { - struct mlx5_outbox_hdr *hdr; - int err; - - u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]; - u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0}; MLX5_SET(add_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT); MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port); - - err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (err) - return err; - - hdr = (struct mlx5_outbox_hdr *)out; - return hdr->status ? -ENOMEM : 0; + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } static int mlx5e_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port) { - u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]; - u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)]; - - memset(in, 0, sizeof(in)); - memset(out, 0, sizeof(out)); + u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {0}; + u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0}; MLX5_SET(delete_vxlan_udp_dport_in, in, opcode, MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT); MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port); - - return mlx5_cmd_exec_check_status(mdev, in, sizeof(in), out, - sizeof(out)); + return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); } struct mlx5e_vxlan *mlx5e_vxlan_lookup_port(struct mlx5e_priv *priv, u16 port) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.c b/drivers/net/ethernet/mellanox/mlxsw/core.c index 068ee65a960b..aa33d58b9f81 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core.c @@ -1100,10 +1100,15 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, goto err_alloc_stats; } - if (mlxsw_driver->profile->used_max_lag && - mlxsw_driver->profile->used_max_port_per_lag) { - alloc_size = sizeof(u8) * mlxsw_driver->profile->max_lag * - mlxsw_driver->profile->max_port_per_lag; + err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, + &mlxsw_core->resources); + if (err) + goto err_bus_init; + + if (mlxsw_core->resources.max_lag_valid && + mlxsw_core->resources.max_ports_in_lag_valid) { + alloc_size = sizeof(u8) * mlxsw_core->resources.max_lag * + mlxsw_core->resources.max_ports_in_lag; mlxsw_core->lag.mapping = kzalloc(alloc_size, GFP_KERNEL); if (!mlxsw_core->lag.mapping) { err = -ENOMEM; @@ -1111,11 +1116,6 @@ int mlxsw_core_bus_device_register(const struct mlxsw_bus_info *mlxsw_bus_info, } } - err = mlxsw_bus->init(bus_priv, mlxsw_core, mlxsw_driver->profile, - &mlxsw_core->resources); - if (err) - goto err_bus_init; - err = mlxsw_emad_init(mlxsw_core); if (err) goto err_emad_init; @@ -1146,10 +1146,10 @@ err_hwmon_init: err_devlink_register: mlxsw_emad_fini(mlxsw_core); err_emad_init: - mlxsw_bus->fini(bus_priv); -err_bus_init: kfree(mlxsw_core->lag.mapping); err_alloc_lag_mapping: + mlxsw_bus->fini(bus_priv); +err_bus_init: free_percpu(mlxsw_core->pcpu_stats); err_alloc_stats: devlink_free(devlink); @@ -1615,7 +1615,7 @@ EXPORT_SYMBOL(mlxsw_core_skb_receive); static int mlxsw_core_lag_mapping_index(struct mlxsw_core *mlxsw_core, u16 lag_id, u8 port_index) { - return mlxsw_core->driver->profile->max_port_per_lag * lag_id + + return mlxsw_core->resources.max_ports_in_lag * lag_id + port_index; } @@ -1644,7 +1644,7 @@ void mlxsw_core_lag_mapping_clear(struct mlxsw_core *mlxsw_core, { int i; - for (i = 0; i < mlxsw_core->driver->profile->max_port_per_lag; i++) { + for (i = 0; i < mlxsw_core->resources.max_ports_in_lag; i++) { int index = mlxsw_core_lag_mapping_index(mlxsw_core, lag_id, i); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core.h b/drivers/net/ethernet/mellanox/mlxsw/core.h index d3476ead9982..c4f550b6f783 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core.h @@ -87,6 +87,7 @@ struct mlxsw_rx_listener { void (*func)(struct sk_buff *skb, u8 local_port, void *priv); u8 local_port; u16 trap_id; + enum mlxsw_reg_hpkt_action action; }; struct mlxsw_event_listener { @@ -178,8 +179,6 @@ struct mlxsw_swid_config { struct mlxsw_config_profile { u16 used_max_vepa_channels:1, - used_max_lag:1, - used_max_port_per_lag:1, used_max_mid:1, used_max_pgt:1, used_max_system_port:1, @@ -191,10 +190,9 @@ struct mlxsw_config_profile { used_max_pkey:1, used_ar_sec:1, used_adaptive_routing_group_cap:1, - used_kvd_sizes:1; + used_kvd_split_data:1; /* indicate for the kvd's values */ + u8 max_vepa_channels; - u16 max_lag; - u16 max_port_per_lag; u16 max_mid; u16 max_pgt; u16 max_system_port; @@ -213,8 +211,9 @@ struct mlxsw_config_profile { u16 adaptive_routing_group_cap; u8 arn; u32 kvd_linear_size; - u32 kvd_hash_single_size; - u32 kvd_hash_double_size; + u16 kvd_hash_granularity; + u8 kvd_hash_single_parts; + u8 kvd_hash_double_parts; u8 resource_query_enable; struct mlxsw_swid_config swid_config[MLXSW_CONFIG_PROFILE_SWID_COUNT]; }; @@ -268,8 +267,35 @@ struct mlxsw_driver { }; struct mlxsw_resources { - u8 max_span_valid:1; + u32 max_span_valid:1, + max_lag_valid:1, + max_ports_in_lag_valid:1, + kvd_size_valid:1, + kvd_single_min_size_valid:1, + kvd_double_min_size_valid:1, + max_virtual_routers_valid:1, + max_system_ports_valid:1, + max_vlan_groups_valid:1, + max_regions_valid:1, + max_rif_valid:1; u8 max_span; + u8 max_lag; + u8 max_ports_in_lag; + u32 kvd_size; + u32 kvd_single_min_size; + u32 kvd_double_min_size; + u16 max_virtual_routers; + u16 max_system_ports; + u16 max_vlan_groups; + u16 max_regions; + u16 max_rif; + + /* Internal resources. + * Determined by the SW, not queried from the HW. + */ + u32 kvd_single_size; + u32 kvd_double_size; + u32 kvd_linear_size; }; struct mlxsw_resources *mlxsw_core_resources_get(struct mlxsw_core *mlxsw_core); diff --git a/drivers/net/ethernet/mellanox/mlxsw/pci.c b/drivers/net/ethernet/mellanox/mlxsw/pci.c index 1d1360c178bb..e742bd4e8894 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/pci.c +++ b/drivers/net/ethernet/mellanox/mlxsw/pci.c @@ -1156,6 +1156,16 @@ mlxsw_pci_config_profile_swid_config(struct mlxsw_pci *mlxsw_pci, #define MLXSW_RESOURCES_TABLE_END_ID 0xffff #define MLXSW_MAX_SPAN_ID 0x2420 +#define MLXSW_MAX_LAG_ID 0x2520 +#define MLXSW_MAX_PORTS_IN_LAG_ID 0x2521 +#define MLXSW_KVD_SIZE_ID 0x1001 +#define MLXSW_KVD_SINGLE_MIN_SIZE_ID 0x1002 +#define MLXSW_KVD_DOUBLE_MIN_SIZE_ID 0x1003 +#define MLXSW_MAX_VIRTUAL_ROUTERS_ID 0x2C01 +#define MLXSW_MAX_SYSTEM_PORT_ID 0x2502 +#define MLXSW_MAX_VLAN_GROUPS_ID 0x2906 +#define MLXSW_MAX_REGIONS_ID 0x2901 +#define MLXSW_MAX_RIF_ID 0x2C02 #define MLXSW_RESOURCES_QUERY_MAX_QUERIES 100 #define MLXSW_RESOURCES_PER_QUERY 32 @@ -1167,6 +1177,46 @@ static void mlxsw_pci_resources_query_parse(int id, u64 val, resources->max_span = val; resources->max_span_valid = 1; break; + case MLXSW_MAX_LAG_ID: + resources->max_lag = val; + resources->max_lag_valid = 1; + break; + case MLXSW_MAX_PORTS_IN_LAG_ID: + resources->max_ports_in_lag = val; + resources->max_ports_in_lag_valid = 1; + break; + case MLXSW_KVD_SIZE_ID: + resources->kvd_size = val; + resources->kvd_size_valid = 1; + break; + case MLXSW_KVD_SINGLE_MIN_SIZE_ID: + resources->kvd_single_min_size = val; + resources->kvd_single_min_size_valid = 1; + break; + case MLXSW_KVD_DOUBLE_MIN_SIZE_ID: + resources->kvd_double_min_size = val; + resources->kvd_double_min_size_valid = 1; + break; + case MLXSW_MAX_VIRTUAL_ROUTERS_ID: + resources->max_virtual_routers = val; + resources->max_virtual_routers_valid = 1; + break; + case MLXSW_MAX_SYSTEM_PORT_ID: + resources->max_system_ports = val; + resources->max_system_ports_valid = 1; + break; + case MLXSW_MAX_VLAN_GROUPS_ID: + resources->max_vlan_groups = val; + resources->max_vlan_groups_valid = 1; + break; + case MLXSW_MAX_REGIONS_ID: + resources->max_regions = val; + resources->max_regions_valid = 1; + break; + case MLXSW_MAX_RIF_ID: + resources->max_rif = val; + resources->max_rif_valid = 1; + break; default: break; } @@ -1209,10 +1259,52 @@ static int mlxsw_pci_resources_query(struct mlxsw_pci *mlxsw_pci, char *mbox, return -EIO; } +static int mlxsw_pci_profile_get_kvd_sizes(const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) +{ + u32 singles_size, doubles_size, linear_size; + + if (!resources->kvd_single_min_size_valid || + !resources->kvd_double_min_size_valid || + !profile->used_kvd_split_data) + return -EIO; + + linear_size = profile->kvd_linear_size; + + /* The hash part is what left of the kvd without the + * linear part. It is split to the single size and + * double size by the parts ratio from the profile. + * Both sizes must be a multiplications of the + * granularity from the profile. + */ + doubles_size = (resources->kvd_size - linear_size); + doubles_size *= profile->kvd_hash_double_parts; + doubles_size /= (profile->kvd_hash_double_parts + + profile->kvd_hash_single_parts); + doubles_size /= profile->kvd_hash_granularity; + doubles_size *= profile->kvd_hash_granularity; + singles_size = resources->kvd_size - doubles_size - + linear_size; + + /* Check results are legal. */ + if (singles_size < resources->kvd_single_min_size || + doubles_size < resources->kvd_double_min_size || + resources->kvd_size < linear_size) + return -EIO; + + resources->kvd_single_size = singles_size; + resources->kvd_double_size = doubles_size; + resources->kvd_linear_size = linear_size; + + return 0; +} + static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, - const struct mlxsw_config_profile *profile) + const struct mlxsw_config_profile *profile, + struct mlxsw_resources *resources) { int i; + int err; mlxsw_cmd_mbox_zero(mbox); @@ -1222,18 +1314,6 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_max_vepa_channels_set( mbox, profile->max_vepa_channels); } - if (profile->used_max_lag) { - mlxsw_cmd_mbox_config_profile_set_max_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_lag_set( - mbox, profile->max_lag); - } - if (profile->used_max_port_per_lag) { - mlxsw_cmd_mbox_config_profile_set_max_port_per_lag_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_max_port_per_lag_set( - mbox, profile->max_port_per_lag); - } if (profile->used_max_mid) { mlxsw_cmd_mbox_config_profile_set_max_mid_set( mbox, 1); @@ -1310,19 +1390,22 @@ static int mlxsw_pci_config_profile(struct mlxsw_pci *mlxsw_pci, char *mbox, mlxsw_cmd_mbox_config_profile_adaptive_routing_group_cap_set( mbox, profile->adaptive_routing_group_cap); } - if (profile->used_kvd_sizes) { - mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_linear_size_set( - mbox, profile->kvd_linear_size); - mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set( - mbox, profile->kvd_hash_single_size); + if (resources->kvd_size_valid) { + err = mlxsw_pci_profile_get_kvd_sizes(profile, resources); + if (err) + return err; + + mlxsw_cmd_mbox_config_profile_set_kvd_linear_size_set(mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_linear_size_set(mbox, + resources->kvd_linear_size); + mlxsw_cmd_mbox_config_profile_set_kvd_hash_single_size_set(mbox, + 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_single_size_set(mbox, + resources->kvd_single_size); mlxsw_cmd_mbox_config_profile_set_kvd_hash_double_size_set( - mbox, 1); - mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set( - mbox, profile->kvd_hash_double_size); + mbox, 1); + mlxsw_cmd_mbox_config_profile_kvd_hash_double_size_set(mbox, + resources->kvd_double_size); } for (i = 0; i < MLXSW_CONFIG_PROFILE_SWID_COUNT; i++) @@ -1524,7 +1607,7 @@ static int mlxsw_pci_init(void *bus_priv, struct mlxsw_core *mlxsw_core, if (err) goto err_query_resources; - err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile); + err = mlxsw_pci_config_profile(mlxsw_pci, mbox, profile, resources); if (err) goto err_config_profile; diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 1721098eef13..6460c7256f2b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -591,6 +591,12 @@ static const struct mlxsw_reg_info mlxsw_reg_sfn = { */ MLXSW_ITEM32(reg, sfn, swid, 0x00, 24, 8); +/* reg_sfn_end + * Forces the current session to end. + * Access: OP + */ +MLXSW_ITEM32(reg, sfn, end, 0x04, 20, 1); + /* reg_sfn_num_rec * Request: Number of learned notifications and aged-out notification * records requested. @@ -605,6 +611,7 @@ static inline void mlxsw_reg_sfn_pack(char *payload) { MLXSW_REG_ZERO(sfn, payload); mlxsw_reg_sfn_swid_set(payload, 0); + mlxsw_reg_sfn_end_set(payload, 1); mlxsw_reg_sfn_num_rec_set(payload, MLXSW_REG_SFN_REC_MAX_COUNT); } @@ -1385,7 +1392,7 @@ static inline void mlxsw_reg_slcr_pack(char *payload, u16 lag_hash) { MLXSW_REG_ZERO(slcr, payload); mlxsw_reg_slcr_pp_set(payload, MLXSW_REG_SLCR_PP_GLOBAL); - mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_XOR); + mlxsw_reg_slcr_type_set(payload, MLXSW_REG_SLCR_TYPE_CRC); mlxsw_reg_slcr_lag_hash_set(payload, lag_hash); } @@ -2131,6 +2138,18 @@ MLXSW_ITEM32(reg, ptys, local_port, 0x00, 16, 8); */ MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3); +enum { + MLXSW_REG_PTYS_AN_STATUS_NA, + MLXSW_REG_PTYS_AN_STATUS_OK, + MLXSW_REG_PTYS_AN_STATUS_FAIL, +}; + +/* reg_ptys_an_status + * Autonegotiation status. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, an_status, 0x04, 28, 4); + #define MLXSW_REG_PTYS_ETH_SPEED_SGMII BIT(0) #define MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX BIT(1) #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 BIT(2) @@ -2145,6 +2164,7 @@ MLXSW_ITEM32(reg, ptys, proto_mask, 0x00, 0, 3); #define MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR BIT(14) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 BIT(15) #define MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4 BIT(16) +#define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2 BIT(18) #define MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 BIT(19) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 BIT(20) #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 BIT(21) @@ -2177,6 +2197,13 @@ MLXSW_ITEM32(reg, ptys, eth_proto_admin, 0x18, 0, 32); */ MLXSW_ITEM32(reg, ptys, eth_proto_oper, 0x24, 0, 32); +/* reg_ptys_eth_proto_lp_advertise + * The protocols that were advertised by the link partner during + * autonegotiation. + * Access: RO + */ +MLXSW_ITEM32(reg, ptys, eth_proto_lp_advertise, 0x30, 0, 32); + static inline void mlxsw_reg_ptys_pack(char *payload, u8 local_port, u32 proto_admin) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index d48873bcbddf..1ec0a4ce3c46 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -248,7 +248,8 @@ static void mlxsw_sp_span_entry_destroy(struct mlxsw_sp *mlxsw_sp, span_entry->used = false; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry * +mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) { struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp; int i; @@ -262,7 +263,8 @@ struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_find(struct mlxsw_sp_port *port) return NULL; } -struct mlxsw_sp_span_entry *mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) +static struct mlxsw_sp_span_entry +*mlxsw_sp_span_entry_get(struct mlxsw_sp_port *port) { struct mlxsw_sp_span_entry *span_entry; @@ -364,7 +366,8 @@ mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port, } /* bind the port to the SPAN entry */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, true, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, true, pa_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); if (err) goto err_mpar_reg_write; @@ -405,7 +408,8 @@ mlxsw_sp_span_inspected_port_unbind(struct mlxsw_sp_port *port, return; /* remove the inspected port */ - mlxsw_reg_mpar_pack(mpar_pl, port->local_port, type, false, pa_id); + mlxsw_reg_mpar_pack(mpar_pl, port->local_port, + (enum mlxsw_reg_mpar_i_e) type, false, pa_id); mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl); /* remove the SBIB buffer if it was egress SPAN */ @@ -556,8 +560,9 @@ int mlxsw_sp_port_vid_to_fid_set(struct mlxsw_sp_port *mlxsw_sp_port, return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(svfa), svfa_pl); } -static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, - u16 vid, bool learn_enable) +int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool learn_enable) { struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char *spvmlr_pl; @@ -566,13 +571,20 @@ static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, spvmlr_pl = kmalloc(MLXSW_REG_SPVMLR_LEN, GFP_KERNEL); if (!spvmlr_pl) return -ENOMEM; - mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid, vid, - learn_enable); + mlxsw_reg_spvmlr_pack(spvmlr_pl, mlxsw_sp_port->local_port, vid_begin, + vid_end, learn_enable); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(spvmlr), spvmlr_pl); kfree(spvmlr_pl); return err; } +static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid, bool learn_enable) +{ + return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, + learn_enable); +} + static int mlxsw_sp_port_system_port_mapping_set(struct mlxsw_sp_port *mlxsw_sp_port) { @@ -811,9 +823,9 @@ err_span_port_mtu_update: return err; } -static struct rtnl_link_stats64 * -mlxsw_sp_port_get_stats64(struct net_device *dev, - struct rtnl_link_stats64 *stats) +static int +mlxsw_sp_port_get_sw_stats64(const struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp_port_pcpu_stats *p; @@ -840,6 +852,107 @@ mlxsw_sp_port_get_stats64(struct net_device *dev, tx_dropped += p->tx_dropped; } stats->tx_dropped = tx_dropped; + return 0; +} + +static bool mlxsw_sp_port_has_offload_stats(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return true; + } + + return false; +} + +static int mlxsw_sp_port_get_offload_stats(int attr_id, const struct net_device *dev, + void *sp) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return mlxsw_sp_port_get_sw_stats64(dev, sp); + } + + return -EINVAL; +} + +static int mlxsw_sp_port_get_stats_raw(struct net_device *dev, int grp, + int prio, char *ppcnt_pl) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + + mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); + return mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); +} + +static int mlxsw_sp_port_get_hw_stats(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; + int err; + + err = mlxsw_sp_port_get_stats_raw(dev, MLXSW_REG_PPCNT_IEEE_8023_CNT, + 0, ppcnt_pl); + if (err) + goto out; + + stats->tx_packets = + mlxsw_reg_ppcnt_a_frames_transmitted_ok_get(ppcnt_pl); + stats->rx_packets = + mlxsw_reg_ppcnt_a_frames_received_ok_get(ppcnt_pl); + stats->tx_bytes = + mlxsw_reg_ppcnt_a_octets_transmitted_ok_get(ppcnt_pl); + stats->rx_bytes = + mlxsw_reg_ppcnt_a_octets_received_ok_get(ppcnt_pl); + stats->multicast = + mlxsw_reg_ppcnt_a_multicast_frames_received_ok_get(ppcnt_pl); + + stats->rx_crc_errors = + mlxsw_reg_ppcnt_a_frame_check_sequence_errors_get(ppcnt_pl); + stats->rx_frame_errors = + mlxsw_reg_ppcnt_a_alignment_errors_get(ppcnt_pl); + + stats->rx_length_errors = ( + mlxsw_reg_ppcnt_a_in_range_length_errors_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_out_of_range_length_field_get(ppcnt_pl) + + mlxsw_reg_ppcnt_a_frame_too_long_errors_get(ppcnt_pl)); + + stats->rx_errors = (stats->rx_crc_errors + + stats->rx_frame_errors + stats->rx_length_errors); + +out: + return err; +} + +static void update_stats_cache(struct work_struct *work) +{ + struct mlxsw_sp_port *mlxsw_sp_port = + container_of(work, struct mlxsw_sp_port, + hw_stats.update_dw.work); + + if (!netif_carrier_ok(mlxsw_sp_port->dev)) + goto out; + + mlxsw_sp_port_get_hw_stats(mlxsw_sp_port->dev, + mlxsw_sp_port->hw_stats.cache); + +out: + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, + MLXSW_HW_STATS_UPDATE_TIME); +} + +/* Return the stats from a cache that is updated periodically, + * as this function might get called in an atomic context. + */ +static struct rtnl_link_stats64 * +mlxsw_sp_port_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + + memcpy(stats, mlxsw_sp_port->hw_stats.cache, sizeof(*stats)); + return stats; } @@ -974,10 +1087,6 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev, goto err_port_vp_mode_trans; } - err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, false); - if (err) - goto err_port_vid_learning_set; - err = mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, true, untagged); if (err) goto err_port_add_vid; @@ -985,8 +1094,6 @@ static int mlxsw_sp_port_add_vid(struct net_device *dev, return 0; err_port_add_vid: - mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); -err_port_vid_learning_set: if (list_is_singular(&mlxsw_sp_port->vports_list)) mlxsw_sp_port_vlan_mode_trans(mlxsw_sp_port); err_port_vp_mode_trans: @@ -1013,8 +1120,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev, mlxsw_sp_port_vlan_set(mlxsw_sp_vport, vid, vid, false, false); - mlxsw_sp_port_vid_learning_set(mlxsw_sp_vport, vid, true); - /* Drop FID reference. If this was the last reference the * resources will be freed. */ @@ -1209,6 +1314,8 @@ static const struct net_device_ops mlxsw_sp_port_netdev_ops = { .ndo_set_mac_address = mlxsw_sp_port_set_mac_address, .ndo_change_mtu = mlxsw_sp_port_change_mtu, .ndo_get_stats64 = mlxsw_sp_port_get_stats64, + .ndo_has_offload_stats = mlxsw_sp_port_has_offload_stats, + .ndo_get_offload_stats = mlxsw_sp_port_get_offload_stats, .ndo_vlan_rx_add_vid = mlxsw_sp_port_add_vid, .ndo_vlan_rx_kill_vid = mlxsw_sp_port_kill_vid, .ndo_neigh_construct = mlxsw_sp_router_neigh_construct, @@ -1547,8 +1654,6 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, enum mlxsw_reg_ppcnt_grp grp, int prio, u64 *data, int data_index) { - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_port_hw_stats *hw_stats; char ppcnt_pl[MLXSW_REG_PPCNT_LEN]; int i, len; @@ -1557,10 +1662,9 @@ static void __mlxsw_sp_port_get_stats(struct net_device *dev, err = mlxsw_sp_get_hw_stats_by_group(&hw_stats, &len, grp); if (err) return; - mlxsw_reg_ppcnt_pack(ppcnt_pl, mlxsw_sp_port->local_port, grp, prio); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ppcnt), ppcnt_pl); + mlxsw_sp_port_get_stats_raw(dev, grp, prio, ppcnt_pl); for (i = 0; i < len; i++) - data[data_index + i] = !err ? hw_stats[i].getter(ppcnt_pl) : 0; + data[data_index + i] = hw_stats[i].getter(ppcnt_pl); } static void mlxsw_sp_port_get_stats(struct net_device *dev, @@ -1599,112 +1703,149 @@ static int mlxsw_sp_port_get_sset_count(struct net_device *dev, int sset) } struct mlxsw_sp_port_link_mode { + enum ethtool_link_mode_bit_indices mask_ethtool; u32 mask; - u32 supported; - u32 advertised; u32 speed; }; static const struct mlxsw_sp_port_link_mode mlxsw_sp_port_link_mode[] = { { .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_T, - .supported = SUPPORTED_100baseT_Full, - .advertised = ADVERTISED_100baseT_Full, - .speed = 100, - }, - { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100BASE_TX, - .speed = 100, + .mask_ethtool = ETHTOOL_LINK_MODE_100baseT_Full_BIT, + .speed = SPEED_100, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_SGMII | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX, - .supported = SUPPORTED_1000baseKX_Full, - .advertised = ADVERTISED_1000baseKX_Full, - .speed = 1000, + .mask_ethtool = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT, + .speed = SPEED_1000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_T, - .supported = SUPPORTED_10000baseT_Full, - .advertised = ADVERTISED_10000baseT_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseT_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CX4 | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4, - .supported = SUPPORTED_10000baseKX4_Full, - .advertised = ADVERTISED_10000baseKX4_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKX4_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_ER_LR, - .supported = SUPPORTED_10000baseKR_Full, - .advertised = ADVERTISED_10000baseKR_Full, - .speed = 10000, + .mask_ethtool = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT, + .speed = SPEED_10000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_20GBASE_KR2, - .supported = SUPPORTED_20000baseKR2_Full, - .advertised = ADVERTISED_20000baseKR2_Full, - .speed = 20000, + .mask_ethtool = ETHTOOL_LINK_MODE_20000baseKR2_Full_BIT, + .speed = SPEED_20000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_CR4, - .supported = SUPPORTED_40000baseCR4_Full, - .advertised = ADVERTISED_40000baseCR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4, - .supported = SUPPORTED_40000baseKR4_Full, - .advertised = ADVERTISED_40000baseKR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4, - .supported = SUPPORTED_40000baseSR4_Full, - .advertised = ADVERTISED_40000baseSR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseSR4_Full_BIT, + .speed = SPEED_40000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_40GBASE_LR4_ER4, - .supported = SUPPORTED_40000baseLR4_Full, - .advertised = ADVERTISED_40000baseLR4_Full, - .speed = 40000, + .mask_ethtool = ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT, + .speed = SPEED_40000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR | - MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR | - MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, - .speed = 25000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT, + .speed = SPEED_25000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2 | - MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, - .speed = 50000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR, + .mask_ethtool = ETHTOOL_LINK_MODE_25000baseSR_Full_BIT, + .speed = SPEED_25000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_CR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseCR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_KR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT, + .speed = SPEED_50000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_50GBASE_SR2, + .mask_ethtool = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + .speed = SPEED_50000, }, { .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, - .supported = SUPPORTED_56000baseKR4_Full, - .advertised = ADVERTISED_56000baseKR4_Full, - .speed = 56000, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseKR4_Full_BIT, + .speed = SPEED_56000, }, { - .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | - MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, - .speed = 100000, + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseCR4_Full_BIT, + .speed = SPEED_56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseSR4_Full_BIT, + .speed = SPEED_56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_56GBASE_R4, + .mask_ethtool = ETHTOOL_LINK_MODE_56000baseLR4_Full_BIT, + .speed = SPEED_56000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT, + .speed = SPEED_100000, + }, + { + .mask = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4, + .mask_ethtool = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT, + .speed = SPEED_100000, }, }; #define MLXSW_SP_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp_port_link_mode) -static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto) +static void +mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto, + struct ethtool_link_ksettings *cmd) { if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_CR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_SR | @@ -1712,43 +1853,29 @@ static u32 mlxsw_sp_from_ptys_supported_port(u32 ptys_eth_proto) MLXSW_REG_PTYS_ETH_SPEED_40GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 | MLXSW_REG_PTYS_ETH_SPEED_SGMII)) - return SUPPORTED_FIBRE; + ethtool_link_ksettings_add_link_mode(cmd, supported, FIBRE); if (ptys_eth_proto & (MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KR | MLXSW_REG_PTYS_ETH_SPEED_10GBASE_KX4 | MLXSW_REG_PTYS_ETH_SPEED_40GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 | MLXSW_REG_PTYS_ETH_SPEED_1000BASE_KX)) - return SUPPORTED_Backplane; - return 0; + ethtool_link_ksettings_add_link_mode(cmd, supported, Backplane); } -static u32 mlxsw_sp_from_ptys_supported_link(u32 ptys_eth_proto) +static void mlxsw_sp_from_ptys_link(u32 ptys_eth_proto, unsigned long *mode) { - u32 modes = 0; int i; for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) - modes |= mlxsw_sp_port_link_mode[i].supported; + __set_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, + mode); } - return modes; -} - -static u32 mlxsw_sp_from_ptys_advert_link(u32 ptys_eth_proto) -{ - u32 modes = 0; - int i; - - for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (ptys_eth_proto & mlxsw_sp_port_link_mode[i].mask) - modes |= mlxsw_sp_port_link_mode[i].advertised; - } - return modes; } static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, - struct ethtool_cmd *cmd) + struct ethtool_link_ksettings *cmd) { u32 speed = SPEED_UNKNOWN; u8 duplex = DUPLEX_UNKNOWN; @@ -1765,8 +1892,8 @@ static void mlxsw_sp_from_ptys_speed_duplex(bool carrier_ok, u32 ptys_eth_proto, } } out: - ethtool_cmd_speed_set(cmd, speed); - cmd->duplex = duplex; + cmd->base.speed = speed; + cmd->base.duplex = duplex; } static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) @@ -1791,49 +1918,15 @@ static u8 mlxsw_sp_port_connector_port(u32 ptys_eth_proto) return PORT_OTHER; } -static int mlxsw_sp_port_get_settings(struct net_device *dev, - struct ethtool_cmd *cmd) -{ - struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 eth_proto_cap; - u32 eth_proto_admin; - u32 eth_proto_oper; - int err; - - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to get proto"); - return err; - } - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, - ð_proto_admin, ð_proto_oper); - - cmd->supported = mlxsw_sp_from_ptys_supported_port(eth_proto_cap) | - mlxsw_sp_from_ptys_supported_link(eth_proto_cap) | - SUPPORTED_Pause | SUPPORTED_Asym_Pause | - SUPPORTED_Autoneg; - cmd->advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_admin); - mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), - eth_proto_oper, cmd); - - eth_proto_oper = eth_proto_oper ? eth_proto_oper : eth_proto_cap; - cmd->port = mlxsw_sp_port_connector_port(eth_proto_oper); - cmd->lp_advertising = mlxsw_sp_from_ptys_advert_link(eth_proto_oper); - - cmd->transceiver = XCVR_INTERNAL; - return 0; -} - -static u32 mlxsw_sp_to_ptys_advert_link(u32 advertising) +static u32 +mlxsw_sp_to_ptys_advert_link(const struct ethtool_link_ksettings *cmd) { u32 ptys_proto = 0; int i; for (i = 0; i < MLXSW_SP_PORT_LINK_MODE_LEN; i++) { - if (advertising & mlxsw_sp_port_link_mode[i].advertised) + if (test_bit(mlxsw_sp_port_link_mode[i].mask_ethtool, + cmd->link_modes.advertising)) ptys_proto |= mlxsw_sp_port_link_mode[i].mask; } return ptys_proto; @@ -1863,61 +1956,113 @@ static u32 mlxsw_sp_to_ptys_upper_speed(u32 upper_speed) return ptys_proto; } -static int mlxsw_sp_port_set_settings(struct net_device *dev, - struct ethtool_cmd *cmd) +static void mlxsw_sp_port_get_link_supported(u32 eth_proto_cap, + struct ethtool_link_ksettings *cmd) +{ + ethtool_link_ksettings_add_link_mode(cmd, supported, Asym_Pause); + ethtool_link_ksettings_add_link_mode(cmd, supported, Autoneg); + ethtool_link_ksettings_add_link_mode(cmd, supported, Pause); + + mlxsw_sp_from_ptys_supported_port(eth_proto_cap, cmd); + mlxsw_sp_from_ptys_link(eth_proto_cap, cmd->link_modes.supported); +} + +static void mlxsw_sp_port_get_link_advertise(u32 eth_proto_admin, bool autoneg, + struct ethtool_link_ksettings *cmd) +{ + if (!autoneg) + return; + + ethtool_link_ksettings_add_link_mode(cmd, advertising, Autoneg); + mlxsw_sp_from_ptys_link(eth_proto_admin, cmd->link_modes.advertising); +} + +static void +mlxsw_sp_port_get_link_lp_advertise(u32 eth_proto_lp, u8 autoneg_status, + struct ethtool_link_ksettings *cmd) +{ + if (autoneg_status != MLXSW_REG_PTYS_AN_STATUS_OK || !eth_proto_lp) + return; + + ethtool_link_ksettings_add_link_mode(cmd, lp_advertising, Autoneg); + mlxsw_sp_from_ptys_link(eth_proto_lp, cmd->link_modes.lp_advertising); +} + +static int mlxsw_sp_port_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) +{ + u32 eth_proto_cap, eth_proto_admin, eth_proto_oper, eth_proto_lp; + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + char ptys_pl[MLXSW_REG_PTYS_LEN]; + u8 autoneg_status; + bool autoneg; + int err; + + autoneg = mlxsw_sp_port->link.autoneg; + mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); + if (err) + return err; + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, + ð_proto_oper); + + mlxsw_sp_port_get_link_supported(eth_proto_cap, cmd); + + mlxsw_sp_port_get_link_advertise(eth_proto_admin, autoneg, cmd); + + eth_proto_lp = mlxsw_reg_ptys_eth_proto_lp_advertise_get(ptys_pl); + autoneg_status = mlxsw_reg_ptys_an_status_get(ptys_pl); + mlxsw_sp_port_get_link_lp_advertise(eth_proto_lp, autoneg_status, cmd); + + cmd->base.autoneg = autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + cmd->base.port = mlxsw_sp_port_connector_port(eth_proto_oper); + mlxsw_sp_from_ptys_speed_duplex(netif_carrier_ok(dev), eth_proto_oper, + cmd); + + return 0; +} + +static int +mlxsw_sp_port_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; char ptys_pl[MLXSW_REG_PTYS_LEN]; - u32 speed; - u32 eth_proto_new; - u32 eth_proto_cap; - u32 eth_proto_admin; + u32 eth_proto_cap, eth_proto_new; + bool autoneg; int err; - speed = ethtool_cmd_speed(cmd); - - eth_proto_new = cmd->autoneg == AUTONEG_ENABLE ? - mlxsw_sp_to_ptys_advert_link(cmd->advertising) : - mlxsw_sp_to_ptys_speed(speed); - mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, 0); err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to get proto"); + if (err) return err; - } - mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, ð_proto_admin, NULL); + mlxsw_reg_ptys_unpack(ptys_pl, ð_proto_cap, NULL, NULL); + + autoneg = cmd->base.autoneg == AUTONEG_ENABLE; + eth_proto_new = autoneg ? + mlxsw_sp_to_ptys_advert_link(cmd) : + mlxsw_sp_to_ptys_speed(cmd->base.speed); eth_proto_new = eth_proto_new & eth_proto_cap; if (!eth_proto_new) { - netdev_err(dev, "Not supported proto admin requested"); + netdev_err(dev, "No supported speed requested\n"); return -EINVAL; } - if (eth_proto_new == eth_proto_admin) - return 0; mlxsw_reg_ptys_pack(ptys_pl, mlxsw_sp_port->local_port, eth_proto_new); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ptys), ptys_pl); - if (err) { - netdev_err(dev, "Failed to set proto admin"); + if (err) return err; - } if (!netif_running(dev)) return 0; - err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); - if (err) { - netdev_err(dev, "Failed to set admin status"); - return err; - } + mlxsw_sp_port->link.autoneg = autoneg; - err = mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); - if (err) { - netdev_err(dev, "Failed to set admin status"); - return err; - } + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, false); + mlxsw_sp_port_admin_status_set(mlxsw_sp_port, true); return 0; } @@ -1931,8 +2076,8 @@ static const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .set_phys_id = mlxsw_sp_port_set_phys_id, .get_ethtool_stats = mlxsw_sp_port_get_stats, .get_sset_count = mlxsw_sp_port_get_sset_count, - .get_settings = mlxsw_sp_port_get_settings, - .set_settings = mlxsw_sp_port_set_settings, + .get_link_ksettings = mlxsw_sp_port_get_link_ksettings, + .set_link_ksettings = mlxsw_sp_port_set_link_ksettings, }; static int @@ -2082,6 +2227,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, mlxsw_sp_port->mapping.module = module; mlxsw_sp_port->mapping.width = width; mlxsw_sp_port->mapping.lane = lane; + mlxsw_sp_port->link.autoneg = 1; bytes = DIV_ROUND_UP(VLAN_N_VID, BITS_PER_BYTE); mlxsw_sp_port->active_vlans = kzalloc(bytes, GFP_KERNEL); if (!mlxsw_sp_port->active_vlans) { @@ -2103,6 +2249,16 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_alloc_stats; } + mlxsw_sp_port->hw_stats.cache = + kzalloc(sizeof(*mlxsw_sp_port->hw_stats.cache), GFP_KERNEL); + + if (!mlxsw_sp_port->hw_stats.cache) { + err = -ENOMEM; + goto err_alloc_hw_stats; + } + INIT_DELAYED_WORK(&mlxsw_sp_port->hw_stats.update_dw, + &update_stats_cache); + dev->netdev_ops = &mlxsw_sp_port_netdev_ops; dev->ethtool_ops = &mlxsw_sp_port_ethtool_ops; @@ -2129,7 +2285,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, /* Each packet needs to have a Tx header (metadata) on top all other * headers. */ - dev->hard_header_len += MLXSW_TXHDR_LEN; + dev->needed_headroom = MLXSW_TXHDR_LEN; err = mlxsw_sp_port_system_port_mapping_set(mlxsw_sp_port); if (err) { @@ -2203,6 +2359,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port, goto err_core_port_init; } + mlxsw_core_schedule_dw(&mlxsw_sp_port->hw_stats.update_dw, 0); return 0; err_core_port_init: @@ -2223,6 +2380,8 @@ err_port_system_port_mapping_set: err_dev_addr_init: mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); err_port_swid_set: + kfree(mlxsw_sp_port->hw_stats.cache); +err_alloc_hw_stats: free_percpu(mlxsw_sp_port->pcpu_stats); err_alloc_stats: kfree(mlxsw_sp_port->untagged_vlans); @@ -2239,6 +2398,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) if (!mlxsw_sp_port) return; + cancel_delayed_work_sync(&mlxsw_sp_port->hw_stats.update_dw); mlxsw_core_port_fini(&mlxsw_sp_port->core_port); unregister_netdev(mlxsw_sp_port->dev); /* This calls ndo_stop */ mlxsw_sp->ports[local_port] = NULL; @@ -2248,6 +2408,7 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port) mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT); mlxsw_sp_port_module_unmap(mlxsw_sp, mlxsw_sp_port->local_port); free_percpu(mlxsw_sp_port->pcpu_stats); + kfree(mlxsw_sp_port->hw_stats.cache); kfree(mlxsw_sp_port->untagged_vlans); kfree(mlxsw_sp_port->active_vlans); WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vports_list)); @@ -2571,123 +2732,47 @@ static void mlxsw_sp_rx_listener_func(struct sk_buff *skb, u8 local_port, netif_receive_skb(skb); } +static void mlxsw_sp_rx_listener_mark_func(struct sk_buff *skb, u8 local_port, + void *priv) +{ + skb->offload_fwd_mark = 1; + return mlxsw_sp_rx_listener_func(skb, local_port, priv); +} + +#define MLXSW_SP_RXL(_func, _trap_id, _action) \ + { \ + .func = _func, \ + .local_port = MLXSW_PORT_DONT_CARE, \ + .trap_id = MLXSW_TRAP_ID_##_trap_id, \ + .action = MLXSW_REG_HPKT_ACTION_##_action, \ + } + static const struct mlxsw_rx_listener mlxsw_sp_rx_listener[] = { - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_FDB_MC, - }, + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, FDB_MC, TRAP_TO_CPU), /* Traps for specific L2 packet types, not trapped as FDB MC */ - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_STP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_LACP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_EAPOL, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_LLDP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_MMRP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_MVRP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_RPVST, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_DHCP, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_QUERY, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V1_REPORT, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V2_REPORT, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V2_LEAVE, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IGMP_V3_REPORT, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_ARPBC, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_ARPUC, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_MTUERROR, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_TTLERROR, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_LBERROR, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_OSPF, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_IP2ME, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_RTR_INGRESS0, - }, - { - .func = mlxsw_sp_rx_listener_func, - .local_port = MLXSW_PORT_DONT_CARE, - .trap_id = MLXSW_TRAP_ID_HOST_MISS_IPV4, - }, + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, STP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LACP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, EAPOL, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LLDP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MMRP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MVRP, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RPVST, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, DHCP, MIRROR_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, IGMP_QUERY, MIRROR_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V1_REPORT, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_REPORT, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V2_LEAVE, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IGMP_V3_REPORT, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPBC, MIRROR_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, ARPUC, MIRROR_TO_CPU), + /* L3 traps */ + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, MTUERROR, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, TTLERROR, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, LBERROR, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_mark_func, OSPF, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, IP2ME, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, RTR_INGRESS0, TRAP_TO_CPU), + MLXSW_SP_RXL(mlxsw_sp_rx_listener_func, HOST_MISS_IPV4, TRAP_TO_CPU), }; static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) @@ -2714,7 +2799,7 @@ static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp) if (err) goto err_rx_listener_register; - mlxsw_reg_hpkt_pack(hpkt_pl, MLXSW_REG_HPKT_ACTION_TRAP_TO_CPU, + mlxsw_reg_hpkt_pack(hpkt_pl, mlxsw_sp_rx_listener[i].action, mlxsw_sp_rx_listener[i].trap_id); err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(hpkt), hpkt_pl); if (err) @@ -2802,7 +2887,9 @@ static int mlxsw_sp_flood_init(struct mlxsw_sp *mlxsw_sp) static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; char slcr_pl[MLXSW_REG_SLCR_LEN]; + int err; mlxsw_reg_slcr_pack(slcr_pl, MLXSW_REG_SLCR_LAG_HASH_SMAC | MLXSW_REG_SLCR_LAG_HASH_DMAC | @@ -2813,7 +2900,26 @@ static int mlxsw_sp_lag_init(struct mlxsw_sp *mlxsw_sp) MLXSW_REG_SLCR_LAG_HASH_SPORT | MLXSW_REG_SLCR_LAG_HASH_DPORT | MLXSW_REG_SLCR_LAG_HASH_IPPROTO); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(slcr), slcr_pl); + if (err) + return err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!(resources->max_lag_valid && resources->max_ports_in_lag_valid)) + return -EIO; + + mlxsw_sp->lags = kcalloc(resources->max_lag, + sizeof(struct mlxsw_sp_upper), + GFP_KERNEL); + if (!mlxsw_sp->lags) + return -ENOMEM; + + return 0; +} + +static void mlxsw_sp_lag_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->lags); } static int mlxsw_sp_init(struct mlxsw_core *mlxsw_core, @@ -2897,6 +3003,7 @@ err_span_init: err_router_init: mlxsw_sp_switchdev_fini(mlxsw_sp); err_switchdev_init: + mlxsw_sp_lag_fini(mlxsw_sp); err_lag_init: mlxsw_sp_buffers_fini(mlxsw_sp); err_buffers_init: @@ -2910,38 +3017,26 @@ err_rx_listener_register: static void mlxsw_sp_fini(struct mlxsw_core *mlxsw_core) { struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); - int i; mlxsw_sp_ports_remove(mlxsw_sp); mlxsw_sp_span_fini(mlxsw_sp); mlxsw_sp_router_fini(mlxsw_sp); mlxsw_sp_switchdev_fini(mlxsw_sp); + mlxsw_sp_lag_fini(mlxsw_sp); mlxsw_sp_buffers_fini(mlxsw_sp); mlxsw_sp_traps_fini(mlxsw_sp); mlxsw_sp_event_unregister(mlxsw_sp, MLXSW_TRAP_ID_PUDE); WARN_ON(!list_empty(&mlxsw_sp->vfids.list)); WARN_ON(!list_empty(&mlxsw_sp->fids)); - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) - WARN_ON_ONCE(mlxsw_sp->rifs[i]); } static struct mlxsw_config_profile mlxsw_sp_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = MLXSW_SP_LAG_MAX, - .used_max_port_per_lag = 1, - .max_port_per_lag = MLXSW_SP_PORT_PER_LAG_MAX, .used_max_mid = 1, .max_mid = MLXSW_SP_MID_MAX, .used_max_pgt = 1, .max_pgt = 0, - .used_max_system_port = 1, - .max_system_port = 64, - .used_max_vlan_groups = 1, - .max_vlan_groups = 127, - .used_max_regions = 1, - .max_regions = 400, .used_flood_tables = 1, .used_flood_mode = 1, .flood_mode = 3, @@ -2953,10 +3048,11 @@ static struct mlxsw_config_profile mlxsw_sp_config_profile = { .max_ib_mc = 0, .used_max_pkey = 1, .max_pkey = 0, - .used_kvd_sizes = 1, + .used_kvd_split_data = 1, + .kvd_hash_granularity = MLXSW_SP_KVD_GRANULARITY, + .kvd_hash_single_parts = 2, + .kvd_hash_double_parts = 1, .kvd_linear_size = MLXSW_SP_KVD_LINEAR_SIZE, - .kvd_hash_single_size = MLXSW_SP_KVD_HASH_SINGLE_SIZE, - .kvd_hash_double_size = MLXSW_SP_KVD_HASH_DOUBLE_SIZE, .swid_config = { { .used_type = 1, @@ -3073,13 +3169,15 @@ static bool mlxsw_sp_rif_should_config(struct mlxsw_sp_rif *r, static int mlxsw_sp_avail_rif_get(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) if (!mlxsw_sp->rifs[i]) return i; - return MLXSW_SP_RIF_MAX; + return MLXSW_SP_INVALID_RIF; } static void mlxsw_sp_vport_rif_sp_attr_get(struct mlxsw_sp_port *mlxsw_sp_vport, @@ -3159,7 +3257,7 @@ mlxsw_sp_vport_rif_sp_create(struct mlxsw_sp_port *mlxsw_sp_vport, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return ERR_PTR(-ERANGE); err = mlxsw_sp_vport_rif_sp_op(mlxsw_sp_vport, l3_dev, rif, true); @@ -3391,7 +3489,7 @@ static int mlxsw_sp_rif_bridge_create(struct mlxsw_sp *mlxsw_sp, int err; rif = mlxsw_sp_avail_rif_get(mlxsw_sp); - if (rif == MLXSW_SP_RIF_MAX) + if (rif == MLXSW_SP_INVALID_RIF) return -ERANGE; err = mlxsw_sp_router_port_flood_set(mlxsw_sp, f->fid, true); @@ -3598,12 +3696,14 @@ static bool mlxsw_sp_port_fdb_should_flush(struct mlxsw_sp_port *mlxsw_sp_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u16 lag_id = mlxsw_sp_port->lag_id; + struct mlxsw_resources *resources; int i, count = 0; if (!mlxsw_sp_port->lagged) return true; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { struct mlxsw_sp_port *lag_port; lag_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); @@ -3809,11 +3909,13 @@ static int mlxsw_sp_lag_index_get(struct mlxsw_sp *mlxsw_sp, struct net_device *lag_dev, u16 *p_lag_id) { + struct mlxsw_resources *resources; struct mlxsw_sp_upper *lag; int free_lag_id = -1; int i; - for (i = 0; i < MLXSW_SP_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_lag; i++) { lag = mlxsw_sp_lag_get(mlxsw_sp, i); if (lag->ref_count) { if (lag->dev == lag_dev) { @@ -3847,9 +3949,11 @@ mlxsw_sp_master_lag_check(struct mlxsw_sp *mlxsw_sp, static int mlxsw_sp_port_lag_index_get(struct mlxsw_sp *mlxsw_sp, u16 lag_id, u8 *p_port_index) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { if (!mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i)) { *p_port_index = i; return 0; diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h index ac48abebe904..9b22863a924b 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h @@ -45,7 +45,7 @@ #include #include #include -#include +#include #include "port.h" #include "core.h" @@ -54,10 +54,7 @@ #define MLXSW_SP_VFID_MAX 6656 /* Bridged VLAN interfaces */ #define MLXSW_SP_RFID_BASE 15360 -#define MLXSW_SP_RIF_MAX 800 - -#define MLXSW_SP_LAG_MAX 64 -#define MLXSW_SP_PORT_PER_LAG_MAX 16 +#define MLXSW_SP_INVALID_RIF 0xffff #define MLXSW_SP_MID_MAX 7000 @@ -67,8 +64,6 @@ #define MLXSW_SP_LPM_TREE_MAX 22 #define MLXSW_SP_LPM_TREE_COUNT (MLXSW_SP_LPM_TREE_MAX - MLXSW_SP_LPM_TREE_MIN) -#define MLXSW_SP_VIRTUAL_ROUTER_MAX 256 - #define MLXSW_SP_PORT_BASE_SPEED 25000 /* Mb/s */ #define MLXSW_SP_BYTES_PER_CELL 96 @@ -77,8 +72,7 @@ #define MLXSW_SP_CELLS_TO_BYTES(c) (c * MLXSW_SP_BYTES_PER_CELL) #define MLXSW_SP_KVD_LINEAR_SIZE 65536 /* entries */ -#define MLXSW_SP_KVD_HASH_SINGLE_SIZE 163840 /* entries */ -#define MLXSW_SP_KVD_HASH_DOUBLE_SIZE 32768 /* entries */ +#define MLXSW_SP_KVD_GRANULARITY 128 /* Maximum delay buffer needed in case of PAUSE frames, in cells. * Assumes 100m cable and maximum MTU. @@ -253,7 +247,7 @@ struct mlxsw_sp_port_mall_tc_entry { struct mlxsw_sp_router { struct mlxsw_sp_lpm_tree lpm_trees[MLXSW_SP_LPM_TREE_COUNT]; - struct mlxsw_sp_vr vrs[MLXSW_SP_VIRTUAL_ROUTER_MAX]; + struct mlxsw_sp_vr *vrs; struct rhashtable neigh_ht; struct { struct delayed_work dw; @@ -263,6 +257,7 @@ struct mlxsw_sp_router { #define MLXSW_SP_UNRESOLVED_NH_PROBE_INTERVAL 5000 /* ms */ struct list_head nexthop_group_list; struct list_head nexthop_neighs_list; + bool aborted; }; struct mlxsw_sp { @@ -275,7 +270,7 @@ struct mlxsw_sp { DECLARE_BITMAP(mapped, MLXSW_SP_MID_MAX); } br_mids; struct list_head fids; /* VLAN-aware bridge FIDs */ - struct mlxsw_sp_rif *rifs[MLXSW_SP_RIF_MAX]; + struct mlxsw_sp_rif **rifs; struct mlxsw_sp_port **ports; struct mlxsw_core *core; const struct mlxsw_bus_info *bus_info; @@ -290,7 +285,7 @@ struct mlxsw_sp { #define MLXSW_SP_DEFAULT_AGEING_TIME 300 u32 ageing_time; struct mlxsw_sp_upper master_bridge; - struct mlxsw_sp_upper lags[MLXSW_SP_LAG_MAX]; + struct mlxsw_sp_upper *lags; u8 port_to_module[MLXSW_PORT_MAX_PORTS]; struct mlxsw_sp_sb sb; struct mlxsw_sp_router router; @@ -302,6 +297,7 @@ struct mlxsw_sp { struct mlxsw_sp_span_entry *entries; int entries_count; } span; + struct notifier_block fib_nb; }; static inline struct mlxsw_sp_upper * @@ -341,7 +337,8 @@ struct mlxsw_sp_port { } vport; struct { u8 tx_pause:1, - rx_pause:1; + rx_pause:1, + autoneg:1; } link; struct { struct ieee_ets *ets; @@ -360,6 +357,11 @@ struct mlxsw_sp_port { struct list_head vports_list; /* TC handles */ struct list_head mall_tc_list; + struct { + #define MLXSW_HW_STATS_UPDATE_TIME HZ + struct rtnl_link_stats64 *cache; + struct delayed_work update_dw; + } hw_stats; }; struct mlxsw_sp_port *mlxsw_sp_port_lower_dev_hold(struct net_device *dev); @@ -477,9 +479,12 @@ static inline struct mlxsw_sp_rif * mlxsw_sp_rif_find_by_dev(const struct mlxsw_sp *mlxsw_sp, const struct net_device *dev) { + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_RIF_MAX; i++) + resources = mlxsw_core_resources_get(mlxsw_sp->core); + + for (i = 0; i < resources->max_rif; i++) if (mlxsw_sp->rifs[i] && mlxsw_sp->rifs[i]->dev == dev) return mlxsw_sp->rifs[i]; @@ -558,6 +563,9 @@ int __mlxsw_sp_port_headroom_set(struct mlxsw_sp_port *mlxsw_sp_port, int mtu, int mlxsw_sp_port_ets_maxrate_set(struct mlxsw_sp_port *mlxsw_sp_port, enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index, u32 maxrate); +int __mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool learn_enable); #ifdef CONFIG_MLXSW_SPECTRUM_DCB @@ -578,11 +586,6 @@ static inline void mlxsw_sp_port_dcb_fini(struct mlxsw_sp_port *mlxsw_sp_port) int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp); void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp); -int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans); -int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4); int mlxsw_sp_router_neigh_construct(struct net_device *dev, struct neighbour *n); void mlxsw_sp_router_neigh_destroy(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c index 953b214f38d0..bcaed8a38037 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c @@ -595,9 +595,9 @@ int mlxsw_sp_sb_pool_get(struct mlxsw_core *mlxsw_core, enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); struct mlxsw_sp_sb_pr *pr = mlxsw_sp_sb_pr_get(mlxsw_sp, pool, dir); - pool_info->pool_type = dir; + pool_info->pool_type = (enum devlink_sb_pool_type) dir; pool_info->size = MLXSW_SP_CELLS_TO_BYTES(pr->size); - pool_info->threshold_type = pr->mode; + pool_info->threshold_type = (enum devlink_sb_threshold_type) pr->mode; return 0; } @@ -608,9 +608,10 @@ int mlxsw_sp_sb_pool_set(struct mlxsw_core *mlxsw_core, struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core); u8 pool = pool_get(pool_index); enum mlxsw_reg_sbxx_dir dir = dir_get(pool_index); - enum mlxsw_reg_sbpr_mode mode = threshold_type; u32 pool_size = MLXSW_SP_BYTES_TO_CELLS(size); + enum mlxsw_reg_sbpr_mode mode; + mode = (enum mlxsw_reg_sbpr_mode) threshold_type; return mlxsw_sp_sb_pr_write(mlxsw_sp, pool, dir, mode, pool_size); } @@ -696,13 +697,13 @@ int mlxsw_sp_sb_tc_pool_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); *p_threshold = mlxsw_sp_sb_threshold_out(mlxsw_sp, cm->pool, dir, cm->max_buff); - *p_pool_index = pool_index_get(cm->pool, pool_type); + *p_pool_index = pool_index_get(cm->pool, dir); return 0; } @@ -716,7 +717,7 @@ int mlxsw_sp_sb_tc_pool_bind_set(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; u8 pool = pool_get(pool_index); u32 max_buff; int err; @@ -943,7 +944,7 @@ int mlxsw_sp_sb_occ_tc_port_bind_get(struct mlxsw_core_port *mlxsw_core_port, struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; u8 local_port = mlxsw_sp_port->local_port; u8 pg_buff = tc_index; - enum mlxsw_reg_sbxx_dir dir = pool_type; + enum mlxsw_reg_sbxx_dir dir = (enum mlxsw_reg_sbxx_dir) pool_type; struct mlxsw_sp_sb_cm *cm = mlxsw_sp_sb_cm_get(mlxsw_sp, local_port, pg_buff, dir); diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 3f5c51da6d3e..78fc557d6dd7 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -43,6 +43,7 @@ #include #include #include +#include #include "spectrum.h" #include "core.h" @@ -122,17 +123,20 @@ struct mlxsw_sp_nexthop_group; struct mlxsw_sp_fib_entry { struct rhash_head ht_node; + struct list_head list; struct mlxsw_sp_fib_key key; enum mlxsw_sp_fib_entry_type type; unsigned int ref_count; u16 rif; /* used for action local */ struct mlxsw_sp_vr *vr; + struct fib_info *fi; struct list_head nexthop_group_node; struct mlxsw_sp_nexthop_group *nh_group; }; struct mlxsw_sp_fib { struct rhashtable ht; + struct list_head entry_list; unsigned long prefix_ref_count[MLXSW_SP_PREFIX_COUNT]; struct mlxsw_sp_prefix_usage prefix_usage; }; @@ -154,6 +158,7 @@ static int mlxsw_sp_fib_entry_insert(struct mlxsw_sp_fib *fib, mlxsw_sp_fib_ht_params); if (err) return err; + list_add_tail(&fib_entry->list, &fib->entry_list); if (fib->prefix_ref_count[prefix_len]++ == 0) mlxsw_sp_prefix_usage_set(&fib->prefix_usage, prefix_len); return 0; @@ -166,6 +171,7 @@ static void mlxsw_sp_fib_entry_remove(struct mlxsw_sp_fib *fib, if (--fib->prefix_ref_count[prefix_len] == 0) mlxsw_sp_prefix_usage_clear(&fib->prefix_usage, prefix_len); + list_del(&fib_entry->list); rhashtable_remove_fast(&fib->ht, &fib_entry->ht_node, mlxsw_sp_fib_ht_params); } @@ -216,6 +222,7 @@ static struct mlxsw_sp_fib *mlxsw_sp_fib_create(void) err = rhashtable_init(&fib->ht, &mlxsw_sp_fib_ht_params); if (err) goto err_rhashtable_init; + INIT_LIST_HEAD(&fib->entry_list); return fib; err_rhashtable_init: @@ -252,7 +259,9 @@ static int mlxsw_sp_lpm_tree_alloc(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, true, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, true, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -261,7 +270,9 @@ static int mlxsw_sp_lpm_tree_free(struct mlxsw_sp *mlxsw_sp, { char ralta_pl[MLXSW_REG_RALTA_LEN]; - mlxsw_reg_ralta_pack(ralta_pl, false, lpm_tree->proto, lpm_tree->id); + mlxsw_reg_ralta_pack(ralta_pl, false, + (enum mlxsw_reg_ralxx_protocol) lpm_tree->proto, + lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); } @@ -368,10 +379,12 @@ static void mlxsw_sp_lpm_init(struct mlxsw_sp *mlxsw_sp) static struct mlxsw_sp_vr *mlxsw_sp_vr_find_unused(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (!vr->used) return vr; @@ -384,7 +397,9 @@ static int mlxsw_sp_vr_lpm_tree_bind(struct mlxsw_sp *mlxsw_sp, { char raltb_pl[MLXSW_REG_RALTB_LEN]; - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, vr->lpm_tree->id); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, + vr->lpm_tree->id); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -394,7 +409,8 @@ static int mlxsw_sp_vr_lpm_tree_unbind(struct mlxsw_sp *mlxsw_sp, char raltb_pl[MLXSW_REG_RALTB_LEN]; /* Bind to tree 0 which is default */ - mlxsw_reg_raltb_pack(raltb_pl, vr->id, vr->proto, 0); + mlxsw_reg_raltb_pack(raltb_pl, vr->id, + (enum mlxsw_reg_ralxx_protocol) vr->proto, 0); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); } @@ -410,11 +426,14 @@ static struct mlxsw_sp_vr *mlxsw_sp_vr_find(struct mlxsw_sp *mlxsw_sp, u32 tb_id, enum mlxsw_sp_l3proto proto) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; tb_id = mlxsw_sp_fix_tb_id(tb_id); - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; if (vr->used && vr->proto == proto && vr->tb_id == tb_id) return vr; @@ -548,15 +567,33 @@ static void mlxsw_sp_vr_put(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_vr *vr) &vr->fib->prefix_usage); } -static void mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) +static int mlxsw_sp_vrs_init(struct mlxsw_sp *mlxsw_sp) { + struct mlxsw_resources *resources; struct mlxsw_sp_vr *vr; int i; - for (i = 0; i < MLXSW_SP_VIRTUAL_ROUTER_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_virtual_routers_valid) + return -EIO; + + mlxsw_sp->router.vrs = kcalloc(resources->max_virtual_routers, + sizeof(struct mlxsw_sp_vr), + GFP_KERNEL); + if (!mlxsw_sp->router.vrs) + return -ENOMEM; + + for (i = 0; i < resources->max_virtual_routers; i++) { vr = &mlxsw_sp->router.vrs[i]; vr->id = i; } + + return 0; +} + +static void mlxsw_sp_vrs_fini(struct mlxsw_sp *mlxsw_sp) +{ + kfree(mlxsw_sp->router.vrs); } struct mlxsw_sp_neigh_key { @@ -1081,9 +1118,10 @@ static int mlxsw_sp_adj_index_mass_update_vr(struct mlxsw_sp *mlxsw_sp, { char raleu_pl[MLXSW_REG_RALEU_LEN]; - mlxsw_reg_raleu_pack(raleu_pl, vr->proto, vr->id, - adj_index, ecmp_size, - new_adj_index, new_ecmp_size); + mlxsw_reg_raleu_pack(raleu_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, vr->id, + adj_index, ecmp_size, new_adj_index, + new_ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raleu), raleu_pl); } @@ -1489,50 +1527,6 @@ static void mlxsw_sp_nexthop_group_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_nexthop_group_destroy(mlxsw_sp, nh_grp); } -static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - - mlxsw_reg_rgcr_pack(rgcr_pl, true); - mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, MLXSW_SP_RIF_MAX); - return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); -} - -static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - char rgcr_pl[MLXSW_REG_RGCR_LEN]; - - mlxsw_reg_rgcr_pack(rgcr_pl, false); - mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); -} - -int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) -{ - int err; - - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); - INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); - err = __mlxsw_sp_router_init(mlxsw_sp); - if (err) - return err; - mlxsw_sp_lpm_init(mlxsw_sp); - mlxsw_sp_vrs_init(mlxsw_sp); - err = mlxsw_sp_neigh_init(mlxsw_sp); - if (err) - goto err_neigh_init; - return 0; - -err_neigh_init: - __mlxsw_sp_router_fini(mlxsw_sp); - return err; -} - -void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) -{ - mlxsw_sp_neigh_fini(mlxsw_sp); - __mlxsw_sp_router_fini(mlxsw_sp); -} - static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry, enum mlxsw_reg_ralue_op op) @@ -1558,8 +1552,9 @@ static int mlxsw_sp_fib_entry_op4_remote(struct mlxsw_sp *mlxsw_sp, trap_id = MLXSW_TRAP_ID_RTR_INGRESS0; } - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_remote_pack(ralue_pl, trap_action, trap_id, adjacency_index, ecmp_size); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); @@ -1573,8 +1568,9 @@ static int mlxsw_sp_fib_entry_op4_local(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr *vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_local_pack(ralue_pl, MLXSW_REG_RALUE_TRAP_ACTION_NOP, 0, fib_entry->rif); @@ -1589,8 +1585,9 @@ static int mlxsw_sp_fib_entry_op4_trap(struct mlxsw_sp *mlxsw_sp, u32 *p_dip = (u32 *) fib_entry->key.addr; struct mlxsw_sp_vr *vr = fib_entry->vr; - mlxsw_reg_ralue_pack4(ralue_pl, vr->proto, op, vr->id, - fib_entry->key.prefix_len, *p_dip); + mlxsw_reg_ralue_pack4(ralue_pl, + (enum mlxsw_reg_ralxx_protocol) vr->proto, op, + vr->id, fib_entry->key.prefix_len, *p_dip); mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); } @@ -1637,94 +1634,102 @@ static int mlxsw_sp_fib_entry_del(struct mlxsw_sp *mlxsw_sp, MLXSW_REG_RALUE_OP_WRITE_DELETE); } -struct mlxsw_sp_router_fib4_add_info { - struct switchdev_trans_item tritem; - struct mlxsw_sp *mlxsw_sp; - struct mlxsw_sp_fib_entry *fib_entry; -}; - -static void mlxsw_sp_router_fib4_add_info_destroy(void const *data) -{ - const struct mlxsw_sp_router_fib4_add_info *info = data; - struct mlxsw_sp_fib_entry *fib_entry = info->fib_entry; - struct mlxsw_sp *mlxsw_sp = info->mlxsw_sp; - struct mlxsw_sp_vr *vr = fib_entry->vr; - - mlxsw_sp_fib_entry_destroy(fib_entry); - mlxsw_sp_vr_put(mlxsw_sp, vr); - kfree(info); -} - static int mlxsw_sp_router_fib4_entry_init(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4, + const struct fib_entry_notifier_info *fen_info, struct mlxsw_sp_fib_entry *fib_entry) { - struct fib_info *fi = fib4->fi; + struct fib_info *fi = fen_info->fi; + struct mlxsw_sp_rif *r = NULL; + int nhsel; + int err; - if (fib4->type == RTN_LOCAL || fib4->type == RTN_BROADCAST) { + if (fen_info->type == RTN_LOCAL || fen_info->type == RTN_BROADCAST) { fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; } - if (fib4->type != RTN_UNICAST) + if (fen_info->type != RTN_UNICAST) return -EINVAL; - if (fi->fib_scope != RT_SCOPE_UNIVERSE) { - struct mlxsw_sp_rif *r; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; - r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, fi->fib_dev); - if (!r) - return -EINVAL; - fib_entry->rif = r->rif; + if (!nh->nh_dev) + continue; + r = mlxsw_sp_rif_find_by_dev(mlxsw_sp, nh->nh_dev); + if (!r) { + /* In case router interface is not found for + * at least one of the nexthops, that means + * the nexthop points to some device unrelated + * to us. Set trap and pass the packets for + * this prefix to kernel. + */ + break; + } + } + + if (!r) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_TRAP; return 0; } - fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; - return mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); + + if (fi->fib_scope != RT_SCOPE_UNIVERSE) { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_LOCAL; + fib_entry->rif = r->rif; + } else { + fib_entry->type = MLXSW_SP_FIB_ENTRY_TYPE_REMOTE; + err = mlxsw_sp_nexthop_group_get(mlxsw_sp, fib_entry, fi); + if (err) + return err; + } + fib_info_offload_inc(fen_info->fi); + return 0; } static void mlxsw_sp_router_fib4_entry_fini(struct mlxsw_sp *mlxsw_sp, struct mlxsw_sp_fib_entry *fib_entry) { - if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) - return; - mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); + if (fib_entry->type != MLXSW_SP_FIB_ENTRY_TYPE_TRAP) + fib_info_offload_dec(fib_entry->fi); + if (fib_entry->type == MLXSW_SP_FIB_ENTRY_TYPE_REMOTE) + mlxsw_sp_nexthop_group_put(mlxsw_sp, fib_entry); } static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_get(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4) + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_fib_entry *fib_entry; - struct fib_info *fi = fib4->fi; + struct fib_info *fi = fen_info->fi; struct mlxsw_sp_vr *vr; int err; - vr = mlxsw_sp_vr_get(mlxsw_sp, fib4->dst_len, fib4->tb_id, + vr = mlxsw_sp_vr_get(mlxsw_sp, fen_info->dst_len, fen_info->tb_id, MLXSW_SP_L3_PROTO_IPV4); if (IS_ERR(vr)) return ERR_CAST(vr); - fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, - sizeof(fib4->dst), - fib4->dst_len, fi->fib_dev); + fib_entry = mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, fi->fib_dev); if (fib_entry) { /* Already exists, just take a reference */ fib_entry->ref_count++; return fib_entry; } - fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fib4->dst, - sizeof(fib4->dst), - fib4->dst_len, fi->fib_dev); + fib_entry = mlxsw_sp_fib_entry_create(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, fi->fib_dev); if (!fib_entry) { err = -ENOMEM; goto err_fib_entry_create; } fib_entry->vr = vr; + fib_entry->fi = fi; fib_entry->ref_count = 1; - err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fib4, fib_entry); + err = mlxsw_sp_router_fib4_entry_init(mlxsw_sp, fen_info, fib_entry); if (err) goto err_fib4_entry_init; @@ -1740,21 +1745,23 @@ err_fib_entry_create: static struct mlxsw_sp_fib_entry * mlxsw_sp_fib_entry_find(struct mlxsw_sp *mlxsw_sp, - const struct switchdev_obj_ipv4_fib *fib4) + const struct fib_entry_notifier_info *fen_info) { struct mlxsw_sp_vr *vr; - vr = mlxsw_sp_vr_find(mlxsw_sp, fib4->tb_id, MLXSW_SP_L3_PROTO_IPV4); + vr = mlxsw_sp_vr_find(mlxsw_sp, fen_info->tb_id, + MLXSW_SP_L3_PROTO_IPV4); if (!vr) return NULL; - return mlxsw_sp_fib_entry_lookup(vr->fib, &fib4->dst, - sizeof(fib4->dst), fib4->dst_len, - fib4->fi->fib_dev); + return mlxsw_sp_fib_entry_lookup(vr->fib, &fen_info->dst, + sizeof(fen_info->dst), + fen_info->dst_len, + fen_info->fi->fib_dev); } -void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, - struct mlxsw_sp_fib_entry *fib_entry) +static void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { struct mlxsw_sp_vr *vr = fib_entry->vr; @@ -1765,60 +1772,43 @@ void mlxsw_sp_fib_entry_put(struct mlxsw_sp *mlxsw_sp, mlxsw_sp_vr_put(mlxsw_sp, vr); } -static int -mlxsw_sp_router_fib4_add_prepare(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static void mlxsw_sp_fib_entry_put_all(struct mlxsw_sp *mlxsw_sp, + struct mlxsw_sp_fib_entry *fib_entry) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_router_fib4_add_info *info; - struct mlxsw_sp_fib_entry *fib_entry; - int err; + unsigned int last_ref_count; - fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fib4); - if (IS_ERR(fib_entry)) - return PTR_ERR(fib_entry); - - info = kmalloc(sizeof(*info), GFP_KERNEL); - if (!info) { - err = -ENOMEM; - goto err_alloc_info; - } - info->mlxsw_sp = mlxsw_sp; - info->fib_entry = fib_entry; - switchdev_trans_item_enqueue(trans, info, - mlxsw_sp_router_fib4_add_info_destroy, - &info->tritem); - return 0; - -err_alloc_info: - mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); - return err; + do { + last_ref_count = fib_entry->ref_count; + mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); + } while (last_ref_count != 1); } -static int -mlxsw_sp_router_fib4_add_commit(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static int mlxsw_sp_router_fib4_add(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; - struct mlxsw_sp_router_fib4_add_info *info; struct mlxsw_sp_fib_entry *fib_entry; struct mlxsw_sp_vr *vr; int err; - info = switchdev_trans_item_dequeue(trans); - fib_entry = info->fib_entry; - kfree(info); + if (mlxsw_sp->router.aborted) + return 0; + + fib_entry = mlxsw_sp_fib_entry_get(mlxsw_sp, fen_info); + if (IS_ERR(fib_entry)) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to get FIB4 entry being added.\n"); + return PTR_ERR(fib_entry); + } if (fib_entry->ref_count != 1) return 0; vr = fib_entry->vr; err = mlxsw_sp_fib_entry_insert(vr->fib, fib_entry); - if (err) + if (err) { + dev_warn(mlxsw_sp->bus_info->dev, "Failed to insert FIB4 entry being added.\n"); goto err_fib_entry_insert; - err = mlxsw_sp_fib_entry_update(mlxsw_sp_port->mlxsw_sp, fib_entry); + } + err = mlxsw_sp_fib_entry_update(mlxsw_sp, fib_entry); if (err) goto err_fib_entry_add; return 0; @@ -1830,24 +1820,15 @@ err_fib_entry_insert: return err; } -int mlxsw_sp_router_fib4_add(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) +static int mlxsw_sp_router_fib4_del(struct mlxsw_sp *mlxsw_sp, + struct fib_entry_notifier_info *fen_info) { - if (switchdev_trans_ph_prepare(trans)) - return mlxsw_sp_router_fib4_add_prepare(mlxsw_sp_port, - fib4, trans); - return mlxsw_sp_router_fib4_add_commit(mlxsw_sp_port, - fib4, trans); -} - -int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; struct mlxsw_sp_fib_entry *fib_entry; - fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fib4); + if (mlxsw_sp->router.aborted) + return 0; + + fib_entry = mlxsw_sp_fib_entry_find(mlxsw_sp, fen_info); if (!fib_entry) { dev_warn(mlxsw_sp->bus_info->dev, "Failed to find FIB4 entry being removed.\n"); return -ENOENT; @@ -1861,3 +1842,172 @@ int mlxsw_sp_router_fib4_del(struct mlxsw_sp_port *mlxsw_sp_port, mlxsw_sp_fib_entry_put(mlxsw_sp, fib_entry); return 0; } + +static int mlxsw_sp_router_set_abort_trap(struct mlxsw_sp *mlxsw_sp) +{ + char ralta_pl[MLXSW_REG_RALTA_LEN]; + char ralst_pl[MLXSW_REG_RALST_LEN]; + char raltb_pl[MLXSW_REG_RALTB_LEN]; + char ralue_pl[MLXSW_REG_RALUE_LEN]; + int err; + + mlxsw_reg_ralta_pack(ralta_pl, true, MLXSW_REG_RALXX_PROTOCOL_IPV4, + MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralta), ralta_pl); + if (err) + return err; + + mlxsw_reg_ralst_pack(ralst_pl, 0xff, MLXSW_SP_LPM_TREE_MIN); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralst), ralst_pl); + if (err) + return err; + + mlxsw_reg_raltb_pack(raltb_pl, 0, MLXSW_REG_RALXX_PROTOCOL_IPV4, 0); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(raltb), raltb_pl); + if (err) + return err; + + mlxsw_reg_ralue_pack4(ralue_pl, MLXSW_SP_L3_PROTO_IPV4, + MLXSW_REG_RALUE_OP_WRITE_WRITE, 0, 0, 0); + mlxsw_reg_ralue_act_ip2me_pack(ralue_pl); + return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(ralue), ralue_pl); +} + +static void mlxsw_sp_router_fib4_abort(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + struct mlxsw_sp_fib_entry *fib_entry; + struct mlxsw_sp_fib_entry *tmp; + struct mlxsw_sp_vr *vr; + int i; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_virtual_routers; i++) { + vr = &mlxsw_sp->router.vrs[i]; + if (!vr->used) + continue; + + list_for_each_entry_safe(fib_entry, tmp, + &vr->fib->entry_list, list) { + bool do_break = &tmp->list == &vr->fib->entry_list; + + mlxsw_sp_fib_entry_del(mlxsw_sp, fib_entry); + mlxsw_sp_fib_entry_remove(fib_entry->vr->fib, + fib_entry); + mlxsw_sp_fib_entry_put_all(mlxsw_sp, fib_entry); + if (do_break) + break; + } + } + mlxsw_sp->router.aborted = true; + err = mlxsw_sp_router_set_abort_trap(mlxsw_sp); + if (err) + dev_warn(mlxsw_sp->bus_info->dev, "Failed to set abort trap.\n"); +} + +static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int err; + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + if (!resources->max_rif_valid) + return -EIO; + + mlxsw_sp->rifs = kcalloc(resources->max_rif, + sizeof(struct mlxsw_sp_rif *), GFP_KERNEL); + if (!mlxsw_sp->rifs) + return -ENOMEM; + + mlxsw_reg_rgcr_pack(rgcr_pl, true); + mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, resources->max_rif); + err = mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + if (err) + goto err_rgcr_fail; + + return 0; + +err_rgcr_fail: + kfree(mlxsw_sp->rifs); + return err; +} + +static void __mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + struct mlxsw_resources *resources; + char rgcr_pl[MLXSW_REG_RGCR_LEN]; + int i; + + mlxsw_reg_rgcr_pack(rgcr_pl, false); + mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(rgcr), rgcr_pl); + + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_rif; i++) + WARN_ON_ONCE(mlxsw_sp->rifs[i]); + + kfree(mlxsw_sp->rifs); +} + +static int mlxsw_sp_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlxsw_sp *mlxsw_sp = container_of(nb, struct mlxsw_sp, fib_nb); + struct fib_entry_notifier_info *fen_info = ptr; + int err; + + switch (event) { + case FIB_EVENT_ENTRY_ADD: + err = mlxsw_sp_router_fib4_add(mlxsw_sp, fen_info); + if (err) + mlxsw_sp_router_fib4_abort(mlxsw_sp); + break; + case FIB_EVENT_ENTRY_DEL: + mlxsw_sp_router_fib4_del(mlxsw_sp, fen_info); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + mlxsw_sp_router_fib4_abort(mlxsw_sp); + break; + } + return NOTIFY_DONE; +} + +int mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp) +{ + int err; + + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_neighs_list); + INIT_LIST_HEAD(&mlxsw_sp->router.nexthop_group_list); + err = __mlxsw_sp_router_init(mlxsw_sp); + if (err) + return err; + + mlxsw_sp_lpm_init(mlxsw_sp); + err = mlxsw_sp_vrs_init(mlxsw_sp); + if (err) + goto err_vrs_init; + + err = mlxsw_sp_neigh_init(mlxsw_sp); + if (err) + goto err_neigh_init; + + mlxsw_sp->fib_nb.notifier_call = mlxsw_sp_router_fib_event; + register_fib_notifier(&mlxsw_sp->fib_nb); + return 0; + +err_neigh_init: + mlxsw_sp_vrs_fini(mlxsw_sp); +err_vrs_init: + __mlxsw_sp_router_fini(mlxsw_sp); + return err; +} + +void mlxsw_sp_router_fini(struct mlxsw_sp *mlxsw_sp) +{ + unregister_fib_notifier(&mlxsw_sp->fib_nb); + mlxsw_sp_neigh_fini(mlxsw_sp); + mlxsw_sp_vrs_fini(mlxsw_sp); + __mlxsw_sp_router_fini(mlxsw_sp); +} diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c index 7b654c517b91..5e00c79e8133 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_switchdev.c @@ -254,12 +254,40 @@ int mlxsw_sp_vport_flood_set(struct mlxsw_sp_port *mlxsw_sp_vport, u16 fid, return __mlxsw_sp_port_flood_set(mlxsw_sp_vport, vfid, vfid, set, set); } +static int mlxsw_sp_port_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + bool set) +{ + u16 vid; + int err; + + if (mlxsw_sp_port_is_vport(mlxsw_sp_port)) { + vid = mlxsw_sp_vport_vid_get(mlxsw_sp_port); + + return __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, + set); + } + + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) { + err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, + set); + if (err) + goto err_port_vid_learning_set; + } + + return 0; + +err_port_vid_learning_set: + for_each_set_bit(vid, mlxsw_sp_port->active_vlans, VLAN_N_VID) + __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, vid, !set); + return err; +} + static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, struct switchdev_trans *trans, unsigned long brport_flags) { + unsigned long learning = mlxsw_sp_port->learning ? BR_LEARNING : 0; unsigned long uc_flood = mlxsw_sp_port->uc_flood ? BR_FLOOD : 0; - bool set; int err; if (!mlxsw_sp_port->bridged) @@ -269,17 +297,30 @@ static int mlxsw_sp_port_attr_br_flags_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; if ((uc_flood ^ brport_flags) & BR_FLOOD) { - set = mlxsw_sp_port->uc_flood ? false : true; - err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, set); + err = mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, + !mlxsw_sp_port->uc_flood); if (err) return err; } + if ((learning ^ brport_flags) & BR_LEARNING) { + err = mlxsw_sp_port_learning_set(mlxsw_sp_port, + !mlxsw_sp_port->learning); + if (err) + goto err_port_learning_set; + } + mlxsw_sp_port->uc_flood = brport_flags & BR_FLOOD ? 1 : 0; mlxsw_sp_port->learning = brport_flags & BR_LEARNING ? 1 : 0; mlxsw_sp_port->learning_sync = brport_flags & BR_LEARNING_SYNC ? 1 : 0; return 0; + +err_port_learning_set: + if ((uc_flood ^ brport_flags) & BR_FLOOD) + mlxsw_sp_port_uc_flood_set(mlxsw_sp_port, + mlxsw_sp_port->uc_flood); + return err; } static int mlxsw_sp_ageing_set(struct mlxsw_sp *mlxsw_sp, u32 ageing_time) @@ -631,6 +672,27 @@ static int __mlxsw_sp_port_vlans_set(struct mlxsw_sp_port *mlxsw_sp_port, return 0; } +static int mlxsw_sp_port_vid_learning_set(struct mlxsw_sp_port *mlxsw_sp_port, + u16 vid_begin, u16 vid_end, + bool learn_enable) +{ + u16 vid, vid_e; + int err; + + for (vid = vid_begin; vid <= vid_end; + vid += MLXSW_REG_SPVMLR_REC_MAX_COUNT) { + vid_e = min((u16) (vid + MLXSW_REG_SPVMLR_REC_MAX_COUNT - 1), + vid_end); + + err = __mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid, + vid_e, learn_enable); + if (err) + return err; + } + + return 0; +} + static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end, bool flag_untagged, bool flag_pvid) @@ -671,6 +733,14 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, } } + err = mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, + mlxsw_sp_port->learning); + if (err) { + netdev_err(dev, "Failed to set learning for VIDs %d-%d\n", + vid_begin, vid_end); + goto err_port_vid_learning_set; + } + /* Changing activity bits only if HW operation succeded */ for (vid = vid_begin; vid <= vid_end; vid++) { set_bit(vid, mlxsw_sp_port->active_vlans); @@ -693,6 +763,9 @@ static int __mlxsw_sp_port_vlans_add(struct mlxsw_sp_port *mlxsw_sp_port, err_port_stp_state_set: for (vid = vid_begin; vid <= vid_end; vid++) clear_bit(vid, mlxsw_sp_port->active_vlans); + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, + false); +err_port_vid_learning_set: if (old_pvid != mlxsw_sp_port->pvid) mlxsw_sp_port_pvid_set(mlxsw_sp_port, old_pvid); err_port_pvid_set: @@ -971,11 +1044,6 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = mlxsw_sp_router_fib4_add(mlxsw_sp_port, - SWITCHDEV_OBJ_IPV4_FIB(obj), - trans); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_add(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -997,29 +1065,20 @@ static int mlxsw_sp_port_obj_add(struct net_device *dev, static int __mlxsw_sp_port_vlans_del(struct mlxsw_sp_port *mlxsw_sp_port, u16 vid_begin, u16 vid_end) { - struct net_device *dev = mlxsw_sp_port->dev; u16 vid, pvid; - int err; if (!mlxsw_sp_port->bridged) return -EINVAL; - err = __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, - false, false); - if (err) { - netdev_err(dev, "Unable to del VIDs %d-%d\n", vid_begin, - vid_end); - return err; - } + mlxsw_sp_port_vid_learning_set(mlxsw_sp_port, vid_begin, vid_end, + false); pvid = mlxsw_sp_port->pvid; - if (pvid >= vid_begin && pvid <= vid_end) { - err = mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); - if (err) { - netdev_err(dev, "Unable to del PVID %d\n", pvid); - return err; - } - } + if (pvid >= vid_begin && pvid <= vid_end) + mlxsw_sp_port_pvid_set(mlxsw_sp_port, 0); + + __mlxsw_sp_port_vlans_set(mlxsw_sp_port, vid_begin, vid_end, false, + false); mlxsw_sp_port_fid_leave(mlxsw_sp_port, vid_begin, vid_end); @@ -1117,10 +1176,6 @@ static int mlxsw_sp_port_obj_del(struct net_device *dev, err = mlxsw_sp_port_vlans_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = mlxsw_sp_router_fib4_del(mlxsw_sp_port, - SWITCHDEV_OBJ_IPV4_FIB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = mlxsw_sp_port_fdb_static_del(mlxsw_sp_port, SWITCHDEV_OBJ_PORT_FDB(obj)); @@ -1141,9 +1196,11 @@ static struct mlxsw_sp_port *mlxsw_sp_lag_rep_port(struct mlxsw_sp *mlxsw_sp, u16 lag_id) { struct mlxsw_sp_port *mlxsw_sp_port; + struct mlxsw_resources *resources; int i; - for (i = 0; i < MLXSW_SP_PORT_PER_LAG_MAX; i++) { + resources = mlxsw_core_resources_get(mlxsw_sp->core); + for (i = 0; i < resources->max_ports_in_lag; i++) { mlxsw_sp_port = mlxsw_sp_port_lagged_get(mlxsw_sp, lag_id, i); if (mlxsw_sp_port) return mlxsw_sp_port; @@ -1362,8 +1419,6 @@ static void mlxsw_sp_fdb_notify_mac_process(struct mlxsw_sp *mlxsw_sp, vid = fid; } - adding = adding && mlxsw_sp_port->learning; - do_fdb_op: err = mlxsw_sp_port_fdb_uc_op(mlxsw_sp, local_port, mac, fid, adding, true); @@ -1425,8 +1480,6 @@ static void mlxsw_sp_fdb_notify_mac_lag_process(struct mlxsw_sp *mlxsw_sp, vid = fid; } - adding = adding && mlxsw_sp_port->learning; - do_fdb_op: err = mlxsw_sp_port_fdb_uc_lag_op(mlxsw_sp, lag_id, mac, fid, lag_vid, adding, true); @@ -1492,20 +1545,18 @@ static void mlxsw_sp_fdb_notify_work(struct work_struct *work) mlxsw_sp = container_of(work, struct mlxsw_sp, fdb_notify.dw.work); rtnl_lock(); - do { - mlxsw_reg_sfn_pack(sfn_pl); - err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); - if (err) { - dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); - break; - } - num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); - for (i = 0; i < num_rec; i++) - mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); + mlxsw_reg_sfn_pack(sfn_pl); + err = mlxsw_reg_query(mlxsw_sp->core, MLXSW_REG(sfn), sfn_pl); + if (err) { + dev_err_ratelimited(mlxsw_sp->bus_info->dev, "Failed to get FDB notifications\n"); + goto out; + } + num_rec = mlxsw_reg_sfn_num_rec_get(sfn_pl); + for (i = 0; i < num_rec; i++) + mlxsw_sp_fdb_notify_rec_process(mlxsw_sp, sfn_pl, i); - } while (num_rec); +out: rtnl_unlock(); - kfree(sfn_pl); mlxsw_sp_fdb_notify_work_schedule(mlxsw_sp); } diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 377daa4d509c..c0c23e2f3275 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -997,7 +997,7 @@ static int mlxsw_sx_port_create(struct mlxsw_sx *mlxsw_sx, u8 local_port) /* Each packet needs to have a Tx header (metadata) on top all other * headers. */ - dev->hard_header_len += MLXSW_TXHDR_LEN; + dev->needed_headroom = MLXSW_TXHDR_LEN; err = mlxsw_sx_port_module_check(mlxsw_sx_port, &usable); if (err) { @@ -1512,10 +1512,6 @@ static void mlxsw_sx_fini(struct mlxsw_core *mlxsw_core) static struct mlxsw_config_profile mlxsw_sx_config_profile = { .used_max_vepa_channels = 1, .max_vepa_channels = 0, - .used_max_lag = 1, - .max_lag = 64, - .used_max_port_per_lag = 1, - .max_port_per_lag = 16, .used_max_mid = 1, .max_mid = 7000, .used_max_pgt = 1, diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index eb807b0dc72a..569ade6cf85c 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -134,7 +134,7 @@ static int lnksts = 0; /* CFG_LNKSTS bit polarity */ /* tunables */ #define RX_BUF_SIZE 1500 /* 8192 */ -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define NS83820_VLAN_ACCEL_SUPPORT #endif diff --git a/drivers/net/ethernet/netronome/nfp/Makefile b/drivers/net/ethernet/netronome/nfp/Makefile index 68178819ff12..0efb2ba9a558 100644 --- a/drivers/net/ethernet/netronome/nfp/Makefile +++ b/drivers/net/ethernet/netronome/nfp/Makefile @@ -3,6 +3,13 @@ obj-$(CONFIG_NFP_NETVF) += nfp_netvf.o nfp_netvf-objs := \ nfp_net_common.o \ nfp_net_ethtool.o \ + nfp_net_offload.o \ nfp_netvf_main.o +ifeq ($(CONFIG_BPF_SYSCALL),y) +nfp_netvf-objs += \ + nfp_bpf_verifier.o \ + nfp_bpf_jit.o +endif + nfp_netvf-$(CONFIG_NFP_NET_DEBUG) += nfp_net_debugfs.o diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h new file mode 100644 index 000000000000..22484b6fd3e8 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -0,0 +1,233 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_ASM_H__ +#define __NFP_ASM_H__ 1 + +#include "nfp_bpf.h" + +#define REG_NONE 0 + +#define RE_REG_NO_DST 0x020 +#define RE_REG_IMM 0x020 +#define RE_REG_IMM_encode(x) \ + (RE_REG_IMM | ((x) & 0x1f) | (((x) & 0x60) << 1)) +#define RE_REG_IMM_MAX 0x07fULL +#define RE_REG_XFR 0x080 + +#define UR_REG_XFR 0x180 +#define UR_REG_NN 0x280 +#define UR_REG_NO_DST 0x300 +#define UR_REG_IMM UR_REG_NO_DST +#define UR_REG_IMM_encode(x) (UR_REG_IMM | (x)) +#define UR_REG_IMM_MAX 0x0ffULL + +#define OP_BR_BASE 0x0d800000020ULL +#define OP_BR_BASE_MASK 0x0f8000c3ce0ULL +#define OP_BR_MASK 0x0000000001fULL +#define OP_BR_EV_PIP 0x00000000300ULL +#define OP_BR_CSS 0x0000003c000ULL +#define OP_BR_DEFBR 0x00000300000ULL +#define OP_BR_ADDR_LO 0x007ffc00000ULL +#define OP_BR_ADDR_HI 0x10000000000ULL + +#define nfp_is_br(_insn) \ + (((_insn) & OP_BR_BASE_MASK) == OP_BR_BASE) + +enum br_mask { + BR_BEQ = 0x00, + BR_BNE = 0x01, + BR_BHS = 0x04, + BR_BLO = 0x05, + BR_BGE = 0x08, + BR_UNC = 0x18, +}; + +enum br_ev_pip { + BR_EV_PIP_UNCOND = 0, + BR_EV_PIP_COND = 1, +}; + +enum br_ctx_signal_state { + BR_CSS_NONE = 2, +}; + +#define OP_BBYTE_BASE 0x0c800000000ULL +#define OP_BB_A_SRC 0x000000000ffULL +#define OP_BB_BYTE 0x00000000300ULL +#define OP_BB_B_SRC 0x0000003fc00ULL +#define OP_BB_I8 0x00000040000ULL +#define OP_BB_EQ 0x00000080000ULL +#define OP_BB_DEFBR 0x00000300000ULL +#define OP_BB_ADDR_LO 0x007ffc00000ULL +#define OP_BB_ADDR_HI 0x10000000000ULL + +#define OP_BALU_BASE 0x0e800000000ULL +#define OP_BA_A_SRC 0x000000003ffULL +#define OP_BA_B_SRC 0x000000ffc00ULL +#define OP_BA_DEFBR 0x00000300000ULL +#define OP_BA_ADDR_HI 0x0007fc00000ULL + +#define OP_IMMED_A_SRC 0x000000003ffULL +#define OP_IMMED_B_SRC 0x000000ffc00ULL +#define OP_IMMED_IMM 0x0000ff00000ULL +#define OP_IMMED_WIDTH 0x00060000000ULL +#define OP_IMMED_INV 0x00080000000ULL +#define OP_IMMED_SHIFT 0x00600000000ULL +#define OP_IMMED_BASE 0x0f000000000ULL +#define OP_IMMED_WR_AB 0x20000000000ULL + +enum immed_width { + IMMED_WIDTH_ALL = 0, + IMMED_WIDTH_BYTE = 1, + IMMED_WIDTH_WORD = 2, +}; + +enum immed_shift { + IMMED_SHIFT_0B = 0, + IMMED_SHIFT_1B = 1, + IMMED_SHIFT_2B = 2, +}; + +#define OP_SHF_BASE 0x08000000000ULL +#define OP_SHF_A_SRC 0x000000000ffULL +#define OP_SHF_SC 0x00000000300ULL +#define OP_SHF_B_SRC 0x0000003fc00ULL +#define OP_SHF_I8 0x00000040000ULL +#define OP_SHF_SW 0x00000080000ULL +#define OP_SHF_DST 0x0000ff00000ULL +#define OP_SHF_SHIFT 0x001f0000000ULL +#define OP_SHF_OP 0x00e00000000ULL +#define OP_SHF_DST_AB 0x01000000000ULL +#define OP_SHF_WR_AB 0x20000000000ULL + +enum shf_op { + SHF_OP_NONE = 0, + SHF_OP_AND = 2, + SHF_OP_OR = 5, +}; + +enum shf_sc { + SHF_SC_R_ROT = 0, + SHF_SC_R_SHF = 1, + SHF_SC_L_SHF = 2, + SHF_SC_R_DSHF = 3, +}; + +#define OP_ALU_A_SRC 0x000000003ffULL +#define OP_ALU_B_SRC 0x000000ffc00ULL +#define OP_ALU_DST 0x0003ff00000ULL +#define OP_ALU_SW 0x00040000000ULL +#define OP_ALU_OP 0x00f80000000ULL +#define OP_ALU_DST_AB 0x01000000000ULL +#define OP_ALU_BASE 0x0a000000000ULL +#define OP_ALU_WR_AB 0x20000000000ULL + +enum alu_op { + ALU_OP_NONE = 0x00, + ALU_OP_ADD = 0x01, + ALU_OP_NEG = 0x04, + ALU_OP_AND = 0x08, + ALU_OP_SUB_C = 0x0d, + ALU_OP_ADD_C = 0x11, + ALU_OP_OR = 0x14, + ALU_OP_SUB = 0x15, + ALU_OP_XOR = 0x18, +}; + +enum alu_dst_ab { + ALU_DST_A = 0, + ALU_DST_B = 1, +}; + +#define OP_LDF_BASE 0x0c000000000ULL +#define OP_LDF_A_SRC 0x000000000ffULL +#define OP_LDF_SC 0x00000000300ULL +#define OP_LDF_B_SRC 0x0000003fc00ULL +#define OP_LDF_I8 0x00000040000ULL +#define OP_LDF_SW 0x00000080000ULL +#define OP_LDF_ZF 0x00000100000ULL +#define OP_LDF_BMASK 0x0000f000000ULL +#define OP_LDF_SHF 0x001f0000000ULL +#define OP_LDF_WR_AB 0x20000000000ULL + +#define OP_CMD_A_SRC 0x000000000ffULL +#define OP_CMD_CTX 0x00000000300ULL +#define OP_CMD_B_SRC 0x0000003fc00ULL +#define OP_CMD_TOKEN 0x000000c0000ULL +#define OP_CMD_XFER 0x00001f00000ULL +#define OP_CMD_CNT 0x0000e000000ULL +#define OP_CMD_SIG 0x000f0000000ULL +#define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_MODE 0x1c0000000000ULL + +struct cmd_tgt_act { + u8 token; + u8 tgt_cmd; +}; + +enum cmd_tgt_map { + CMD_TGT_READ8, + CMD_TGT_WRITE8, + CMD_TGT_READ_LE, + CMD_TGT_READ_SWAP_LE, + __CMD_TGT_MAP_SIZE, +}; + +enum cmd_mode { + CMD_MODE_40b_AB = 0, + CMD_MODE_40b_BA = 1, + CMD_MODE_32b = 4, +}; + +enum cmd_ctx_swap { + CMD_CTX_SWAP = 0, + CMD_CTX_NO_SWAP = 3, +}; + +#define OP_LCSR_BASE 0x0fc00000000ULL +#define OP_LCSR_A_SRC 0x000000003ffULL +#define OP_LCSR_B_SRC 0x000000ffc00ULL +#define OP_LCSR_WRITE 0x00000200000ULL +#define OP_LCSR_ADDR 0x001ffc00000ULL + +enum lcsr_wr_src { + LCSR_WR_AREG, + LCSR_WR_BREG, + LCSR_WR_IMM, +}; + +#define OP_CARB_BASE 0x0e000000000ULL +#define OP_CARB_OR 0x00000010000ULL + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf.h b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h new file mode 100644 index 000000000000..87aa8a3e9112 --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf.h @@ -0,0 +1,202 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __NFP_BPF_H__ +#define __NFP_BPF_H__ 1 + +#include +#include +#include +#include + +#define FIELD_FIT(mask, val) (!((((u64)val) << __bf_shf(mask)) & ~(mask))) + +/* For branch fixup logic use up-most byte of branch instruction as scratch + * area. Remember to clear this before sending instructions to HW! + */ +#define OP_BR_SPECIAL 0xff00000000000000ULL + +enum br_special { + OP_BR_NORMAL = 0, + OP_BR_GO_OUT, + OP_BR_GO_ABORT, +}; + +enum static_regs { + STATIC_REG_PKT = 1, +#define REG_PKT_BANK ALU_DST_A + STATIC_REG_IMM = 2, /* Bank AB */ +}; + +enum nfp_bpf_action_type { + NN_ACT_TC_DROP, + NN_ACT_TC_REDIR, + NN_ACT_DIRECT, +}; + +/* Software register representation, hardware encoding in asm.h */ +#define NN_REG_TYPE GENMASK(31, 24) +#define NN_REG_VAL GENMASK(7, 0) + +enum nfp_bpf_reg_type { + NN_REG_GPR_A = BIT(0), + NN_REG_GPR_B = BIT(1), + NN_REG_NNR = BIT(2), + NN_REG_XFER = BIT(3), + NN_REG_IMM = BIT(4), + NN_REG_NONE = BIT(5), +}; + +#define NN_REG_GPR_BOTH (NN_REG_GPR_A | NN_REG_GPR_B) + +#define reg_both(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_BOTH)) +#define reg_a(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_A)) +#define reg_b(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_GPR_B)) +#define reg_nnr(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_NNR)) +#define reg_xfer(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_XFER)) +#define reg_imm(x) ((x) | FIELD_PREP(NN_REG_TYPE, NN_REG_IMM)) +#define reg_none() (FIELD_PREP(NN_REG_TYPE, NN_REG_NONE)) + +#define pkt_reg(np) reg_a((np)->regs_per_thread - STATIC_REG_PKT) +#define imm_a(np) reg_a((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_b(np) reg_b((np)->regs_per_thread - STATIC_REG_IMM) +#define imm_both(np) reg_both((np)->regs_per_thread - STATIC_REG_IMM) + +#define NFP_BPF_ABI_FLAGS reg_nnr(0) +#define NFP_BPF_ABI_FLAG_MARK 1 +#define NFP_BPF_ABI_MARK reg_nnr(1) +#define NFP_BPF_ABI_PKT reg_nnr(2) +#define NFP_BPF_ABI_LEN reg_nnr(3) + +struct nfp_prog; +struct nfp_insn_meta; +typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); + +#define nfp_prog_first_meta(nfp_prog) \ + list_first_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_prog_last_meta(nfp_prog) \ + list_last_entry(&(nfp_prog)->insns, struct nfp_insn_meta, l) +#define nfp_meta_next(meta) list_next_entry(meta, l) +#define nfp_meta_prev(meta) list_prev_entry(meta, l) + +/** + * struct nfp_insn_meta - BPF instruction wrapper + * @insn: BPF instruction + * @off: index of first generated machine instruction (in nfp_prog.prog) + * @n: eBPF instruction number + * @skip: skip this instruction (optimized out) + * @double_cb: callback for second part of the instruction + * @l: link on nfp_prog->insns list + */ +struct nfp_insn_meta { + struct bpf_insn insn; + unsigned int off; + unsigned short n; + bool skip; + instr_cb_t double_cb; + + struct list_head l; +}; + +#define BPF_SIZE_MASK 0x18 + +static inline u8 mbpf_class(const struct nfp_insn_meta *meta) +{ + return BPF_CLASS(meta->insn.code); +} + +static inline u8 mbpf_src(const struct nfp_insn_meta *meta) +{ + return BPF_SRC(meta->insn.code); +} + +static inline u8 mbpf_op(const struct nfp_insn_meta *meta) +{ + return BPF_OP(meta->insn.code); +} + +static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) +{ + return BPF_MODE(meta->insn.code); +} + +/** + * struct nfp_prog - nfp BPF program + * @prog: machine code + * @prog_len: number of valid instructions in @prog array + * @__prog_alloc_len: alloc size of @prog array + * @act: BPF program/action type (TC DA, TC with action, XDP etc.) + * @num_regs: number of registers used by this program + * @regs_per_thread: number of basic registers allocated per thread + * @start_off: address of the first instruction in the memory + * @tgt_out: jump target for normal exit + * @tgt_abort: jump target for abort (e.g. access outside of packet buffer) + * @tgt_done: jump target to get the next packet + * @n_translated: number of successfully translated instructions (for errors) + * @error: error code if something went wrong + * @insns: list of BPF instruction wrappers (struct nfp_insn_meta) + */ +struct nfp_prog { + u64 *prog; + unsigned int prog_len; + unsigned int __prog_alloc_len; + + enum nfp_bpf_action_type act; + + unsigned int num_regs; + unsigned int regs_per_thread; + + unsigned int start_off; + unsigned int tgt_out; + unsigned int tgt_abort; + unsigned int tgt_done; + + unsigned int n_translated; + int error; + + struct list_head insns; +}; + +struct nfp_bpf_result { + unsigned int n_instr; + bool dense_mode; +}; + +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog, enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res); + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog); + +#endif diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c new file mode 100644 index 000000000000..f8df5300f49c --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_jit.c @@ -0,0 +1,1813 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include +#include +#include +#include +#include + +#include "nfp_asm.h" +#include "nfp_bpf.h" + +/* --- NFP prog --- */ +/* Foreach "multiple" entries macros provide pos and next pointers. + * It's safe to modify the next pointers (but not pos). + */ +#define nfp_for_each_insn_walk2(nfp_prog, pos, next) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos)) + +#define nfp_for_each_insn_walk3(nfp_prog, pos, next, next2) \ + for (pos = list_first_entry(&(nfp_prog)->insns, typeof(*pos), l), \ + next = list_next_entry(pos, l), \ + next2 = list_next_entry(next, l); \ + &(nfp_prog)->insns != &pos->l && \ + &(nfp_prog)->insns != &next->l && \ + &(nfp_prog)->insns != &next2->l; \ + pos = nfp_meta_next(pos), \ + next = nfp_meta_next(pos), \ + next2 = nfp_meta_next(next)) + +static bool +nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.next != &nfp_prog->insns; +} + +static bool +nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return meta->l.prev != &nfp_prog->insns; +} + +static void nfp_prog_free(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *tmp; + + list_for_each_entry_safe(meta, tmp, &nfp_prog->insns, l) { + list_del(&meta->l); + kfree(meta); + } + kfree(nfp_prog); +} + +static void nfp_prog_push(struct nfp_prog *nfp_prog, u64 insn) +{ + if (nfp_prog->__prog_alloc_len == nfp_prog->prog_len) { + nfp_prog->error = -ENOSPC; + return; + } + + nfp_prog->prog[nfp_prog->prog_len] = insn; + nfp_prog->prog_len++; +} + +static unsigned int nfp_prog_current_offset(struct nfp_prog *nfp_prog) +{ + return nfp_prog->start_off + nfp_prog->prog_len; +} + +static unsigned int +nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) +{ + return offset - nfp_prog->start_off; +} + +/* --- SW reg --- */ +struct nfp_insn_ur_regs { + enum alu_dst_ab dst_ab; + u16 dst; + u16 areg, breg; + bool swap; + bool wr_both; +}; + +struct nfp_insn_re_regs { + enum alu_dst_ab dst_ab; + u8 dst; + u8 areg, breg; + bool swap; + bool wr_both; + bool i8; +}; + +static u16 nfp_swreg_to_unreg(u32 swreg, bool is_dst) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_NNR: + return UR_REG_NN | val; + case NN_REG_XFER: + return UR_REG_XFR | val; + case NN_REG_IMM: + if (val & ~0xff) { + pr_err("immediate too large\n"); + return 0; + } + return UR_REG_IMM_encode(val); + case NN_REG_NONE: + return is_dst ? UR_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding %08x\n", swreg); + return 0; + } +} + +static int +swreg_to_unrestricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_ur_regs *reg) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_unreg(dst, true); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_unreg(rreg, false); + reg->breg = nfp_swreg_to_unreg(lreg, false); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_unreg(lreg, false); + reg->breg = nfp_swreg_to_unreg(rreg, false); + } + + return 0; +} + +static u16 nfp_swreg_to_rereg(u32 swreg, bool is_dst, bool has_imm8, bool *i8) +{ + u16 val = FIELD_GET(NN_REG_VAL, swreg); + + switch (FIELD_GET(NN_REG_TYPE, swreg)) { + case NN_REG_GPR_A: + case NN_REG_GPR_B: + case NN_REG_GPR_BOTH: + return val; + case NN_REG_XFER: + return RE_REG_XFR | val; + case NN_REG_IMM: + if (val & ~(0x7f | has_imm8 << 7)) { + pr_err("immediate too large\n"); + return 0; + } + *i8 = val & 0x80; + return RE_REG_IMM_encode(val & 0x7f); + case NN_REG_NONE: + return is_dst ? RE_REG_NO_DST : REG_NONE; + default: + pr_err("unrecognized reg encoding\n"); + return 0; + } +} + +static int +swreg_to_restricted(u32 dst, u32 lreg, u32 rreg, struct nfp_insn_re_regs *reg, + bool has_imm8) +{ + memset(reg, 0, sizeof(*reg)); + + /* Decode destination */ + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_B) + reg->dst_ab = ALU_DST_B; + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_GPR_BOTH) + reg->wr_both = true; + reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); + + /* Decode source operands */ + if (FIELD_GET(NN_REG_TYPE, lreg) == FIELD_GET(NN_REG_TYPE, rreg)) + return -EFAULT; + + if (FIELD_GET(NN_REG_TYPE, lreg) == NN_REG_GPR_B || + FIELD_GET(NN_REG_TYPE, rreg) == NN_REG_GPR_A) { + reg->areg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->swap = true; + } else { + reg->areg = nfp_swreg_to_rereg(lreg, false, has_imm8, ®->i8); + reg->breg = nfp_swreg_to_rereg(rreg, false, has_imm8, ®->i8); + } + + return 0; +} + +/* --- Emitters --- */ +static const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { + [CMD_TGT_WRITE8] = { 0x00, 0x42 }, + [CMD_TGT_READ8] = { 0x01, 0x43 }, + [CMD_TGT_READ_LE] = { 0x01, 0x40 }, + [CMD_TGT_READ_SWAP_LE] = { 0x03, 0x40 }, +}; + +static void +__emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) +{ + enum cmd_ctx_swap ctx; + u64 insn; + + if (sync) + ctx = CMD_CTX_SWAP; + else + ctx = CMD_CTX_NO_SWAP; + + insn = FIELD_PREP(OP_CMD_A_SRC, areg) | + FIELD_PREP(OP_CMD_CTX, ctx) | + FIELD_PREP(OP_CMD_B_SRC, breg) | + FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | + FIELD_PREP(OP_CMD_XFER, xfer) | + FIELD_PREP(OP_CMD_CNT, size) | + FIELD_PREP(OP_CMD_SIG, sync) | + FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_MODE, mode); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, + u8 mode, u8 xfer, u32 lreg, u32 rreg, u8 size, bool sync) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), lreg, rreg, ®, false); + if (err) { + nfp_prog->error = err; + return; + } + if (reg.swap) { + pr_err("cmd can't swap arguments\n"); + nfp_prog->error = -EFAULT; + return; + } + + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); +} + +static void +__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, + enum br_ctx_signal_state css, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BR_BASE | + FIELD_PREP(OP_BR_MASK, mask) | + FIELD_PREP(OP_BR_EV_PIP, ev_pip) | + FIELD_PREP(OP_BR_CSS, css) | + FIELD_PREP(OP_BR_DEFBR, defer) | + FIELD_PREP(OP_BR_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void emit_br_def(struct nfp_prog *nfp_prog, u16 addr, u8 defer) +{ + if (defer > 2) { + pr_err("BUG: branch defer out of bounds %d\n", defer); + nfp_prog->error = -EFAULT; + return; + } + __emit_br(nfp_prog, BR_UNC, BR_EV_PIP_UNCOND, BR_CSS_NONE, addr, defer); +} + +static void +emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, u16 addr, u8 defer) +{ + __emit_br(nfp_prog, mask, + mask != BR_UNC ? BR_EV_PIP_COND : BR_EV_PIP_UNCOND, + BR_CSS_NONE, addr, defer); +} + +static void +__emit_br_byte(struct nfp_prog *nfp_prog, u8 areg, u8 breg, bool imm8, + u8 byte, bool equal, u16 addr, u8 defer) +{ + u16 addr_lo, addr_hi; + u64 insn; + + addr_lo = addr & (OP_BB_ADDR_LO >> __bf_shf(OP_BB_ADDR_LO)); + addr_hi = addr != addr_lo; + + insn = OP_BBYTE_BASE | + FIELD_PREP(OP_BB_A_SRC, areg) | + FIELD_PREP(OP_BB_BYTE, byte) | + FIELD_PREP(OP_BB_B_SRC, breg) | + FIELD_PREP(OP_BB_I8, imm8) | + FIELD_PREP(OP_BB_EQ, equal) | + FIELD_PREP(OP_BB_DEFBR, defer) | + FIELD_PREP(OP_BB_ADDR_LO, addr_lo) | + FIELD_PREP(OP_BB_ADDR_HI, addr_hi); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_br_byte_neq(struct nfp_prog *nfp_prog, + u32 dst, u8 imm, u8 byte, u16 addr, u8 defer) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, reg_imm(imm), ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_br_byte(nfp_prog, reg.areg, reg.breg, reg.i8, byte, false, addr, + defer); +} + +static void +__emit_immed(struct nfp_prog *nfp_prog, u16 areg, u16 breg, u16 imm_hi, + enum immed_width width, bool invert, + enum immed_shift shift, bool wr_both) +{ + u64 insn; + + insn = OP_IMMED_BASE | + FIELD_PREP(OP_IMMED_A_SRC, areg) | + FIELD_PREP(OP_IMMED_B_SRC, breg) | + FIELD_PREP(OP_IMMED_IMM, imm_hi) | + FIELD_PREP(OP_IMMED_WIDTH, width) | + FIELD_PREP(OP_IMMED_INV, invert) | + FIELD_PREP(OP_IMMED_SHIFT, shift) | + FIELD_PREP(OP_IMMED_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_immed(struct nfp_prog *nfp_prog, u32 dst, u16 imm, + enum immed_width width, bool invert, enum immed_shift shift) +{ + struct nfp_insn_ur_regs reg; + int err; + + if (FIELD_GET(NN_REG_TYPE, dst) == NN_REG_IMM) { + nfp_prog->error = -EFAULT; + return; + } + + err = swreg_to_unrestricted(dst, dst, reg_imm(imm & 0xff), ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width, + invert, shift, reg.wr_both); +} + +static void +__emit_shf(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + enum shf_sc sc, u8 shift, + u16 areg, enum shf_op op, u16 breg, bool i8, bool sw, bool wr_both) +{ + u64 insn; + + if (!FIELD_FIT(OP_SHF_SHIFT, shift)) { + nfp_prog->error = -EFAULT; + return; + } + + if (sc == SHF_SC_L_SHF) + shift = 32 - shift; + + insn = OP_SHF_BASE | + FIELD_PREP(OP_SHF_A_SRC, areg) | + FIELD_PREP(OP_SHF_SC, sc) | + FIELD_PREP(OP_SHF_B_SRC, breg) | + FIELD_PREP(OP_SHF_I8, i8) | + FIELD_PREP(OP_SHF_SW, sw) | + FIELD_PREP(OP_SHF_DST, dst) | + FIELD_PREP(OP_SHF_SHIFT, shift) | + FIELD_PREP(OP_SHF_OP, op) | + FIELD_PREP(OP_SHF_DST_AB, dst_ab) | + FIELD_PREP(OP_SHF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_shf(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum shf_op op, u32 rreg, + enum shf_sc sc, u8 shift) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(dst, lreg, rreg, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_shf(nfp_prog, reg.dst, reg.dst_ab, sc, shift, + reg.areg, op, reg.breg, reg.i8, reg.swap, reg.wr_both); +} + +static void +__emit_alu(struct nfp_prog *nfp_prog, u16 dst, enum alu_dst_ab dst_ab, + u16 areg, enum alu_op op, u16 breg, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_ALU_BASE | + FIELD_PREP(OP_ALU_A_SRC, areg) | + FIELD_PREP(OP_ALU_B_SRC, breg) | + FIELD_PREP(OP_ALU_DST, dst) | + FIELD_PREP(OP_ALU_SW, swap) | + FIELD_PREP(OP_ALU_OP, op) | + FIELD_PREP(OP_ALU_DST_AB, dst_ab) | + FIELD_PREP(OP_ALU_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_alu(struct nfp_prog *nfp_prog, u32 dst, u32 lreg, enum alu_op op, u32 rreg) +{ + struct nfp_insn_ur_regs reg; + int err; + + err = swreg_to_unrestricted(dst, lreg, rreg, ®); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_alu(nfp_prog, reg.dst, reg.dst_ab, + reg.areg, op, reg.breg, reg.swap, reg.wr_both); +} + +static void +__emit_ld_field(struct nfp_prog *nfp_prog, enum shf_sc sc, + u8 areg, u8 bmask, u8 breg, u8 shift, bool imm8, + bool zero, bool swap, bool wr_both) +{ + u64 insn; + + insn = OP_LDF_BASE | + FIELD_PREP(OP_LDF_A_SRC, areg) | + FIELD_PREP(OP_LDF_SC, sc) | + FIELD_PREP(OP_LDF_B_SRC, breg) | + FIELD_PREP(OP_LDF_I8, imm8) | + FIELD_PREP(OP_LDF_SW, swap) | + FIELD_PREP(OP_LDF_ZF, zero) | + FIELD_PREP(OP_LDF_BMASK, bmask) | + FIELD_PREP(OP_LDF_SHF, shift) | + FIELD_PREP(OP_LDF_WR_AB, wr_both); + + nfp_prog_push(nfp_prog, insn); +} + +static void +emit_ld_field_any(struct nfp_prog *nfp_prog, enum shf_sc sc, u8 shift, + u32 dst, u8 bmask, u32 src, bool zero) +{ + struct nfp_insn_re_regs reg; + int err; + + err = swreg_to_restricted(reg_none(), dst, src, ®, true); + if (err) { + nfp_prog->error = err; + return; + } + + __emit_ld_field(nfp_prog, sc, reg.areg, bmask, reg.breg, shift, + reg.i8, zero, reg.swap, reg.wr_both); +} + +static void +emit_ld_field(struct nfp_prog *nfp_prog, u32 dst, u8 bmask, u32 src, + enum shf_sc sc, u8 shift) +{ + emit_ld_field_any(nfp_prog, sc, shift, dst, bmask, src, false); +} + +/* --- Wrappers --- */ +static bool pack_immed(u32 imm, u16 *val, enum immed_shift *shift) +{ + if (!(imm & 0xffff0000)) { + *val = imm; + *shift = IMMED_SHIFT_0B; + } else if (!(imm & 0xff0000ff)) { + *val = imm >> 8; + *shift = IMMED_SHIFT_1B; + } else if (!(imm & 0x0000ffff)) { + *val = imm >> 16; + *shift = IMMED_SHIFT_2B; + } else { + return false; + } + + return true; +} + +static void wrp_immed(struct nfp_prog *nfp_prog, u32 dst, u32 imm) +{ + enum immed_shift shift; + u16 val; + + if (pack_immed(imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, false, shift); + } else if (pack_immed(~imm, &val, &shift)) { + emit_immed(nfp_prog, dst, val, IMMED_WIDTH_ALL, true, shift); + } else { + emit_immed(nfp_prog, dst, imm & 0xffff, IMMED_WIDTH_ALL, + false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, dst, imm >> 16, IMMED_WIDTH_WORD, + false, IMMED_SHIFT_2B); + } +} + +/* ur_load_imm_any() - encode immediate or use tmp register (unrestricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 ur_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(UR_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +/* re_load_imm_any() - encode immediate or use tmp register (restricted) + * If the @imm is small enough encode it directly in operand and return + * otherwise load @imm to a spare register and return its encoding. + */ +static u32 re_load_imm_any(struct nfp_prog *nfp_prog, u32 imm, u32 tmp_reg) +{ + if (FIELD_FIT(RE_REG_IMM_MAX, imm)) + return reg_imm(imm); + + wrp_immed(nfp_prog, tmp_reg, imm); + return tmp_reg; +} + +static void +wrp_br_special(struct nfp_prog *nfp_prog, enum br_mask mask, + enum br_special special) +{ + emit_br(nfp_prog, mask, 0, 0); + + nfp_prog->prog[nfp_prog->prog_len - 1] |= + FIELD_PREP(OP_BR_SPECIAL, special); +} + +static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) +{ + emit_alu(nfp_prog, reg_both(dst), reg_none(), ALU_OP_NONE, reg_b(src)); +} + +static int +construct_data_ind_ld(struct nfp_prog *nfp_prog, u16 offset, + u16 src, bool src_valid, u8 size) +{ + unsigned int i; + u16 shift, sz; + u32 tmp_reg; + + /* We load the value from the address indicated in @offset and then + * shift out the data we don't need. Note: this is big endian! + */ + sz = size < 4 ? 4 : size; + shift = size < 4 ? 4 - size : 0; + + if (src_valid) { + /* Calculate the true offset (src_reg + imm) */ + tmp_reg = ur_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_both(nfp_prog), + reg_a(src), ALU_OP_ADD, tmp_reg); + /* Check packet length (size guaranteed to fit b/c it's u8) */ + emit_alu(nfp_prog, imm_a(nfp_prog), + imm_a(nfp_prog), ALU_OP_ADD, reg_imm(size)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, imm_a(nfp_prog)); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), imm_b(nfp_prog), sz - 1, true); + } else { + /* Check packet length */ + tmp_reg = ur_load_imm_any(nfp_prog, offset + size, + imm_a(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + NFP_BPF_ABI_LEN, ALU_OP_SUB, tmp_reg); + wrp_br_special(nfp_prog, BR_BLO, OP_BR_GO_ABORT); + /* Load data */ + tmp_reg = re_load_imm_any(nfp_prog, offset, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, + pkt_reg(nfp_prog), tmp_reg, sz - 1, true); + } + + i = 0; + if (shift) + emit_shf(nfp_prog, reg_both(0), reg_none(), SHF_OP_NONE, + reg_xfer(0), SHF_SC_R_SHF, shift * 8); + else + for (; i * 4 < size; i++) + emit_alu(nfp_prog, reg_both(i), + reg_none(), ALU_OP_NONE, reg_xfer(i)); + + if (i < 2) + wrp_immed(nfp_prog, reg_both(1), 0); + + return 0; +} + +static int construct_data_ld(struct nfp_prog *nfp_prog, u16 offset, u8 size) +{ + return construct_data_ind_ld(nfp_prog, offset, 0, false, size); +} + +static int wrp_set_mark(struct nfp_prog *nfp_prog, u8 src) +{ + emit_alu(nfp_prog, NFP_BPF_ABI_MARK, + reg_none(), ALU_OP_NONE, reg_b(src)); + emit_alu(nfp_prog, NFP_BPF_ABI_FLAGS, + NFP_BPF_ABI_FLAGS, ALU_OP_OR, reg_imm(NFP_BPF_ABI_FLAG_MARK)); + + return 0; +} + +static void +wrp_alu_imm(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u32 imm) +{ + u32 tmp_reg; + + if (alu_op == ALU_OP_AND) { + if (!imm) + wrp_immed(nfp_prog, reg_both(dst), 0); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_OR) { + if (!~imm) + wrp_immed(nfp_prog, reg_both(dst), ~0U); + if (!imm || !~imm) + return; + } + if (alu_op == ALU_OP_XOR) { + if (!~imm) + emit_alu(nfp_prog, reg_both(dst), reg_none(), + ALU_OP_NEG, reg_b(dst)); + if (!imm || !~imm) + return; + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, tmp_reg); +} + +static int +wrp_alu64_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, alu_op, imm >> 32); + + return 0; +} + +static int +wrp_alu64_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + emit_alu(nfp_prog, reg_both(dst + 1), + reg_a(dst + 1), alu_op, reg_b(src + 1)); + + return 0; +} + +static int +wrp_alu32_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, bool skip) +{ + const struct bpf_insn *insn = &meta->insn; + + if (skip) { + meta->skip = true; + return 0; + } + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, alu_op, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int +wrp_alu32_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op) +{ + u8 dst = meta->insn.dst_reg * 2, src = meta->insn.src_reg * 2; + + emit_alu(nfp_prog, reg_both(dst), reg_a(dst), alu_op, reg_b(src)); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static void +wrp_test_reg_one(struct nfp_prog *nfp_prog, u8 dst, enum alu_op alu_op, u8 src, + enum br_mask br_mask, u16 off) +{ + emit_alu(nfp_prog, reg_none(), reg_a(dst), alu_op, reg_b(src)); + emit_br(nfp_prog, br_mask, off, 0); +} + +static int +wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum alu_op alu_op, enum br_mask br_mask) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, + insn->src_reg * 2, br_mask, insn->off); + wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, + insn->src_reg * 2 + 1, br_mask, insn->off); + + return 0; +} + +static int +wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u8 reg = insn->dst_reg * 2; + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), tmp_reg, ALU_OP_SUB, reg_a(reg)); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + if (!swap) + emit_alu(nfp_prog, reg_none(), + reg_a(reg + 1), ALU_OP_SUB_C, tmp_reg); + else + emit_alu(nfp_prog, reg_none(), + tmp_reg, ALU_OP_SUB_C, reg_a(reg + 1)); + + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +static int +wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + enum br_mask br_mask, bool swap) +{ + const struct bpf_insn *insn = &meta->insn; + u8 areg = insn->src_reg * 2, breg = insn->dst_reg * 2; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (swap) { + areg ^= breg; + breg ^= areg; + areg ^= breg; + } + + emit_alu(nfp_prog, reg_none(), reg_a(areg), ALU_OP_SUB, reg_b(breg)); + emit_alu(nfp_prog, reg_none(), + reg_a(areg + 1), ALU_OP_SUB_C, reg_b(breg + 1)); + emit_br(nfp_prog, br_mask, insn->off, 0); + + return 0; +} + +/* --- Callbacks --- */ +static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->src_reg * 2 + 1); + + return 0; +} + +static int mov_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + u64 imm = meta->insn.imm; /* sign extend */ + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2), imm & ~0U); + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), imm >> 32); + + return 0; +} + +static int xor_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_XOR, !meta->insn.imm); +} + +static int and_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu64_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_ADD, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_ADD_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int add_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_ADD, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_ADD_C, imm >> 32); + + return 0; +} + +static int sub_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2), + reg_a(insn->dst_reg * 2), ALU_OP_SUB, + reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, reg_both(insn->dst_reg * 2 + 1), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_SUB_C, + reg_b(insn->src_reg * 2 + 1)); + + return 0; +} + +static int sub_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + + wrp_alu_imm(nfp_prog, insn->dst_reg * 2, ALU_OP_SUB, imm & ~0U); + wrp_alu_imm(nfp_prog, insn->dst_reg * 2 + 1, ALU_OP_SUB_C, imm >> 32); + + return 0; +} + +static int shl_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2 + 1, insn->dst_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), 0); + + return 0; +} + +static int shr_imm64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->imm != 32) + return 1; /* TODO */ + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->dst_reg * 2 + 1); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_reg_mov(nfp_prog, insn->dst_reg * 2, insn->src_reg * 2); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int mov_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int xor_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_XOR); +} + +static int xor_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_XOR, !~meta->insn.imm); +} + +static int and_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_AND); +} + +static int and_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_AND, !~meta->insn.imm); +} + +static int or_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_OR); +} + +static int or_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_OR, !meta->insn.imm); +} + +static int add_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_ADD); +} + +static int add_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_ADD, !meta->insn.imm); +} + +static int sub_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_reg(nfp_prog, meta, ALU_OP_SUB); +} + +static int sub_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_alu32_imm(nfp_prog, meta, ALU_OP_SUB, !meta->insn.imm); +} + +static int shl_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (!insn->imm) + return 1; /* TODO: zero shift means indirect */ + + emit_shf(nfp_prog, reg_both(insn->dst_reg * 2), + reg_none(), SHF_OP_NONE, reg_b(insn->dst_reg * 2), + SHF_SC_L_SHF, insn->imm); + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2 + 1), 0); + + return 0; +} + +static int imm_ld8_part2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_immed(nfp_prog, reg_both(nfp_meta_prev(meta)->insn.dst_reg * 2 + 1), + meta->insn.imm); + + return 0; +} + +static int imm_ld8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + meta->double_cb = imm_ld8_part2; + wrp_immed(nfp_prog, reg_both(insn->dst_reg * 2), insn->imm); + + return 0; +} + +static int data_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 1); +} + +static int data_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 2); +} + +static int data_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ld(nfp_prog, meta->insn.imm, 4); +} + +static int data_ind_ld1(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 1); +} + +static int data_ind_ld2(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 2); +} + +static int data_ind_ld4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return construct_data_ind_ld(nfp_prog, meta->insn.imm, + meta->insn.src_reg * 2, true, 4); +} + +static int mem_ldx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, len)) + emit_alu(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_LEN); + else + return -ENOTSUPP; + + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + +static int mem_stx4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off == offsetof(struct sk_buff, mark)) + return wrp_set_mark(nfp_prog, meta->insn.src_reg * 2); + + return -ENOTSUPP; +} + +static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + if (meta->insn.off < 0) /* TODO */ + return -ENOTSUPP; + emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); + + return 0; +} + +static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 or1 = reg_a(insn->dst_reg * 2), or2 = reg_b(insn->dst_reg * 2 + 1); + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_a(nfp_prog), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + or1 = imm_a(nfp_prog); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, imm_b(nfp_prog), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + or2 = imm_b(nfp_prog); + } + + emit_alu(nfp_prog, reg_none(), or1, ALU_OP_OR, or2); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BLO, false); +} + +static int jge_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_imm(nfp_prog, meta, BR_BHS, true); +} + +static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + meta->skip = true; + return 0; + } + + if (imm & ~0U) { + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + if (imm >> 32) { + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_AND, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + return 0; +} + +static int jne_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + u64 imm = insn->imm; /* sign extend */ + u32 tmp_reg; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + if (!imm) { + emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), + ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + } + + tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + tmp_reg = ur_load_imm_any(nfp_prog, imm >> 32, imm_b(nfp_prog)); + emit_alu(nfp_prog, reg_none(), + reg_a(insn->dst_reg * 2 + 1), ALU_OP_XOR, tmp_reg); + emit_br(nfp_prog, BR_BNE, insn->off, 0); + + return 0; +} + +static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + const struct bpf_insn *insn = &meta->insn; + + if (insn->off < 0) /* TODO */ + return -ENOTSUPP; + + emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), + ALU_OP_XOR, reg_b(insn->src_reg * 2)); + emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), + ALU_OP_XOR, reg_b(insn->src_reg * 2 + 1)); + emit_alu(nfp_prog, reg_none(), + imm_a(nfp_prog), ALU_OP_OR, imm_b(nfp_prog)); + emit_br(nfp_prog, BR_BEQ, insn->off, 0); + + return 0; +} + +static int jgt_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BLO, false); +} + +static int jge_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_cmp_reg(nfp_prog, meta, BR_BHS, true); +} + +static int jset_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_AND, BR_BNE); +} + +static int jne_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return wrp_test_reg(nfp_prog, meta, ALU_OP_XOR, BR_BNE); +} + +static int goto_out(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + wrp_br_special(nfp_prog, BR_UNC, OP_BR_GO_OUT); + + return 0; +} + +static const instr_cb_t instr_cb[256] = { + [BPF_ALU64 | BPF_MOV | BPF_X] = mov_reg64, + [BPF_ALU64 | BPF_MOV | BPF_K] = mov_imm64, + [BPF_ALU64 | BPF_XOR | BPF_X] = xor_reg64, + [BPF_ALU64 | BPF_XOR | BPF_K] = xor_imm64, + [BPF_ALU64 | BPF_AND | BPF_X] = and_reg64, + [BPF_ALU64 | BPF_AND | BPF_K] = and_imm64, + [BPF_ALU64 | BPF_OR | BPF_X] = or_reg64, + [BPF_ALU64 | BPF_OR | BPF_K] = or_imm64, + [BPF_ALU64 | BPF_ADD | BPF_X] = add_reg64, + [BPF_ALU64 | BPF_ADD | BPF_K] = add_imm64, + [BPF_ALU64 | BPF_SUB | BPF_X] = sub_reg64, + [BPF_ALU64 | BPF_SUB | BPF_K] = sub_imm64, + [BPF_ALU64 | BPF_LSH | BPF_K] = shl_imm64, + [BPF_ALU64 | BPF_RSH | BPF_K] = shr_imm64, + [BPF_ALU | BPF_MOV | BPF_X] = mov_reg, + [BPF_ALU | BPF_MOV | BPF_K] = mov_imm, + [BPF_ALU | BPF_XOR | BPF_X] = xor_reg, + [BPF_ALU | BPF_XOR | BPF_K] = xor_imm, + [BPF_ALU | BPF_AND | BPF_X] = and_reg, + [BPF_ALU | BPF_AND | BPF_K] = and_imm, + [BPF_ALU | BPF_OR | BPF_X] = or_reg, + [BPF_ALU | BPF_OR | BPF_K] = or_imm, + [BPF_ALU | BPF_ADD | BPF_X] = add_reg, + [BPF_ALU | BPF_ADD | BPF_K] = add_imm, + [BPF_ALU | BPF_SUB | BPF_X] = sub_reg, + [BPF_ALU | BPF_SUB | BPF_K] = sub_imm, + [BPF_ALU | BPF_LSH | BPF_K] = shl_imm, + [BPF_LD | BPF_IMM | BPF_DW] = imm_ld8, + [BPF_LD | BPF_ABS | BPF_B] = data_ld1, + [BPF_LD | BPF_ABS | BPF_H] = data_ld2, + [BPF_LD | BPF_ABS | BPF_W] = data_ld4, + [BPF_LD | BPF_IND | BPF_B] = data_ind_ld1, + [BPF_LD | BPF_IND | BPF_H] = data_ind_ld2, + [BPF_LD | BPF_IND | BPF_W] = data_ind_ld4, + [BPF_LDX | BPF_MEM | BPF_W] = mem_ldx4, + [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, + [BPF_JMP | BPF_JA | BPF_K] = jump, + [BPF_JMP | BPF_JEQ | BPF_K] = jeq_imm, + [BPF_JMP | BPF_JGT | BPF_K] = jgt_imm, + [BPF_JMP | BPF_JGE | BPF_K] = jge_imm, + [BPF_JMP | BPF_JSET | BPF_K] = jset_imm, + [BPF_JMP | BPF_JNE | BPF_K] = jne_imm, + [BPF_JMP | BPF_JEQ | BPF_X] = jeq_reg, + [BPF_JMP | BPF_JGT | BPF_X] = jgt_reg, + [BPF_JMP | BPF_JGE | BPF_X] = jge_reg, + [BPF_JMP | BPF_JSET | BPF_X] = jset_reg, + [BPF_JMP | BPF_JNE | BPF_X] = jne_reg, + [BPF_JMP | BPF_EXIT] = goto_out, +}; + +/* --- Misc code --- */ +static void br_set_offset(u64 *instr, u16 offset) +{ + u16 addr_lo, addr_hi; + + addr_lo = offset & (OP_BR_ADDR_LO >> __bf_shf(OP_BR_ADDR_LO)); + addr_hi = offset != addr_lo; + *instr &= ~(OP_BR_ADDR_HI | OP_BR_ADDR_LO); + *instr |= FIELD_PREP(OP_BR_ADDR_HI, addr_hi); + *instr |= FIELD_PREP(OP_BR_ADDR_LO, addr_lo); +} + +/* --- Assembler logic --- */ +static int nfp_fixup_branches(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *next; + u32 off, br_idx; + u32 idx; + + nfp_for_each_insn_walk2(nfp_prog, meta, next) { + if (meta->skip) + continue; + if (BPF_CLASS(meta->insn.code) != BPF_JMP) + continue; + + br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1; + if (!nfp_is_br(nfp_prog->prog[br_idx])) { + pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", + br_idx, meta->insn.code, nfp_prog->prog[br_idx]); + return -ELOOP; + } + /* Leave special branches for later */ + if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) + continue; + + /* Find the target offset in assembler realm */ + off = meta->insn.off; + if (!off) { + pr_err("Fixup found zero offset!!\n"); + return -ELOOP; + } + + while (off && nfp_meta_has_next(nfp_prog, next)) { + next = nfp_meta_next(next); + off--; + } + if (off) { + pr_err("Fixup found too large jump!! %d\n", off); + return -ELOOP; + } + + if (next->skip) { + pr_err("Branch landing on removed instruction!!\n"); + return -ELOOP; + } + + for (idx = nfp_prog_offset_to_index(nfp_prog, meta->off); + idx <= br_idx; idx++) { + if (!nfp_is_br(nfp_prog->prog[idx])) + continue; + br_set_offset(&nfp_prog->prog[idx], next->off); + } + } + + /* Fixup 'goto out's separately, they can be scattered around */ + for (br_idx = 0; br_idx < nfp_prog->prog_len; br_idx++) { + enum br_special special; + + if ((nfp_prog->prog[br_idx] & OP_BR_BASE_MASK) != OP_BR_BASE) + continue; + + special = FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx]); + switch (special) { + case OP_BR_NORMAL: + break; + case OP_BR_GO_OUT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_out); + break; + case OP_BR_GO_ABORT: + br_set_offset(&nfp_prog->prog[br_idx], + nfp_prog->tgt_abort); + break; + } + + nfp_prog->prog[br_idx] &= ~OP_BR_SPECIAL; + } + + return 0; +} + +static void nfp_intro(struct nfp_prog *nfp_prog) +{ + emit_alu(nfp_prog, pkt_reg(nfp_prog), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_PKT); +} + +static void nfp_outro_tc_legacy(struct nfp_prog *nfp_prog) +{ + const u8 act2code[] = { + [NN_ACT_TC_DROP] = 0x22, + [NN_ACT_TC_REDIR] = 0x24 + }; + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + wrp_immed(nfp_prog, reg_both(0), 0); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + /* Legacy TC mode: + * 0 0x11 -> pass, count as stat0 + * -1 drop 0x22 -> drop, count as stat1 + * redir 0x24 -> redir, count as stat1 + * ife mark 0x21 -> pass, count as stat1 + * ife + tx 0x24 -> redir, count as stat1 + */ + emit_br_byte_neq(nfp_prog, reg_b(0), 0xff, 0, nfp_prog->tgt_done, 2); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + emit_br(nfp_prog, BR_UNC, nfp_prog->tgt_done, 1); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(act2code[nfp_prog->act]), + SHF_SC_L_SHF, 16); +} + +static void nfp_outro_tc_da(struct nfp_prog *nfp_prog) +{ + /* TC direct-action mode: + * 0,1 ok NOT SUPPORTED[1] + * 2 drop 0x22 -> drop, count as stat1 + * 4,5 nuke 0x02 -> drop + * 7 redir 0x44 -> redir, count as stat2 + * * unspec 0x11 -> pass, count as stat0 + * + * [1] We can't support OK and RECLASSIFY because we can't tell TC + * the exact decision made. We are forced to support UNSPEC + * to handle aborts so that's the only one we handle for passing + * packets up the stack. + */ + /* Target for aborts */ + nfp_prog->tgt_abort = nfp_prog_current_offset(nfp_prog); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_imm(0x11), SHF_SC_L_SHF, 16); + + /* Target for normal exits */ + nfp_prog->tgt_out = nfp_prog_current_offset(nfp_prog); + + /* if R0 > 7 jump to abort */ + emit_alu(nfp_prog, reg_none(), reg_imm(7), ALU_OP_SUB, reg_b(0)); + emit_br(nfp_prog, BR_BLO, nfp_prog->tgt_abort, 0); + emit_alu(nfp_prog, reg_a(0), + reg_none(), ALU_OP_NONE, NFP_BPF_ABI_FLAGS); + + wrp_immed(nfp_prog, reg_b(2), 0x41221211); + wrp_immed(nfp_prog, reg_b(3), 0x41001211); + + emit_shf(nfp_prog, reg_a(1), + reg_none(), SHF_OP_NONE, reg_b(0), SHF_SC_L_SHF, 2); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_a(2), + reg_imm(0xf), SHF_OP_AND, reg_b(2), SHF_SC_R_SHF, 0); + + emit_alu(nfp_prog, reg_none(), reg_a(1), ALU_OP_OR, reg_imm(0)); + emit_shf(nfp_prog, reg_b(2), + reg_imm(0xf), SHF_OP_AND, reg_b(3), SHF_SC_R_SHF, 0); + + emit_br_def(nfp_prog, nfp_prog->tgt_done, 2); + + emit_shf(nfp_prog, reg_b(2), + reg_a(2), SHF_OP_OR, reg_b(2), SHF_SC_L_SHF, 4); + emit_ld_field(nfp_prog, reg_a(0), 0xc, reg_b(2), SHF_SC_L_SHF, 16); +} + +static void nfp_outro(struct nfp_prog *nfp_prog) +{ + switch (nfp_prog->act) { + case NN_ACT_DIRECT: + nfp_outro_tc_da(nfp_prog); + break; + case NN_ACT_TC_DROP: + case NN_ACT_TC_REDIR: + nfp_outro_tc_legacy(nfp_prog); + break; + } +} + +static int nfp_translate(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + int err; + + nfp_intro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + instr_cb_t cb = instr_cb[meta->insn.code]; + + meta->off = nfp_prog_current_offset(nfp_prog); + + if (meta->skip) { + nfp_prog->n_translated++; + continue; + } + + if (nfp_meta_has_prev(nfp_prog, meta) && + nfp_meta_prev(meta)->double_cb) + cb = nfp_meta_prev(meta)->double_cb; + if (!cb) + return -ENOENT; + err = cb(nfp_prog, meta); + if (err) + return err; + + nfp_prog->n_translated++; + } + + nfp_outro(nfp_prog); + if (nfp_prog->error) + return nfp_prog->error; + + return nfp_fixup_branches(nfp_prog); +} + +static int +nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, + unsigned int cnt) +{ + unsigned int i; + + for (i = 0; i < cnt; i++) { + struct nfp_insn_meta *meta; + + meta = kzalloc(sizeof(*meta), GFP_KERNEL); + if (!meta) + return -ENOMEM; + + meta->insn = prog[i]; + meta->n = i; + + list_add_tail(&meta->l, &nfp_prog->insns); + } + + return 0; +} + +/* --- Optimizations --- */ +static void nfp_bpf_opt_reg_init(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + struct bpf_insn insn = meta->insn; + + /* Programs converted from cBPF start with register xoring */ + if (insn.code == (BPF_ALU64 | BPF_XOR | BPF_X) && + insn.src_reg == insn.dst_reg) + continue; + + /* Programs start with R6 = R1 but we ignore the skb pointer */ + if (insn.code == (BPF_ALU64 | BPF_MOV | BPF_X) && + insn.src_reg == 1 && insn.dst_reg == 6) + meta->skip = true; + + /* Return as soon as something doesn't match */ + if (!meta->skip) + return; + } +} + +/* Try to rename registers so that program uses only low ones */ +static int nfp_bpf_opt_reg_rename(struct nfp_prog *nfp_prog) +{ + bool reg_used[MAX_BPF_REG] = {}; + u8 tgt_reg[MAX_BPF_REG] = {}; + struct nfp_insn_meta *meta; + unsigned int i, j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + + reg_used[meta->insn.src_reg] = true; + reg_used[meta->insn.dst_reg] = true; + } + + for (i = 0, j = 0; i < ARRAY_SIZE(tgt_reg); i++) { + if (!reg_used[i]) + continue; + + tgt_reg[i] = j++; + } + nfp_prog->num_regs = j; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + meta->insn.src_reg = tgt_reg[meta->insn.src_reg]; + meta->insn.dst_reg = tgt_reg[meta->insn.dst_reg]; + } + + return 0; +} + +/* Remove masking after load since our load guarantees this is not needed */ +static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2; + const s32 exp_mask[] = { + [BPF_B] = 0x000000ffU, + [BPF_H] = 0x0000ffffU, + [BPF_W] = 0xffffffffU, + }; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn insn, next; + + insn = meta1->insn; + next = meta2->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + + if (next.code != (BPF_ALU64 | BPF_AND | BPF_K)) + continue; + + if (!exp_mask[BPF_SIZE(insn.code)]) + continue; + if (exp_mask[BPF_SIZE(insn.code)] != next.imm) + continue; + + if (next.src_reg || next.dst_reg) + continue; + + meta2->skip = true; + } +} + +static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta1, *meta2, *meta3; + + nfp_for_each_insn_walk3(nfp_prog, meta1, meta2, meta3) { + struct bpf_insn insn, next1, next2; + + insn = meta1->insn; + next1 = meta2->insn; + next2 = meta3->insn; + + if (BPF_CLASS(insn.code) != BPF_LD) + continue; + if (BPF_MODE(insn.code) != BPF_ABS && + BPF_MODE(insn.code) != BPF_IND) + continue; + if (BPF_SIZE(insn.code) != BPF_W) + continue; + + if (!(next1.code == (BPF_LSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_RSH | BPF_K | BPF_ALU64)) && + !(next1.code == (BPF_RSH | BPF_K | BPF_ALU64) && + next2.code == (BPF_LSH | BPF_K | BPF_ALU64))) + continue; + + if (next1.src_reg || next1.dst_reg || + next2.src_reg || next2.dst_reg) + continue; + + if (next1.imm != 0x20 || next2.imm != 0x20) + continue; + + meta2->skip = true; + meta3->skip = true; + } +} + +static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) +{ + int ret; + + nfp_bpf_opt_reg_init(nfp_prog); + + ret = nfp_bpf_opt_reg_rename(nfp_prog); + if (ret) + return ret; + + nfp_bpf_opt_ld_mask(nfp_prog); + nfp_bpf_opt_ld_shift(nfp_prog); + + return 0; +} + +/** + * nfp_bpf_jit() - translate BPF code into NFP assembly + * @filter: kernel BPF filter struct + * @prog_mem: memory to store assembler instructions + * @act: action attached to this eBPF program + * @prog_start: offset of the first instruction when loaded + * @prog_done: where to jump on exit + * @prog_sz: size of @prog_mem in instructions + * @res: achieved parameters of translation results + */ +int +nfp_bpf_jit(struct bpf_prog *filter, void *prog_mem, + enum nfp_bpf_action_type act, + unsigned int prog_start, unsigned int prog_done, + unsigned int prog_sz, struct nfp_bpf_result *res) +{ + struct nfp_prog *nfp_prog; + int ret; + + nfp_prog = kzalloc(sizeof(*nfp_prog), GFP_KERNEL); + if (!nfp_prog) + return -ENOMEM; + + INIT_LIST_HEAD(&nfp_prog->insns); + nfp_prog->act = act; + nfp_prog->start_off = prog_start; + nfp_prog->tgt_done = prog_done; + + ret = nfp_prog_prepare(nfp_prog, filter->insnsi, filter->len); + if (ret) + goto out; + + ret = nfp_prog_verify(nfp_prog, filter); + if (ret) + goto out; + + ret = nfp_bpf_optimize(nfp_prog); + if (ret) + goto out; + + if (nfp_prog->num_regs <= 7) + nfp_prog->regs_per_thread = 16; + else + nfp_prog->regs_per_thread = 32; + + nfp_prog->prog = prog_mem; + nfp_prog->__prog_alloc_len = prog_sz; + + ret = nfp_translate(nfp_prog); + if (ret) { + pr_err("Translation failed with error %d (translated: %u)\n", + ret, nfp_prog->n_translated); + ret = -EINVAL; + } + + res->n_instr = nfp_prog->prog_len; + res->dense_mode = nfp_prog->num_regs <= 7; +out: + nfp_prog_free(nfp_prog); + + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c new file mode 100644 index 000000000000..144cae87f63a --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_bpf_verifier.c @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) "NFP net bpf: " fmt + +#include +#include +#include +#include + +#include "nfp_bpf.h" + +/* Analyzer/verifier definitions */ +struct nfp_bpf_analyzer_priv { + struct nfp_prog *prog; + struct nfp_insn_meta *meta; +}; + +static struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns) +{ + unsigned int forward, backward, i; + + backward = meta->n - insn_idx; + forward = insn_idx - meta->n; + + if (min(forward, backward) > n_insns - insn_idx - 1) { + backward = n_insns - insn_idx - 1; + meta = nfp_prog_last_meta(nfp_prog); + } + if (min(forward, backward) > insn_idx && backward > insn_idx) { + forward = insn_idx; + meta = nfp_prog_first_meta(nfp_prog); + } + + if (forward < backward) + for (i = 0; i < forward; i++) + meta = nfp_meta_next(meta); + else + for (i = 0; i < backward; i++) + meta = nfp_meta_prev(meta); + + return meta; +} + +static int +nfp_bpf_check_exit(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env) +{ + const struct bpf_reg_state *reg0 = &env->cur_state.regs[0]; + + if (reg0->type != CONST_IMM) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (nfp_prog->act != NN_ACT_DIRECT && + reg0->imm != 0 && (reg0->imm & ~0U) != ~0U) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + if (nfp_prog->act == NN_ACT_DIRECT && reg0->imm <= TC_ACT_REDIRECT && + reg0->imm != TC_ACT_SHOT && reg0->imm != TC_ACT_STOLEN && + reg0->imm != TC_ACT_QUEUED) { + pr_info("unsupported exit state: %d, imm: %llx\n", + reg0->type, reg0->imm); + return -EINVAL; + } + + return 0; +} + +static int +nfp_bpf_check_ctx_ptr(struct nfp_prog *nfp_prog, + const struct bpf_verifier_env *env, u8 reg) +{ + if (env->cur_state.regs[reg].type != PTR_TO_CTX) + return -EINVAL; + + return 0; +} + +static int +nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) +{ + struct nfp_bpf_analyzer_priv *priv = env->analyzer_priv; + struct nfp_insn_meta *meta = priv->meta; + + meta = nfp_bpf_goto_meta(priv->prog, meta, insn_idx, env->prog->len); + priv->meta = meta; + + if (meta->insn.src_reg == BPF_REG_10 || + meta->insn.dst_reg == BPF_REG_10) { + pr_err("stack not yet supported\n"); + return -EINVAL; + } + if (meta->insn.src_reg >= MAX_BPF_REG || + meta->insn.dst_reg >= MAX_BPF_REG) { + pr_err("program uses extended registers - jit hardening?\n"); + return -EINVAL; + } + + if (meta->insn.code == (BPF_JMP | BPF_EXIT)) + return nfp_bpf_check_exit(priv->prog, env); + + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.src_reg); + if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) + return nfp_bpf_check_ctx_ptr(priv->prog, env, + meta->insn.dst_reg); + + return 0; +} + +static const struct bpf_ext_analyzer_ops nfp_bpf_analyzer_ops = { + .insn_hook = nfp_verify_insn, +}; + +int nfp_prog_verify(struct nfp_prog *nfp_prog, struct bpf_prog *prog) +{ + struct nfp_bpf_analyzer_priv *priv; + int ret; + + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->prog = nfp_prog; + priv->meta = nfp_prog_first_meta(nfp_prog); + + ret = bpf_analyzer(prog, &nfp_bpf_analyzer_ops, priv); + + kfree(priv); + + return ret; +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 690635660195..ed824e11a1e3 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -62,6 +62,9 @@ /* Max time to wait for NFP to respond on updates (in seconds) */ #define NFP_NET_POLL_TIMEOUT 5 +/* Interval for reading offloaded filter stats */ +#define NFP_NET_STAT_POLL_IVL msecs_to_jiffies(100) + /* Bar allocation */ #define NFP_NET_CTRL_BAR 0 #define NFP_NET_Q0_BAR 2 @@ -220,7 +223,7 @@ struct nfp_net_tx_ring { #define PCIE_DESC_RX_I_TCP_CSUM_OK cpu_to_le16(BIT(11)) #define PCIE_DESC_RX_I_UDP_CSUM cpu_to_le16(BIT(10)) #define PCIE_DESC_RX_I_UDP_CSUM_OK cpu_to_le16(BIT(9)) -#define PCIE_DESC_RX_SPARE cpu_to_le16(BIT(8)) +#define PCIE_DESC_RX_BPF cpu_to_le16(BIT(8)) #define PCIE_DESC_RX_EOP cpu_to_le16(BIT(7)) #define PCIE_DESC_RX_IP4_CSUM cpu_to_le16(BIT(6)) #define PCIE_DESC_RX_IP4_CSUM_OK cpu_to_le16(BIT(5)) @@ -266,6 +269,8 @@ struct nfp_net_rx_desc { }; }; +#define NFP_NET_META_FIELD_MASK GENMASK(NFP_NET_META_FIELD_SIZE - 1, 0) + struct nfp_net_rx_hash { __be32 hash_type; __be32 hash; @@ -405,6 +410,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, fw_ver->minor == minor; } +struct nfp_stat_pair { + u64 pkts; + u64 bytes; +}; + /** * struct nfp_net - NFP network device structure * @pdev: Backpointer to PCI device @@ -413,6 +423,7 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @is_vf: Is the driver attached to a VF? * @is_nfp3200: Is the driver for a NFP-3200 card? * @fw_loaded: Is the firmware loaded? + * @bpf_offload_skip_sw: Offloaded BPF program will not be rerun by cls_bpf * @ctrl: Local copy of the control register/word. * @fl_bufsz: Currently configured size of the freelist buffers * @rx_offset: Offset in the RX buffers where packet data starts @@ -427,6 +438,11 @@ static inline bool nfp_net_fw_ver_eq(struct nfp_net_fw_version *fw_ver, * @rss_cfg: RSS configuration * @rss_key: RSS secret key * @rss_itbl: RSS indirection table + * @rx_filter: Filter offload statistics - dropped packets/bytes + * @rx_filter_prev: Filter offload statistics - values from previous update + * @rx_filter_change: Jiffies when statistics last changed + * @rx_filter_stats_timer: Timer for polling filter offload statistics + * @rx_filter_lock: Lock protecting timer state changes (teardown) * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware * @num_tx_rings: Currently configured number of TX rings @@ -473,6 +489,7 @@ struct nfp_net { unsigned is_vf:1; unsigned is_nfp3200:1; unsigned fw_loaded:1; + unsigned bpf_offload_skip_sw:1; u32 ctrl; u32 fl_bufsz; @@ -502,6 +519,11 @@ struct nfp_net { u8 rss_key[NFP_NET_CFG_RSS_KEY_SZ]; u8 rss_itbl[NFP_NET_CFG_RSS_ITBL_SZ]; + struct nfp_stat_pair rx_filter, rx_filter_prev; + unsigned long rx_filter_change; + struct timer_list rx_filter_stats_timer; + spinlock_t rx_filter_lock; + int max_tx_rings; int max_rx_rings; @@ -561,12 +583,28 @@ struct nfp_net { /* Functions to read/write from/to a BAR * Performs any endian conversion necessary. */ +static inline u16 nn_readb(struct nfp_net *nn, int off) +{ + return readb(nn->ctrl_bar + off); +} + static inline void nn_writeb(struct nfp_net *nn, int off, u8 val) { writeb(val, nn->ctrl_bar + off); } -/* NFP-3200 can't handle 16-bit accesses too well - hence no readw/writew */ +/* NFP-3200 can't handle 16-bit accesses too well */ +static inline u16 nn_readw(struct nfp_net *nn, int off) +{ + WARN_ON_ONCE(nn->is_nfp3200); + return readw(nn->ctrl_bar + off); +} + +static inline void nn_writew(struct nfp_net *nn, int off, u16 val) +{ + WARN_ON_ONCE(nn->is_nfp3200); + writew(val, nn->ctrl_bar + off); +} static inline u32 nn_readl(struct nfp_net *nn, int off) { @@ -757,4 +795,9 @@ static inline void nfp_net_debugfs_adapter_del(struct nfp_net *nn) } #endif /* CONFIG_NFP_NET_DEBUG */ +void nfp_net_filter_stats_timer(unsigned long data); +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf); + #endif /* _NFP_NET_H_ */ diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 39dadfca84ef..aee3fd2b6538 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -60,6 +60,7 @@ #include +#include #include #include "nfp_net_ctrl.h" @@ -1292,36 +1293,70 @@ static void nfp_net_rx_csum(struct nfp_net *nn, struct nfp_net_r_vector *r_vec, } } -/** - * nfp_net_set_hash() - Set SKB hash data - * @netdev: adapter's net_device structure - * @skb: SKB to set the hash data on - * @rxd: RX descriptor - * - * The RSS hash and hash-type are pre-pended to the packet data. - * Extract and decode it and set the skb fields. - */ static void nfp_net_set_hash(struct net_device *netdev, struct sk_buff *skb, - struct nfp_net_rx_desc *rxd) + unsigned int type, __be32 *hash) +{ + if (!(netdev->features & NETIF_F_RXHASH)) + return; + + switch (type) { + case NFP_NET_RSS_IPV4: + case NFP_NET_RSS_IPV6: + case NFP_NET_RSS_IPV6_EX: + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L3); + break; + default: + skb_set_hash(skb, get_unaligned_be32(hash), PKT_HASH_TYPE_L4); + break; + } +} + +static void +nfp_net_set_hash_desc(struct net_device *netdev, struct sk_buff *skb, + struct nfp_net_rx_desc *rxd) { struct nfp_net_rx_hash *rx_hash; - if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS) || - !(netdev->features & NETIF_F_RXHASH)) + if (!(rxd->rxd.flags & PCIE_DESC_RX_RSS)) return; rx_hash = (struct nfp_net_rx_hash *)(skb->data - sizeof(*rx_hash)); - switch (be32_to_cpu(rx_hash->hash_type)) { - case NFP_NET_RSS_IPV4: - case NFP_NET_RSS_IPV6: - case NFP_NET_RSS_IPV6_EX: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L3); - break; - default: - skb_set_hash(skb, be32_to_cpu(rx_hash->hash), PKT_HASH_TYPE_L4); - break; + nfp_net_set_hash(netdev, skb, get_unaligned_be32(&rx_hash->hash_type), + &rx_hash->hash); +} + +static void * +nfp_net_parse_meta(struct net_device *netdev, struct sk_buff *skb, + int meta_len) +{ + u8 *data = skb->data - meta_len; + u32 meta_info; + + meta_info = get_unaligned_be32(data); + data += 4; + + while (meta_info) { + switch (meta_info & NFP_NET_META_FIELD_MASK) { + case NFP_NET_META_HASH: + meta_info >>= NFP_NET_META_FIELD_SIZE; + nfp_net_set_hash(netdev, skb, + meta_info & NFP_NET_META_FIELD_MASK, + (__be32 *)data); + data += 4; + break; + case NFP_NET_META_MARK: + skb->mark = get_unaligned_be32(data); + data += 4; + break; + default: + return NULL; + } + + meta_info >>= NFP_NET_META_FIELD_SIZE; } + + return data; } /** @@ -1438,14 +1473,29 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget) skb_reserve(skb, nn->rx_offset); skb_put(skb, data_len - meta_len); - nfp_net_set_hash(nn->netdev, skb, rxd); - /* Stats update */ u64_stats_update_begin(&r_vec->rx_sync); r_vec->rx_pkts++; r_vec->rx_bytes += skb->len; u64_stats_update_end(&r_vec->rx_sync); + if (nn->fw_ver.major <= 3) { + nfp_net_set_hash_desc(nn->netdev, skb, rxd); + } else if (meta_len) { + void *end; + + end = nfp_net_parse_meta(nn->netdev, skb, meta_len); + if (unlikely(end != skb->data)) { + u64_stats_update_begin(&r_vec->rx_sync); + r_vec->rx_drops++; + u64_stats_update_end(&r_vec->rx_sync); + + dev_kfree_skb_any(skb); + nn_warn_ratelimit(nn, "invalid RX packet metadata\n"); + continue; + } + } + skb_record_rx_queue(skb, rx_ring->idx); skb->protocol = eth_type_trans(skb, nn->netdev); @@ -2386,6 +2436,31 @@ static struct rtnl_link_stats64 *nfp_net_stat64(struct net_device *netdev, return stats; } +static bool nfp_net_ebpf_capable(struct nfp_net *nn) +{ + if (nn->cap & NFP_NET_CFG_CTRL_BPF && + nn_readb(nn, NFP_NET_CFG_BPF_ABI) == NFP_NET_BPF_ABI) + return true; + return false; +} + +static int +nfp_net_setup_tc(struct net_device *netdev, u32 handle, __be16 proto, + struct tc_to_netdev *tc) +{ + struct nfp_net *nn = netdev_priv(netdev); + + if (TC_H_MAJ(handle) != TC_H_MAJ(TC_H_INGRESS)) + return -ENOTSUPP; + if (proto != htons(ETH_P_ALL)) + return -ENOTSUPP; + + if (tc->type == TC_SETUP_CLSBPF && nfp_net_ebpf_capable(nn)) + return nfp_net_bpf_offload(nn, handle, proto, tc->cls_bpf); + + return -EINVAL; +} + static int nfp_net_set_features(struct net_device *netdev, netdev_features_t features) { @@ -2440,6 +2515,11 @@ static int nfp_net_set_features(struct net_device *netdev, new_ctrl &= ~NFP_NET_CFG_CTRL_GATHER; } + if (changed & NETIF_F_HW_TC && nn->ctrl & NFP_NET_CFG_CTRL_BPF) { + nn_err(nn, "Cannot disable HW TC offload while in use\n"); + return -EBUSY; + } + nn_dbg(nn, "Feature change 0x%llx -> 0x%llx (changed=0x%llx)\n", netdev->features, features, changed); @@ -2589,6 +2669,7 @@ static const struct net_device_ops nfp_net_netdev_ops = { .ndo_stop = nfp_net_netdev_close, .ndo_start_xmit = nfp_net_tx, .ndo_get_stats64 = nfp_net_stat64, + .ndo_setup_tc = nfp_net_setup_tc, .ndo_tx_timeout = nfp_net_tx_timeout, .ndo_set_rx_mode = nfp_net_set_rx_mode, .ndo_change_mtu = nfp_net_change_mtu, @@ -2614,7 +2695,7 @@ void nfp_net_info(struct nfp_net *nn) nn->fw_ver.resv, nn->fw_ver.class, nn->fw_ver.major, nn->fw_ver.minor, nn->max_mtu); - nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", + nn_info(nn, "CAP: %#x %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", nn->cap, nn->cap & NFP_NET_CFG_CTRL_PROMISC ? "PROMISC " : "", nn->cap & NFP_NET_CFG_CTRL_L2BC ? "L2BCFILT " : "", @@ -2631,7 +2712,8 @@ void nfp_net_info(struct nfp_net *nn) nn->cap & NFP_NET_CFG_CTRL_MSIXAUTO ? "AUTOMASK " : "", nn->cap & NFP_NET_CFG_CTRL_IRQMOD ? "IRQMOD " : "", nn->cap & NFP_NET_CFG_CTRL_VXLAN ? "VXLAN " : "", - nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : ""); + nn->cap & NFP_NET_CFG_CTRL_NVGRE ? "NVGRE " : "", + nfp_net_ebpf_capable(nn) ? "BPF " : ""); } /** @@ -2674,10 +2756,13 @@ struct nfp_net *nfp_net_netdev_alloc(struct pci_dev *pdev, nn->rxd_cnt = NFP_NET_RX_DESCS_DEFAULT; spin_lock_init(&nn->reconfig_lock); + spin_lock_init(&nn->rx_filter_lock); spin_lock_init(&nn->link_status_lock); setup_timer(&nn->reconfig_timer, nfp_net_reconfig_timer, (unsigned long)nn); + setup_timer(&nn->rx_filter_stats_timer, + nfp_net_filter_stats_timer, (unsigned long)nn); return nn; } @@ -2799,6 +2884,9 @@ int nfp_net_netdev_init(struct net_device *netdev) netdev->features = netdev->hw_features; + if (nfp_net_ebpf_capable(nn)) + netdev->hw_features |= NETIF_F_HW_TC; + /* Advertise but disable TSO by default. */ netdev->features &= ~(NETIF_F_TSO | NETIF_F_TSO6); diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h index ad6c4e31cedd..93b10b441acb 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ctrl.h @@ -65,6 +65,13 @@ */ #define NFP_NET_LSO_MAX_HDR_SZ 255 +/** + * Prepend field types + */ +#define NFP_NET_META_FIELD_SIZE 4 +#define NFP_NET_META_HASH 1 /* next field carries hash type */ +#define NFP_NET_META_MARK 2 + /** * Hash type pre-pended when a RSS hash was computed */ @@ -123,6 +130,7 @@ #define NFP_NET_CFG_CTRL_L2SWITCH_LOCAL (0x1 << 23) /* Switch to local */ #define NFP_NET_CFG_CTRL_VXLAN (0x1 << 24) /* VXLAN tunnel support */ #define NFP_NET_CFG_CTRL_NVGRE (0x1 << 25) /* NVGRE tunnel support */ +#define NFP_NET_CFG_CTRL_BPF (0x1 << 27) /* BPF offload capable */ #define NFP_NET_CFG_UPDATE 0x0004 #define NFP_NET_CFG_UPDATE_GEN (0x1 << 0) /* General update */ #define NFP_NET_CFG_UPDATE_RING (0x1 << 1) /* Ring config change */ @@ -134,6 +142,7 @@ #define NFP_NET_CFG_UPDATE_RESET (0x1 << 7) /* Update due to FLR */ #define NFP_NET_CFG_UPDATE_IRQMOD (0x1 << 8) /* IRQ mod change */ #define NFP_NET_CFG_UPDATE_VXLAN (0x1 << 9) /* VXLAN port change */ +#define NFP_NET_CFG_UPDATE_BPF (0x1 << 10) /* BPF program load */ #define NFP_NET_CFG_UPDATE_ERR (0x1 << 31) /* A error occurred */ #define NFP_NET_CFG_TXRS_ENABLE 0x0008 #define NFP_NET_CFG_RXRS_ENABLE 0x0010 @@ -196,10 +205,37 @@ #define NFP_NET_CFG_VXLAN_SZ 0x0008 /** - * 64B reserved for future use (0x0080 - 0x00c0) + * NFP6000 - BPF section + * @NFP_NET_CFG_BPF_ABI: BPF ABI version + * @NFP_NET_CFG_BPF_CAP: BPF capabilities + * @NFP_NET_CFG_BPF_MAX_LEN: Maximum size of JITed BPF code in bytes + * @NFP_NET_CFG_BPF_START: Offset at which BPF will be loaded + * @NFP_NET_CFG_BPF_DONE: Offset to jump to on exit + * @NFP_NET_CFG_BPF_STACK_SZ: Total size of stack area in 64B chunks + * @NFP_NET_CFG_BPF_INL_MTU: Packet data split offset in 64B chunks + * @NFP_NET_CFG_BPF_SIZE: Size of the JITed BPF code in instructions + * @NFP_NET_CFG_BPF_ADDR: DMA address of the buffer with JITed BPF code */ -#define NFP_NET_CFG_RESERVED 0x0080 -#define NFP_NET_CFG_RESERVED_SZ 0x0040 +#define NFP_NET_CFG_BPF_ABI 0x0080 +#define NFP_NET_BPF_ABI 1 +#define NFP_NET_CFG_BPF_CAP 0x0081 +#define NFP_NET_BPF_CAP_RELO (1 << 0) /* seamless reload */ +#define NFP_NET_CFG_BPF_MAX_LEN 0x0082 +#define NFP_NET_CFG_BPF_START 0x0084 +#define NFP_NET_CFG_BPF_DONE 0x0086 +#define NFP_NET_CFG_BPF_STACK_SZ 0x0088 +#define NFP_NET_CFG_BPF_INL_MTU 0x0089 +#define NFP_NET_CFG_BPF_SIZE 0x008e +#define NFP_NET_CFG_BPF_ADDR 0x0090 +#define NFP_NET_CFG_BPF_CFG_8CTX (1 << 0) /* 8ctx mode */ +#define NFP_NET_CFG_BPF_CFG_MASK 7ULL +#define NFP_NET_CFG_BPF_ADDR_MASK (~NFP_NET_CFG_BPF_CFG_MASK) + +/** + * 40B reserved for future use (0x0098 - 0x00c0) + */ +#define NFP_NET_CFG_RESERVED 0x0098 +#define NFP_NET_CFG_RESERVED_SZ 0x0028 /** * RSS configuration (0x0100 - 0x01ac): @@ -303,6 +339,15 @@ #define NFP_NET_CFG_STATS_TX_MC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x80) #define NFP_NET_CFG_STATS_TX_BC_FRAMES (NFP_NET_CFG_STATS_BASE + 0x88) +#define NFP_NET_CFG_STATS_APP0_FRAMES (NFP_NET_CFG_STATS_BASE + 0x90) +#define NFP_NET_CFG_STATS_APP0_BYTES (NFP_NET_CFG_STATS_BASE + 0x98) +#define NFP_NET_CFG_STATS_APP1_FRAMES (NFP_NET_CFG_STATS_BASE + 0xa0) +#define NFP_NET_CFG_STATS_APP1_BYTES (NFP_NET_CFG_STATS_BASE + 0xa8) +#define NFP_NET_CFG_STATS_APP2_FRAMES (NFP_NET_CFG_STATS_BASE + 0xb0) +#define NFP_NET_CFG_STATS_APP2_BYTES (NFP_NET_CFG_STATS_BASE + 0xb8) +#define NFP_NET_CFG_STATS_APP3_FRAMES (NFP_NET_CFG_STATS_BASE + 0xc0) +#define NFP_NET_CFG_STATS_APP3_BYTES (NFP_NET_CFG_STATS_BASE + 0xc8) + /** * Per ring stats (0x1000 - 0x1800) * options, 64bit per entry diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c index 4c9897220969..3418f2277e9d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c @@ -106,6 +106,18 @@ static const struct _nfp_net_et_stats nfp_net_et_stats[] = { {"dev_tx_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_FRAMES)}, {"dev_tx_mc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_MC_FRAMES)}, {"dev_tx_bc_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_TX_BC_FRAMES)}, + + {"bpf_pass_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_FRAMES)}, + {"bpf_pass_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP0_BYTES)}, + /* see comments in outro functions in nfp_bpf_jit.c to find out + * how different BPF modes use app-specific counters + */ + {"bpf_app1_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_FRAMES)}, + {"bpf_app1_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP1_BYTES)}, + {"bpf_app2_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_FRAMES)}, + {"bpf_app2_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP2_BYTES)}, + {"bpf_app3_pkts", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_FRAMES)}, + {"bpf_app3_bytes", NN_ET_DEV_STAT(NFP_NET_CFG_STATS_APP3_BYTES)}, }; #define NN_ET_GLOBAL_STATS_LEN ARRAY_SIZE(nfp_net_et_stats) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c new file mode 100644 index 000000000000..8acfb631a0ea --- /dev/null +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_offload.c @@ -0,0 +1,294 @@ +/* + * Copyright (C) 2016 Netronome Systems, Inc. + * + * This software is dual licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree or the BSD 2-Clause License provided below. You have the + * option to license this software under the complete terms of either license. + * + * The BSD 2-Clause License: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * 1. Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +/* + * nfp_net_offload.c + * Netronome network device driver: TC offload functions for PF and VF + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "nfp_bpf.h" +#include "nfp_net_ctrl.h" +#include "nfp_net.h" + +void nfp_net_filter_stats_timer(unsigned long data) +{ + struct nfp_net *nn = (void *)data; + struct nfp_stat_pair latest; + + spin_lock_bh(&nn->rx_filter_lock); + + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + mod_timer(&nn->rx_filter_stats_timer, + jiffies + NFP_NET_STAT_POLL_IVL); + + spin_unlock_bh(&nn->rx_filter_lock); + + latest.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + latest.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + + if (latest.pkts != nn->rx_filter.pkts) + nn->rx_filter_change = jiffies; + + nn->rx_filter = latest; +} + +static void nfp_net_bpf_stats_reset(struct nfp_net *nn) +{ + nn->rx_filter.pkts = nn_readq(nn, NFP_NET_CFG_STATS_APP1_FRAMES); + nn->rx_filter.bytes = nn_readq(nn, NFP_NET_CFG_STATS_APP1_BYTES); + nn->rx_filter_prev = nn->rx_filter; + nn->rx_filter_change = jiffies; +} + +static int +nfp_net_bpf_stats_update(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + struct tc_action *a; + LIST_HEAD(actions); + u64 bytes, pkts; + + pkts = nn->rx_filter.pkts - nn->rx_filter_prev.pkts; + bytes = nn->rx_filter.bytes - nn->rx_filter_prev.bytes; + bytes -= pkts * ETH_HLEN; + + nn->rx_filter_prev = nn->rx_filter; + + preempt_disable(); + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) + tcf_action_stats_update(a, bytes, pkts, nn->rx_filter_change); + + preempt_enable(); + + return 0; +} + +static int +nfp_net_bpf_get_act(struct nfp_net *nn, struct tc_cls_bpf_offload *cls_bpf) +{ + const struct tc_action *a; + LIST_HEAD(actions); + + /* TC direct action */ + if (cls_bpf->exts_integrated) { + if (tc_no_actions(cls_bpf->exts)) + return NN_ACT_DIRECT; + + return -ENOTSUPP; + } + + /* TC legacy mode */ + if (!tc_single_action(cls_bpf->exts)) + return -ENOTSUPP; + + tcf_exts_to_list(cls_bpf->exts, &actions); + list_for_each_entry(a, &actions, list) { + if (is_tcf_gact_shot(a)) + return NN_ACT_TC_DROP; + + if (is_tcf_mirred_redirect(a) && + tcf_mirred_ifindex(a) == nn->netdev->ifindex) + return NN_ACT_TC_REDIR; + } + + return -ENOTSUPP; +} + +static int +nfp_net_bpf_offload_prepare(struct nfp_net *nn, + struct tc_cls_bpf_offload *cls_bpf, + struct nfp_bpf_result *res, + void **code, dma_addr_t *dma_addr, u16 max_instr) +{ + unsigned int code_sz = max_instr * sizeof(u64); + enum nfp_bpf_action_type act; + u16 start_off, done_off; + unsigned int max_mtu; + int ret; + + if (!IS_ENABLED(CONFIG_BPF_SYSCALL)) + return -ENOTSUPP; + + ret = nfp_net_bpf_get_act(nn, cls_bpf); + if (ret < 0) + return ret; + act = ret; + + max_mtu = nn_readb(nn, NFP_NET_CFG_BPF_INL_MTU) * 64 - 32; + if (max_mtu < nn->netdev->mtu) { + nn_info(nn, "BPF offload not supported with MTU larger than HW packet split boundary\n"); + return -ENOTSUPP; + } + + start_off = nn_readw(nn, NFP_NET_CFG_BPF_START); + done_off = nn_readw(nn, NFP_NET_CFG_BPF_DONE); + + *code = dma_zalloc_coherent(&nn->pdev->dev, code_sz, dma_addr, + GFP_KERNEL); + if (!*code) + return -ENOMEM; + + ret = nfp_bpf_jit(cls_bpf->prog, *code, act, start_off, done_off, + max_instr, res); + if (ret) + goto out; + + return 0; + +out: + dma_free_coherent(&nn->pdev->dev, code_sz, *code, *dma_addr); + return ret; +} + +static void +nfp_net_bpf_load_and_start(struct nfp_net *nn, u32 tc_flags, + void *code, dma_addr_t dma_addr, + unsigned int code_sz, unsigned int n_instr, + bool dense_mode) +{ + u64 bpf_addr = dma_addr; + int err; + + nn->bpf_offload_skip_sw = !!(tc_flags & TCA_CLS_FLAGS_SKIP_SW); + + if (dense_mode) + bpf_addr |= NFP_NET_CFG_BPF_CFG_8CTX; + + nn_writew(nn, NFP_NET_CFG_BPF_SIZE, n_instr); + nn_writeq(nn, NFP_NET_CFG_BPF_ADDR, bpf_addr); + + /* Load up the JITed code */ + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_BPF); + if (err) + nn_err(nn, "FW command error while loading BPF: %d\n", err); + + /* Enable passing packets through BPF function */ + nn->ctrl |= NFP_NET_CFG_CTRL_BPF; + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + err = nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); + if (err) + nn_err(nn, "FW command error while enabling BPF: %d\n", err); + + dma_free_coherent(&nn->pdev->dev, code_sz, code, dma_addr); + + nfp_net_bpf_stats_reset(nn); + mod_timer(&nn->rx_filter_stats_timer, jiffies + NFP_NET_STAT_POLL_IVL); +} + +static int nfp_net_bpf_stop(struct nfp_net *nn) +{ + if (!(nn->ctrl & NFP_NET_CFG_CTRL_BPF)) + return 0; + + spin_lock_bh(&nn->rx_filter_lock); + nn->ctrl &= ~NFP_NET_CFG_CTRL_BPF; + spin_unlock_bh(&nn->rx_filter_lock); + nn_writel(nn, NFP_NET_CFG_CTRL, nn->ctrl); + + del_timer_sync(&nn->rx_filter_stats_timer); + nn->bpf_offload_skip_sw = 0; + + return nfp_net_reconfig(nn, NFP_NET_CFG_UPDATE_GEN); +} + +int +nfp_net_bpf_offload(struct nfp_net *nn, u32 handle, __be16 proto, + struct tc_cls_bpf_offload *cls_bpf) +{ + struct nfp_bpf_result res; + dma_addr_t dma_addr; + u16 max_instr; + void *code; + int err; + + max_instr = nn_readw(nn, NFP_NET_CFG_BPF_MAX_LEN); + + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + /* There is nothing stopping us from implementing seamless + * replace but the simple method of loading I adopted in + * the firmware does not handle atomic replace (i.e. we have to + * stop the BPF offload and re-enable it). Leaking-in a few + * frames which didn't have BPF applied in the hardware should + * be fine if software fallback is available, though. + */ + if (nn->bpf_offload_skip_sw) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_stop(nn); + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_ADD: + if (nn->ctrl & NFP_NET_CFG_CTRL_BPF) + return -EBUSY; + + err = nfp_net_bpf_offload_prepare(nn, cls_bpf, &res, &code, + &dma_addr, max_instr); + if (err) + return err; + + nfp_net_bpf_load_and_start(nn, cls_bpf->gen_flags, code, + dma_addr, max_instr * sizeof(u64), + res.n_instr, res.dense_mode); + return 0; + + case TC_CLSBPF_DESTROY: + return nfp_net_bpf_stop(nn); + + case TC_CLSBPF_STATS: + return nfp_net_bpf_stats_update(nn, cls_bpf); + + default: + return -ENOTSUPP; + } +} diff --git a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c index f7062cb648e1..2800bbf65a89 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_netvf_main.c @@ -148,7 +148,7 @@ static int nfp_netvf_pci_probe(struct pci_dev *pdev, dev_warn(&pdev->dev, "OBSOLETE Firmware detected - VF isolation not available\n"); } else { switch (fw_ver.major) { - case 1 ... 3: + case 1 ... 4: if (is_nfp3200) { stride = 2; tx_bar_no = NFP_NET_Q0_BAR; diff --git a/drivers/net/ethernet/nuvoton/w90p910_ether.c b/drivers/net/ethernet/nuvoton/w90p910_ether.c index 87b7b814778b..712d8bcb7d8c 100644 --- a/drivers/net/ethernet/nuvoton/w90p910_ether.c +++ b/drivers/net/ethernet/nuvoton/w90p910_ether.c @@ -751,7 +751,7 @@ static void netdev_rx(struct net_device *dev) dev_err(&pdev->dev, "rx crc err\n"); ether->stats.rx_crc_errors++; } else if (status & RXDS_ALIE) { - dev_err(&pdev->dev, "rx aligment err\n"); + dev_err(&pdev->dev, "rx alignment err\n"); ether->stats.rx_frame_errors++; } else if (status & RXDS_PTLE) { dev_err(&pdev->dev, "rx longer err\n"); diff --git a/drivers/net/ethernet/qlogic/Kconfig b/drivers/net/ethernet/qlogic/Kconfig index 6ba48406899e..0df1391f9663 100644 --- a/drivers/net/ethernet/qlogic/Kconfig +++ b/drivers/net/ethernet/qlogic/Kconfig @@ -88,6 +88,9 @@ config QED ---help--- This enables the support for ... +config QED_LL2 + bool + config QED_SRIOV bool "QLogic QED 25/40/100Gb SR-IOV support" depends on QED && PCI_IOV @@ -104,4 +107,15 @@ config QEDE ---help--- This enables the support for ... +config INFINIBAND_QEDR + tristate "QLogic qede RoCE sources [debug]" + depends on QEDE && 64BIT + select QED_LL2 + default n + ---help--- + This provides a temporary node that allows the compilation + and logical testing of the InfiniBand over Ethernet support + for QLogic QED. This would be replaced by the 'real' option + once the QEDR driver is added [+relocated]. + endif # NET_VENDOR_QLOGIC diff --git a/drivers/net/ethernet/qlogic/qed/Makefile b/drivers/net/ethernet/qlogic/qed/Makefile index d1f157e439cf..cda0af7fbc20 100644 --- a/drivers/net/ethernet/qlogic/qed/Makefile +++ b/drivers/net/ethernet/qlogic/qed/Makefile @@ -2,5 +2,7 @@ obj-$(CONFIG_QED) := qed.o qed-y := qed_cxt.o qed_dev.o qed_hw.o qed_init_fw_funcs.o qed_init_ops.o \ qed_int.o qed_main.o qed_mcp.o qed_sp_commands.o qed_spq.o qed_l2.o \ - qed_selftest.o qed_dcbx.o + qed_selftest.o qed_dcbx.o qed_debug.o qed-$(CONFIG_QED_SRIOV) += qed_sriov.o qed_vf.o +qed-$(CONFIG_QED_LL2) += qed_ll2.o +qed-$(CONFIG_INFINIBAND_QEDR) += qed_roce.o diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h index 45ab74676573..653bb5735f0c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed.h +++ b/drivers/net/ethernet/qlogic/qed/qed.h @@ -23,10 +23,11 @@ #include #include #include +#include "qed_debug.h" #include "qed_hsi.h" extern const struct qed_common_ops qed_common_ops_pass; -#define DRV_MODULE_VERSION "8.7.1.20" +#define DRV_MODULE_VERSION "8.10.9.20" #define MAX_HWFNS_PER_DEVICE (4) #define NAME_SIZE 16 @@ -34,6 +35,9 @@ extern const struct qed_common_ops qed_common_ops_pass; #define QED_WFQ_UNIT 100 +#define QED_WID_SIZE (1024) +#define QED_PF_DEMS_SIZE (4) + /* cau states */ enum qed_coalescing_mode { QED_COAL_MODE_DISABLE, @@ -42,9 +46,19 @@ enum qed_coalescing_mode { struct qed_eth_cb_ops; struct qed_dev_info; +union qed_mcp_protocol_stats; +enum qed_mcp_protocol_type; /* helpers */ static inline u32 qed_db_addr(u32 cid, u32 DEMS) +{ + u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) | + (cid * QED_PF_DEMS_SIZE); + + return db_addr; +} + +static inline u32 qed_db_addr_vf(u32 cid, u32 DEMS) { u32 db_addr = FIELD_VALUE(DB_LEGACY_ADDR_DEMS, DEMS) | FIELD_VALUE(DB_LEGACY_ADDR_ICID, cid); @@ -69,6 +83,7 @@ struct qed_sb_info; struct qed_sb_attn_info; struct qed_cxt_mngr; struct qed_sb_sp_info; +struct qed_ll2_info; struct qed_mcp_info; struct qed_rt_data { @@ -148,13 +163,17 @@ enum QED_RESOURCES { QED_RL, QED_MAC, QED_VLAN, + QED_RDMA_CNQ_RAM, QED_ILT, + QED_LL2_QUEUE, + QED_RDMA_STATS_QUEUE, QED_MAX_RESC, }; enum QED_FEATURE { QED_PF_L2_QUE, QED_VF, + QED_RDMA_CNQ, QED_MAX_FEATURES, }; @@ -357,6 +376,9 @@ struct qed_hwfn { struct qed_sb_attn_info *p_sb_attn; /* Protocol related */ + bool using_ll2; + struct qed_ll2_info *p_ll2_info; + struct qed_rdma_info *p_rdma_info; struct qed_pf_params pf_params; bool b_rdma_enabled_in_prs; @@ -393,6 +415,19 @@ struct qed_hwfn { /* Buffer for unzipping firmware data */ void *unzip_buf; + struct dbg_tools_data dbg_info; + + /* PWM region specific data */ + u32 dpi_size; + u32 dpi_count; + + /* This is used to calculate the doorbell address */ + u32 dpi_start_offset; + + /* If one of the following is set then EDPM shouldn't be used */ + u8 dcbx_no_edpm; + u8 db_bar_no_edpm; + struct qed_simd_fp_handler simd_proto_handler[64]; #ifdef CONFIG_QED_SRIOV @@ -402,6 +437,7 @@ struct qed_hwfn { #endif struct z_stream_s *stream; + struct qed_roce_ll2_info *ll2; }; struct pci_params { @@ -426,6 +462,21 @@ struct qed_int_params { bool fp_initialized; u8 fp_msix_base; u8 fp_msix_cnt; + u8 rdma_msix_base; + u8 rdma_msix_cnt; +}; + +struct qed_dbg_feature { + struct dentry *dentry; + u8 *dump_buf; + u32 buf_size; + u32 dumped_dwords; +}; + +struct qed_dbg_params { + struct qed_dbg_feature features[DBG_FEATURE_NUM]; + u8 engine_for_debug; + bool print_data; }; struct qed_dev { @@ -442,6 +493,8 @@ struct qed_dev { CHIP_REV_IS_A0(dev)) #define QED_IS_BB_B0(dev) (QED_IS_BB(dev) && \ CHIP_REV_IS_B0(dev)) +#define QED_IS_AH(dev) ((dev)->type == QED_DEV_TYPE_AH) +#define QED_IS_K2(dev) QED_IS_AH(dev) #define QED_GET_TYPE(dev) (QED_IS_BB_A0(dev) ? CHIP_BB_A0 : \ QED_IS_BB_B0(dev) ? CHIP_BB_B0 : CHIP_K2) @@ -517,7 +570,6 @@ struct qed_dev { bool b_is_vf; u32 drv_type; - struct qed_eth_stats *reset_stats; struct qed_fw_data *fw_data; @@ -542,7 +594,18 @@ struct qed_dev { } protocol_ops; void *ops_cookie; + struct qed_dbg_params dbg_params; + +#ifdef CONFIG_QED_LL2 + struct qed_cb_ll2_info *ll2; + u8 ll2_mac_address[ETH_ALEN]; +#endif + const struct firmware *firmware; + + u32 rdma_max_sge; + u32 rdma_max_inline; + u32 rdma_max_srq_sge; }; #define NUM_OF_VFS(dev) MAX_NUM_VFS_BB @@ -606,7 +669,9 @@ void qed_link_update(struct qed_hwfn *hwfn); u32 qed_unzip_data(struct qed_hwfn *p_hwfn, u32 input_len, u8 *input_buf, u32 max_size, u8 *unzip_buf); - +void qed_get_protocol_stats(struct qed_dev *cdev, + enum qed_mcp_protocol_type type, + union qed_mcp_protocol_stats *stats); int qed_slowpath_irq_req(struct qed_hwfn *hwfn); #endif /* _QED_H */ diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c index 1c35f376143e..82370a1a59ad 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c @@ -48,7 +48,13 @@ #define TM_ELEM_SIZE 4 /* ILT constants */ +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +/* For RoCE we configure to 64K to cover for RoCE max tasks 256K purpose. */ +#define ILT_DEFAULT_HW_P_SIZE 4 +#else #define ILT_DEFAULT_HW_P_SIZE 3 +#endif + #define ILT_PAGE_IN_BYTES(hw_p_size) (1U << ((hw_p_size) + 12)) #define ILT_CFG_REG(cli, reg) PSWRQ2_REG_ ## cli ## _ ## reg ## _RT_OFFSET @@ -377,9 +383,8 @@ static void qed_cxt_set_proto_cid_count(struct qed_hwfn *p_hwfn, } } -u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn, - enum protocol_type type, - u32 *vf_cid) +u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn, + enum protocol_type type, u32 *vf_cid) { if (vf_cid) *vf_cid = p_hwfn->p_cxt_mngr->conn_cfg[type].cids_per_vf; @@ -405,10 +410,10 @@ u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, return cnt; } -static void -qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn, - enum protocol_type proto, - u8 seg, u8 seg_type, u32 count, bool has_fl) +static void qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn, + enum protocol_type proto, + u8 seg, + u8 seg_type, u32 count, bool has_fl) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; struct qed_tid_seg *p_seg = &p_mngr->conn_cfg[proto].tid_seg[seg]; @@ -420,8 +425,7 @@ qed_cxt_set_proto_tid_count(struct qed_hwfn *p_hwfn, static void qed_ilt_cli_blk_fill(struct qed_ilt_client_cfg *p_cli, struct qed_ilt_cli_blk *p_blk, - u32 start_line, u32 total_size, - u32 elem_size) + u32 start_line, u32 total_size, u32 elem_size) { u32 ilt_size = ILT_PAGE_IN_BYTES(p_cli->p_size.val); @@ -448,8 +452,7 @@ static void qed_ilt_cli_adv_line(struct qed_hwfn *p_hwfn, p_cli->first.val = *p_line; p_cli->active = true; - *p_line += DIV_ROUND_UP(p_blk->total_size, - p_blk->real_size_in_page); + *p_line += DIV_ROUND_UP(p_blk->total_size, p_blk->real_size_in_page); p_cli->last.val = *p_line - 1; DP_VERBOSE(p_hwfn, QED_MSG_ILT, @@ -795,10 +798,9 @@ static int qed_cxt_src_t2_alloc(struct qed_hwfn *p_hwfn) p_mngr->t2_num_pages = DIV_ROUND_UP(total_size, psz); /* allocate t2 */ - p_mngr->t2 = kzalloc(p_mngr->t2_num_pages * sizeof(struct qed_dma_mem), + p_mngr->t2 = kcalloc(p_mngr->t2_num_pages, sizeof(struct qed_dma_mem), GFP_KERNEL); if (!p_mngr->t2) { - DP_NOTICE(p_hwfn, "Failed to allocate t2 table\n"); rc = -ENOMEM; goto t2_fail; } @@ -926,12 +928,9 @@ static int qed_ilt_blk_alloc(struct qed_hwfn *p_hwfn, void *p_virt; u32 size; - size = min_t(u32, sz_left, - p_blk->real_size_in_page); + size = min_t(u32, sz_left, p_blk->real_size_in_page); p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - size, - &p_phys, - GFP_KERNEL); + size, &p_phys, GFP_KERNEL); if (!p_virt) return -ENOMEM; memset(p_virt, 0, size); @@ -963,7 +962,6 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) p_mngr->ilt_shadow = kcalloc(size, sizeof(struct qed_dma_mem), GFP_KERNEL); if (!p_mngr->ilt_shadow) { - DP_NOTICE(p_hwfn, "Failed to allocate ilt shadow table\n"); rc = -ENOMEM; goto ilt_shadow_fail; } @@ -976,7 +974,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) for (j = 0; j < ILT_CLI_PF_BLOCKS; j++) { p_blk = &clients[i].pf_blks[j]; rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, 0); - if (rc != 0) + if (rc) goto ilt_shadow_fail; } for (k = 0; k < p_mngr->vf_count; k++) { @@ -985,7 +983,7 @@ static int qed_ilt_shadow_alloc(struct qed_hwfn *p_hwfn) p_blk = &clients[i].vf_blks[j]; rc = qed_ilt_blk_alloc(p_hwfn, p_blk, i, lines); - if (rc != 0) + if (rc) goto ilt_shadow_fail; } } @@ -1056,10 +1054,8 @@ int qed_cxt_mngr_alloc(struct qed_hwfn *p_hwfn) u32 i; p_mngr = kzalloc(sizeof(*p_mngr), GFP_KERNEL); - if (!p_mngr) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_cxt_mngr'\n"); + if (!p_mngr) return -ENOMEM; - } /* Initialize ILT client registers */ clients = p_mngr->clients; @@ -1111,24 +1107,18 @@ int qed_cxt_tables_alloc(struct qed_hwfn *p_hwfn) /* Allocate the ILT shadow table */ rc = qed_ilt_shadow_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate ilt memory\n"); + if (rc) goto tables_alloc_fail; - } /* Allocate the T2 table */ rc = qed_cxt_src_t2_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate T2 memory\n"); + if (rc) goto tables_alloc_fail; - } /* Allocate and initialize the acquired cids bitmaps */ rc = qed_cid_map_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate cid maps\n"); + if (rc) goto tables_alloc_fail; - } return 0; @@ -1672,7 +1662,7 @@ static void qed_tm_init_pf(struct qed_hwfn *p_hwfn) p_hwfn->rel_pf_id * NUM_TASK_PF_SEGMENTS + i); STORE_RT_REG_AGG(p_hwfn, rt_reg, cfg_word); - active_seg_mask |= (tm_iids.pf_tids[i] ? (1 << i) : 0); + active_seg_mask |= (tm_iids.pf_tids[i] ? BIT(i) : 0); tm_offset += tm_iids.pf_tids[i]; } @@ -1702,8 +1692,7 @@ void qed_cxt_hw_init_pf(struct qed_hwfn *p_hwfn) } int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, - enum protocol_type type, - u32 *p_cid) + enum protocol_type type, u32 *p_cid) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; u32 rel_cid; @@ -1717,8 +1706,7 @@ int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, p_mngr->acquired[type].max_count); if (rel_cid >= p_mngr->acquired[type].max_count) { - DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", - type); + DP_NOTICE(p_hwfn, "no CID available for protocol %d\n", type); return -EINVAL; } @@ -1730,8 +1718,7 @@ int qed_cxt_acquire_cid(struct qed_hwfn *p_hwfn, } static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn, - u32 cid, - enum protocol_type *p_type) + u32 cid, enum protocol_type *p_type) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; struct qed_cid_acquired_map *p_map; @@ -1763,8 +1750,7 @@ static bool qed_cxt_test_cid_acquired(struct qed_hwfn *p_hwfn, return true; } -void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, - u32 cid) +void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; enum protocol_type type; @@ -1781,8 +1767,7 @@ void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, __clear_bit(rel_cid, p_mngr->acquired[type].cid_map); } -int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, - struct qed_cxt_info *p_info) +int qed_cxt_get_cid_info(struct qed_hwfn *p_hwfn, struct qed_cxt_info *p_info) { struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr; u32 conn_cxt_size, hw_p_size, cxts_per_p, line; @@ -1860,6 +1845,8 @@ int qed_cxt_set_pf_params(struct qed_hwfn *p_hwfn) /* Set the number of required CORE connections */ u32 core_cids = 1; /* SPQ */ + if (p_hwfn->using_ll2) + core_cids += 4; qed_cxt_set_proto_cid_count(p_hwfn, PROTOCOLID_CORE, core_cids, 0); switch (p_hwfn->hw_info.personality) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h index c6f6f2e8192d..2b8bdaa77800 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h +++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h @@ -170,6 +170,13 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); */ void qed_cxt_release_cid(struct qed_hwfn *p_hwfn, u32 cid); +int qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn, + enum qed_cxt_elem_type elem_type, u32 iid); +u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn, + enum protocol_type type); +u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn, + enum protocol_type type); +int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto); #define QED_CTX_WORKING_MEM 0 #define QED_CTX_FL_MEM 1 diff --git a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c index 3656d2fd673d..130da1c0490b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dcbx.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dcbx.c @@ -875,11 +875,8 @@ int qed_dcbx_info_alloc(struct qed_hwfn *p_hwfn) int rc = 0; p_hwfn->p_dcbx_info = kzalloc(sizeof(*p_hwfn->p_dcbx_info), GFP_KERNEL); - if (!p_hwfn->p_dcbx_info) { - DP_NOTICE(p_hwfn, - "Failed to allocate 'struct qed_dcbx_info'\n"); + if (!p_hwfn->p_dcbx_info) rc = -ENOMEM; - } return rc; } @@ -1190,10 +1187,8 @@ int qed_dcbx_get_config_params(struct qed_hwfn *p_hwfn, } dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL); - if (!dcbx_info) { - DP_ERR(p_hwfn, "Failed to allocate struct qed_dcbx_info\n"); + if (!dcbx_info) return -ENOMEM; - } rc = qed_dcbx_query_params(p_hwfn, dcbx_info, QED_DCBX_OPERATIONAL_MIB); if (rc) { @@ -1227,10 +1222,8 @@ static struct qed_dcbx_get *qed_dcbnl_get_dcbx(struct qed_hwfn *hwfn, struct qed_dcbx_get *dcbx_info; dcbx_info = kzalloc(sizeof(*dcbx_info), GFP_KERNEL); - if (!dcbx_info) { - DP_ERR(hwfn->cdev, "Failed to allocate memory for dcbx_info\n"); + if (!dcbx_info) return NULL; - } if (qed_dcbx_query_params(hwfn, dcbx_info, type)) { kfree(dcbx_info); @@ -1982,6 +1975,7 @@ static int qed_dcbnl_get_ieee_pfc(struct qed_dev *cdev, if (!dcbx_info->operational.ieee) { DP_INFO(hwfn, "DCBX is not enabled/operational in IEEE mode\n"); + kfree(dcbx_info); return -EINVAL; } @@ -2150,17 +2144,19 @@ static int qed_dcbnl_ieee_setets(struct qed_dev *cdev, struct ieee_ets *ets) return rc; } -int qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets) +static int +qed_dcbnl_ieee_peer_getets(struct qed_dev *cdev, struct ieee_ets *ets) { return qed_dcbnl_get_ieee_ets(cdev, ets, true); } -int qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc) +static int +qed_dcbnl_ieee_peer_getpfc(struct qed_dev *cdev, struct ieee_pfc *pfc) { return qed_dcbnl_get_ieee_pfc(cdev, pfc, true); } -int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) +static int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_dcbx_get *dcbx_info; @@ -2204,7 +2200,7 @@ int qed_dcbnl_ieee_getapp(struct qed_dev *cdev, struct dcb_app *app) return 0; } -int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) +static int qed_dcbnl_ieee_setapp(struct qed_dev *cdev, struct dcb_app *app) { struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); struct qed_dcbx_get *dcbx_info; diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c new file mode 100644 index 000000000000..88e7d5bef909 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c @@ -0,0 +1,6898 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" + +/* Chip IDs enum */ +enum chip_ids { + CHIP_RESERVED, + CHIP_BB_B0, + CHIP_K2, + MAX_CHIP_IDS +}; + +/* Memory groups enum */ +enum mem_groups { + MEM_GROUP_PXP_MEM, + MEM_GROUP_DMAE_MEM, + MEM_GROUP_CM_MEM, + MEM_GROUP_QM_MEM, + MEM_GROUP_TM_MEM, + MEM_GROUP_BRB_RAM, + MEM_GROUP_BRB_MEM, + MEM_GROUP_PRS_MEM, + MEM_GROUP_SDM_MEM, + MEM_GROUP_PBUF, + MEM_GROUP_IOR, + MEM_GROUP_RAM, + MEM_GROUP_BTB_RAM, + MEM_GROUP_RDIF_CTX, + MEM_GROUP_TDIF_CTX, + MEM_GROUP_CONN_CFC_MEM, + MEM_GROUP_TASK_CFC_MEM, + MEM_GROUP_CAU_PI, + MEM_GROUP_CAU_MEM, + MEM_GROUP_PXP_ILT, + MEM_GROUP_MULD_MEM, + MEM_GROUP_BTB_MEM, + MEM_GROUP_IGU_MEM, + MEM_GROUP_IGU_MSIX, + MEM_GROUP_CAU_SB, + MEM_GROUP_BMB_RAM, + MEM_GROUP_BMB_MEM, + MEM_GROUPS_NUM +}; + +/* Memory groups names */ +static const char * const s_mem_group_names[] = { + "PXP_MEM", + "DMAE_MEM", + "CM_MEM", + "QM_MEM", + "TM_MEM", + "BRB_RAM", + "BRB_MEM", + "PRS_MEM", + "SDM_MEM", + "PBUF", + "IOR", + "RAM", + "BTB_RAM", + "RDIF_CTX", + "TDIF_CTX", + "CONN_CFC_MEM", + "TASK_CFC_MEM", + "CAU_PI", + "CAU_MEM", + "PXP_ILT", + "MULD_MEM", + "BTB_MEM", + "IGU_MEM", + "IGU_MSIX", + "CAU_SB", + "BMB_RAM", + "BMB_MEM", +}; + +/* Idle check conditions */ +static u32 cond4(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) != imm[1]) && ((r[1] & imm[2]) != imm[3]); +} + +static u32 cond6(const u32 *r, const u32 *imm) +{ + return ((r[0] >> imm[0]) & imm[1]) != imm[2]; +} + +static u32 cond5(const u32 *r, const u32 *imm) +{ + return (r[0] & imm[0]) != imm[1]; +} + +static u32 cond8(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) >> imm[1]) != + (((r[0] & imm[2]) >> imm[3]) | ((r[1] & imm[4]) << imm[5])); +} + +static u32 cond9(const u32 *r, const u32 *imm) +{ + return ((r[0] & imm[0]) >> imm[1]) != (r[0] & imm[2]); +} + +static u32 cond1(const u32 *r, const u32 *imm) +{ + return (r[0] & ~imm[0]) != imm[1]; +} + +static u32 cond0(const u32 *r, const u32 *imm) +{ + return r[0] != imm[0]; +} + +static u32 cond10(const u32 *r, const u32 *imm) +{ + return r[0] != r[1] && r[2] == imm[0]; +} + +static u32 cond11(const u32 *r, const u32 *imm) +{ + return r[0] != r[1] && r[2] > imm[0]; +} + +static u32 cond3(const u32 *r, const u32 *imm) +{ + return r[0] != r[1]; +} + +static u32 cond12(const u32 *r, const u32 *imm) +{ + return r[0] & imm[0]; +} + +static u32 cond7(const u32 *r, const u32 *imm) +{ + return r[0] < (r[1] - imm[0]); +} + +static u32 cond2(const u32 *r, const u32 *imm) +{ + return r[0] > imm[0]; +} + +/* Array of Idle Check conditions */ +static u32(*cond_arr[]) (const u32 *r, const u32 *imm) = { + cond0, + cond1, + cond2, + cond3, + cond4, + cond5, + cond6, + cond7, + cond8, + cond9, + cond10, + cond11, + cond12, +}; + +/******************************* Data Types **********************************/ + +enum platform_ids { + PLATFORM_ASIC, + PLATFORM_RESERVED, + PLATFORM_RESERVED2, + PLATFORM_RESERVED3, + MAX_PLATFORM_IDS +}; + +struct dbg_array { + const u32 *ptr; + u32 size_in_dwords; +}; + +/* Chip constant definitions */ +struct chip_defs { + const char *name; + struct { + u8 num_ports; + u8 num_pfs; + } per_platform[MAX_PLATFORM_IDS]; +}; + +/* Platform constant definitions */ +struct platform_defs { + const char *name; + u32 delay_factor; +}; + +/* Storm constant definitions */ +struct storm_defs { + char letter; + enum block_id block_id; + enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; + bool has_vfc; + u32 sem_fast_mem_addr; + u32 sem_frame_mode_addr; + u32 sem_slow_enable_addr; + u32 sem_slow_mode_addr; + u32 sem_slow_mode1_conf_addr; + u32 sem_sync_dbg_empty_addr; + u32 sem_slow_dbg_empty_addr; + u32 cm_ctx_wr_addr; + u32 cm_conn_ag_ctx_lid_size; /* In quad-regs */ + u32 cm_conn_ag_ctx_rd_addr; + u32 cm_conn_st_ctx_lid_size; /* In quad-regs */ + u32 cm_conn_st_ctx_rd_addr; + u32 cm_task_ag_ctx_lid_size; /* In quad-regs */ + u32 cm_task_ag_ctx_rd_addr; + u32 cm_task_st_ctx_lid_size; /* In quad-regs */ + u32 cm_task_st_ctx_rd_addr; +}; + +/* Block constant definitions */ +struct block_defs { + const char *name; + bool has_dbg_bus[MAX_CHIP_IDS]; + bool associated_to_storm; + u32 storm_id; /* Valid only if associated_to_storm is true */ + enum dbg_bus_clients dbg_client_id[MAX_CHIP_IDS]; + u32 dbg_select_addr; + u32 dbg_cycle_enable_addr; + u32 dbg_shift_addr; + u32 dbg_force_valid_addr; + u32 dbg_force_frame_addr; + bool has_reset_bit; + bool unreset; /* If true, the block is taken out of reset before dump */ + enum dbg_reset_regs reset_reg; + u8 reset_bit_offset; /* Bit offset in reset register */ +}; + +/* Reset register definitions */ +struct reset_reg_defs { + u32 addr; + u32 unreset_val; + bool exists[MAX_CHIP_IDS]; +}; + +struct grc_param_defs { + u32 default_val[MAX_CHIP_IDS]; + u32 min; + u32 max; + bool is_preset; + u32 exclude_all_preset_val; + u32 crash_preset_val; +}; + +struct rss_mem_defs { + const char *mem_name; + const char *type_name; + u32 addr; /* In 128b units */ + u32 num_entries[MAX_CHIP_IDS]; + u32 entry_width[MAX_CHIP_IDS]; /* In bits */ +}; + +struct vfc_ram_defs { + const char *mem_name; + const char *type_name; + u32 base_row; + u32 num_rows; +}; + +struct big_ram_defs { + const char *instance_name; + enum mem_groups mem_group_id; + enum mem_groups ram_mem_group_id; + enum dbg_grc_params grc_param; + u32 addr_reg_addr; + u32 data_reg_addr; + u32 num_of_blocks[MAX_CHIP_IDS]; +}; + +struct phy_defs { + const char *phy_name; + u32 base_addr; + u32 tbus_addr_lo_addr; + u32 tbus_addr_hi_addr; + u32 tbus_data_lo_addr; + u32 tbus_data_hi_addr; +}; + +/******************************** Constants **********************************/ + +#define MAX_LCIDS 320 +#define MAX_LTIDS 320 +#define NUM_IOR_SETS 2 +#define IORS_PER_SET 176 +#define IOR_SET_OFFSET(set_id) ((set_id) * 256) +#define BYTES_IN_DWORD sizeof(u32) + +/* In the macros below, size and offset are specified in bits */ +#define CEIL_DWORDS(size) DIV_ROUND_UP(size, 32) +#define FIELD_BIT_OFFSET(type, field) type ## _ ## field ## _ ## OFFSET +#define FIELD_BIT_SIZE(type, field) type ## _ ## field ## _ ## SIZE +#define FIELD_DWORD_OFFSET(type, field) \ + (int)(FIELD_BIT_OFFSET(type, field) / 32) +#define FIELD_DWORD_SHIFT(type, field) (FIELD_BIT_OFFSET(type, field) % 32) +#define FIELD_BIT_MASK(type, field) \ + (((1 << FIELD_BIT_SIZE(type, field)) - 1) << \ + FIELD_DWORD_SHIFT(type, field)) +#define SET_VAR_FIELD(var, type, field, val) \ + do { \ + var[FIELD_DWORD_OFFSET(type, field)] &= \ + (~FIELD_BIT_MASK(type, field)); \ + var[FIELD_DWORD_OFFSET(type, field)] |= \ + (val) << FIELD_DWORD_SHIFT(type, field); \ + } while (0) +#define ARR_REG_WR(dev, ptt, addr, arr, arr_size) \ + do { \ + for (i = 0; i < (arr_size); i++) \ + qed_wr(dev, ptt, addr, (arr)[i]); \ + } while (0) +#define ARR_REG_RD(dev, ptt, addr, arr, arr_size) \ + do { \ + for (i = 0; i < (arr_size); i++) \ + (arr)[i] = qed_rd(dev, ptt, addr); \ + } while (0) + +#define DWORDS_TO_BYTES(dwords) ((dwords) * BYTES_IN_DWORD) +#define BYTES_TO_DWORDS(bytes) ((bytes) / BYTES_IN_DWORD) +#define RAM_LINES_TO_DWORDS(lines) ((lines) * 2) +#define RAM_LINES_TO_BYTES(lines) \ + DWORDS_TO_BYTES(RAM_LINES_TO_DWORDS(lines)) +#define REG_DUMP_LEN_SHIFT 24 +#define MEM_DUMP_ENTRY_SIZE_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_dump_mem)) +#define IDLE_CHK_RULE_SIZE_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_rule)) +#define IDLE_CHK_RESULT_HDR_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_hdr)) +#define IDLE_CHK_RESULT_REG_HDR_DWORDS \ + BYTES_TO_DWORDS(sizeof(struct dbg_idle_chk_result_reg_hdr)) +#define IDLE_CHK_MAX_ENTRIES_SIZE 32 + +/* The sizes and offsets below are specified in bits */ +#define VFC_CAM_CMD_STRUCT_SIZE 64 +#define VFC_CAM_CMD_ROW_OFFSET 48 +#define VFC_CAM_CMD_ROW_SIZE 9 +#define VFC_CAM_ADDR_STRUCT_SIZE 16 +#define VFC_CAM_ADDR_OP_OFFSET 0 +#define VFC_CAM_ADDR_OP_SIZE 4 +#define VFC_CAM_RESP_STRUCT_SIZE 256 +#define VFC_RAM_ADDR_STRUCT_SIZE 16 +#define VFC_RAM_ADDR_OP_OFFSET 0 +#define VFC_RAM_ADDR_OP_SIZE 2 +#define VFC_RAM_ADDR_ROW_OFFSET 2 +#define VFC_RAM_ADDR_ROW_SIZE 10 +#define VFC_RAM_RESP_STRUCT_SIZE 256 +#define VFC_CAM_CMD_DWORDS CEIL_DWORDS(VFC_CAM_CMD_STRUCT_SIZE) +#define VFC_CAM_ADDR_DWORDS CEIL_DWORDS(VFC_CAM_ADDR_STRUCT_SIZE) +#define VFC_CAM_RESP_DWORDS CEIL_DWORDS(VFC_CAM_RESP_STRUCT_SIZE) +#define VFC_RAM_CMD_DWORDS VFC_CAM_CMD_DWORDS +#define VFC_RAM_ADDR_DWORDS CEIL_DWORDS(VFC_RAM_ADDR_STRUCT_SIZE) +#define VFC_RAM_RESP_DWORDS CEIL_DWORDS(VFC_RAM_RESP_STRUCT_SIZE) +#define NUM_VFC_RAM_TYPES 4 +#define VFC_CAM_NUM_ROWS 512 +#define VFC_OPCODE_CAM_RD 14 +#define VFC_OPCODE_RAM_RD 0 +#define NUM_RSS_MEM_TYPES 5 +#define NUM_BIG_RAM_TYPES 3 +#define BIG_RAM_BLOCK_SIZE_BYTES 128 +#define BIG_RAM_BLOCK_SIZE_DWORDS \ + BYTES_TO_DWORDS(BIG_RAM_BLOCK_SIZE_BYTES) +#define NUM_PHY_TBUS_ADDRESSES 2048 +#define PHY_DUMP_SIZE_DWORDS (NUM_PHY_TBUS_ADDRESSES / 2) +#define RESET_REG_UNRESET_OFFSET 4 +#define STALL_DELAY_MS 500 +#define STATIC_DEBUG_LINE_DWORDS 9 +#define NUM_DBG_BUS_LINES 256 +#define NUM_COMMON_GLOBAL_PARAMS 8 +#define FW_IMG_MAIN 1 +#define REG_FIFO_DEPTH_ELEMENTS 32 +#define REG_FIFO_ELEMENT_DWORDS 2 +#define REG_FIFO_DEPTH_DWORDS \ + (REG_FIFO_ELEMENT_DWORDS * REG_FIFO_DEPTH_ELEMENTS) +#define IGU_FIFO_DEPTH_ELEMENTS 64 +#define IGU_FIFO_ELEMENT_DWORDS 4 +#define IGU_FIFO_DEPTH_DWORDS \ + (IGU_FIFO_ELEMENT_DWORDS * IGU_FIFO_DEPTH_ELEMENTS) +#define PROTECTION_OVERRIDE_DEPTH_ELEMENTS 20 +#define PROTECTION_OVERRIDE_ELEMENT_DWORDS 2 +#define PROTECTION_OVERRIDE_DEPTH_DWORDS \ + (PROTECTION_OVERRIDE_DEPTH_ELEMENTS * \ + PROTECTION_OVERRIDE_ELEMENT_DWORDS) +#define MCP_SPAD_TRACE_OFFSIZE_ADDR \ + (MCP_REG_SCRATCH + \ + offsetof(struct static_init, sections[SPAD_SECTION_TRACE])) +#define MCP_TRACE_META_IMAGE_SIGNATURE 0x669955aa +#define EMPTY_FW_VERSION_STR "???_???_???_???" +#define EMPTY_FW_IMAGE_STR "???????????????" + +/***************************** Constant Arrays *******************************/ + +/* Debug arrays */ +static struct dbg_array s_dbg_arrays[MAX_BIN_DBG_BUFFER_TYPE] = { {0} }; + +/* Chip constant definitions array */ +static struct chip_defs s_chip_defs[MAX_CHIP_IDS] = { + { "reserved", { {0, 0}, {0, 0}, {0, 0}, {0, 0} } }, + { "bb_b0", + { {MAX_NUM_PORTS_BB, MAX_NUM_PFS_BB}, {0, 0}, {0, 0}, {0, 0} } }, + { "k2", { {MAX_NUM_PORTS_K2, MAX_NUM_PFS_K2}, {0, 0}, {0, 0}, {0, 0} } } +}; + +/* Storm constant definitions array */ +static struct storm_defs s_storm_defs[] = { + /* Tstorm */ + {'T', BLOCK_TSEM, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCT}, true, + TSEM_REG_FAST_MEMORY, + TSEM_REG_DBG_FRAME_MODE, TSEM_REG_SLOW_DBG_ACTIVE, + TSEM_REG_SLOW_DBG_MODE, TSEM_REG_DBG_MODE1_CFG, + TSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY, + TCM_REG_CTX_RBC_ACCS, + 4, TCM_REG_AGG_CON_CTX, + 16, TCM_REG_SM_CON_CTX, + 2, TCM_REG_AGG_TASK_CTX, + 4, TCM_REG_SM_TASK_CTX}, + /* Mstorm */ + {'M', BLOCK_MSEM, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCM}, false, + MSEM_REG_FAST_MEMORY, + MSEM_REG_DBG_FRAME_MODE, MSEM_REG_SLOW_DBG_ACTIVE, + MSEM_REG_SLOW_DBG_MODE, MSEM_REG_DBG_MODE1_CFG, + MSEM_REG_SYNC_DBG_EMPTY, MSEM_REG_SLOW_DBG_EMPTY, + MCM_REG_CTX_RBC_ACCS, + 1, MCM_REG_AGG_CON_CTX, + 10, MCM_REG_SM_CON_CTX, + 2, MCM_REG_AGG_TASK_CTX, + 7, MCM_REG_SM_TASK_CTX}, + /* Ustorm */ + {'U', BLOCK_USEM, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, + DBG_BUS_CLIENT_RBCU}, false, + USEM_REG_FAST_MEMORY, + USEM_REG_DBG_FRAME_MODE, USEM_REG_SLOW_DBG_ACTIVE, + USEM_REG_SLOW_DBG_MODE, USEM_REG_DBG_MODE1_CFG, + USEM_REG_SYNC_DBG_EMPTY, USEM_REG_SLOW_DBG_EMPTY, + UCM_REG_CTX_RBC_ACCS, + 2, UCM_REG_AGG_CON_CTX, + 13, UCM_REG_SM_CON_CTX, + 3, UCM_REG_AGG_TASK_CTX, + 3, UCM_REG_SM_TASK_CTX}, + /* Xstorm */ + {'X', BLOCK_XSEM, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCX}, false, + XSEM_REG_FAST_MEMORY, + XSEM_REG_DBG_FRAME_MODE, XSEM_REG_SLOW_DBG_ACTIVE, + XSEM_REG_SLOW_DBG_MODE, XSEM_REG_DBG_MODE1_CFG, + XSEM_REG_SYNC_DBG_EMPTY, XSEM_REG_SLOW_DBG_EMPTY, + XCM_REG_CTX_RBC_ACCS, + 9, XCM_REG_AGG_CON_CTX, + 15, XCM_REG_SM_CON_CTX, + 0, 0, + 0, 0}, + /* Ystorm */ + {'Y', BLOCK_YSEM, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCY}, false, + YSEM_REG_FAST_MEMORY, + YSEM_REG_DBG_FRAME_MODE, YSEM_REG_SLOW_DBG_ACTIVE, + YSEM_REG_SLOW_DBG_MODE, YSEM_REG_DBG_MODE1_CFG, + YSEM_REG_SYNC_DBG_EMPTY, TSEM_REG_SLOW_DBG_EMPTY, + YCM_REG_CTX_RBC_ACCS, + 2, YCM_REG_AGG_CON_CTX, + 3, YCM_REG_SM_CON_CTX, + 2, YCM_REG_AGG_TASK_CTX, + 12, YCM_REG_SM_TASK_CTX}, + /* Pstorm */ + {'P', BLOCK_PSEM, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, + DBG_BUS_CLIENT_RBCS}, true, + PSEM_REG_FAST_MEMORY, + PSEM_REG_DBG_FRAME_MODE, PSEM_REG_SLOW_DBG_ACTIVE, + PSEM_REG_SLOW_DBG_MODE, PSEM_REG_DBG_MODE1_CFG, + PSEM_REG_SYNC_DBG_EMPTY, PSEM_REG_SLOW_DBG_EMPTY, + PCM_REG_CTX_RBC_ACCS, + 0, 0, + 10, PCM_REG_SM_CON_CTX, + 0, 0, + 0, 0} +}; + +/* Block definitions array */ +static struct block_defs block_grc_defs = { + "grc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, + GRC_REG_DBG_SELECT, GRC_REG_DBG_DWORD_ENABLE, + GRC_REG_DBG_SHIFT, GRC_REG_DBG_FORCE_VALID, + GRC_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_UA, 1 +}; + +static struct block_defs block_miscs_defs = { + "miscs", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_misc_defs = { + "misc", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_dbu_defs = { + "dbu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_pglue_b_defs = { + "pglue_b", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH, DBG_BUS_CLIENT_RBCH}, + PGLUE_B_REG_DBG_SELECT, PGLUE_B_REG_DBG_DWORD_ENABLE, + PGLUE_B_REG_DBG_SHIFT, PGLUE_B_REG_DBG_FORCE_VALID, + PGLUE_B_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 1 +}; + +static struct block_defs block_cnig_defs = { + "cnig", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW}, + CNIG_REG_DBG_SELECT_K2, CNIG_REG_DBG_DWORD_ENABLE_K2, + CNIG_REG_DBG_SHIFT_K2, CNIG_REG_DBG_FORCE_VALID_K2, + CNIG_REG_DBG_FORCE_FRAME_K2, + true, false, DBG_RESET_REG_MISCS_PL_HV, 0 +}; + +static struct block_defs block_cpmu_defs = { + "cpmu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 8 +}; + +static struct block_defs block_ncsi_defs = { + "ncsi", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, + NCSI_REG_DBG_SELECT, NCSI_REG_DBG_DWORD_ENABLE, + NCSI_REG_DBG_SHIFT, NCSI_REG_DBG_FORCE_VALID, + NCSI_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 5 +}; + +static struct block_defs block_opte_defs = { + "opte", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 4 +}; + +static struct block_defs block_bmb_defs = { + "bmb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCB}, + BMB_REG_DBG_SELECT, BMB_REG_DBG_DWORD_ENABLE, + BMB_REG_DBG_SHIFT, BMB_REG_DBG_FORCE_VALID, + BMB_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_UA, 7 +}; + +static struct block_defs block_pcie_defs = { + "pcie", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE, + PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID, + PCIE_REG_DBG_COMMON_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_mcp_defs = { + "mcp", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_mcp2_defs = { + "mcp2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ, DBG_BUS_CLIENT_RBCZ}, + MCP2_REG_DBG_SELECT, MCP2_REG_DBG_DWORD_ENABLE, + MCP2_REG_DBG_SHIFT, MCP2_REG_DBG_FORCE_VALID, + MCP2_REG_DBG_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_pswhst_defs = { + "pswhst", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWHST_REG_DBG_SELECT, PSWHST_REG_DBG_DWORD_ENABLE, + PSWHST_REG_DBG_SHIFT, PSWHST_REG_DBG_FORCE_VALID, + PSWHST_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 0 +}; + +static struct block_defs block_pswhst2_defs = { + "pswhst2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWHST2_REG_DBG_SELECT, PSWHST2_REG_DBG_DWORD_ENABLE, + PSWHST2_REG_DBG_SHIFT, PSWHST2_REG_DBG_FORCE_VALID, + PSWHST2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 0 +}; + +static struct block_defs block_pswrd_defs = { + "pswrd", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRD_REG_DBG_SELECT, PSWRD_REG_DBG_DWORD_ENABLE, + PSWRD_REG_DBG_SHIFT, PSWRD_REG_DBG_FORCE_VALID, + PSWRD_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 2 +}; + +static struct block_defs block_pswrd2_defs = { + "pswrd2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRD2_REG_DBG_SELECT, PSWRD2_REG_DBG_DWORD_ENABLE, + PSWRD2_REG_DBG_SHIFT, PSWRD2_REG_DBG_FORCE_VALID, + PSWRD2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 2 +}; + +static struct block_defs block_pswwr_defs = { + "pswwr", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWWR_REG_DBG_SELECT, PSWWR_REG_DBG_DWORD_ENABLE, + PSWWR_REG_DBG_SHIFT, PSWWR_REG_DBG_FORCE_VALID, + PSWWR_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 3 +}; + +static struct block_defs block_pswwr2_defs = { + "pswwr2", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISC_PL_HV, 3 +}; + +static struct block_defs block_pswrq_defs = { + "pswrq", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRQ_REG_DBG_SELECT, PSWRQ_REG_DBG_DWORD_ENABLE, + PSWRQ_REG_DBG_SHIFT, PSWRQ_REG_DBG_FORCE_VALID, + PSWRQ_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 1 +}; + +static struct block_defs block_pswrq2_defs = { + "pswrq2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PSWRQ2_REG_DBG_SELECT, PSWRQ2_REG_DBG_DWORD_ENABLE, + PSWRQ2_REG_DBG_SHIFT, PSWRQ2_REG_DBG_FORCE_VALID, + PSWRQ2_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISC_PL_HV, 1 +}; + +static struct block_defs block_pglcs_defs = { + "pglcs", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PGLCS_REG_DBG_SELECT, PGLCS_REG_DBG_DWORD_ENABLE, + PGLCS_REG_DBG_SHIFT, PGLCS_REG_DBG_FORCE_VALID, + PGLCS_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 2 +}; + +static struct block_defs block_ptu_defs = { + "ptu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + PTU_REG_DBG_SELECT, PTU_REG_DBG_DWORD_ENABLE, + PTU_REG_DBG_SHIFT, PTU_REG_DBG_FORCE_VALID, + PTU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 20 +}; + +static struct block_defs block_dmae_defs = { + "dmae", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + DMAE_REG_DBG_SELECT, DMAE_REG_DBG_DWORD_ENABLE, + DMAE_REG_DBG_SHIFT, DMAE_REG_DBG_FORCE_VALID, + DMAE_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 28 +}; + +static struct block_defs block_tcm_defs = { + "tcm", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TCM_REG_DBG_SELECT, TCM_REG_DBG_DWORD_ENABLE, + TCM_REG_DBG_SHIFT, TCM_REG_DBG_FORCE_VALID, + TCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 5 +}; + +static struct block_defs block_mcm_defs = { + "mcm", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MCM_REG_DBG_SELECT, MCM_REG_DBG_DWORD_ENABLE, + MCM_REG_DBG_SHIFT, MCM_REG_DBG_FORCE_VALID, + MCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 3 +}; + +static struct block_defs block_ucm_defs = { + "ucm", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + UCM_REG_DBG_SELECT, UCM_REG_DBG_DWORD_ENABLE, + UCM_REG_DBG_SHIFT, UCM_REG_DBG_FORCE_VALID, + UCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 8 +}; + +static struct block_defs block_xcm_defs = { + "xcm", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XCM_REG_DBG_SELECT, XCM_REG_DBG_DWORD_ENABLE, + XCM_REG_DBG_SHIFT, XCM_REG_DBG_FORCE_VALID, + XCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 19 +}; + +static struct block_defs block_ycm_defs = { + "ycm", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YCM_REG_DBG_SELECT, YCM_REG_DBG_DWORD_ENABLE, + YCM_REG_DBG_SHIFT, YCM_REG_DBG_FORCE_VALID, + YCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 5 +}; + +static struct block_defs block_pcm_defs = { + "pcm", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PCM_REG_DBG_SELECT, PCM_REG_DBG_DWORD_ENABLE, + PCM_REG_DBG_SHIFT, PCM_REG_DBG_FORCE_VALID, + PCM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 4 +}; + +static struct block_defs block_qm_defs = { + "qm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCQ}, + QM_REG_DBG_SELECT, QM_REG_DBG_DWORD_ENABLE, + QM_REG_DBG_SHIFT, QM_REG_DBG_FORCE_VALID, + QM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 16 +}; + +static struct block_defs block_tm_defs = { + "tm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + TM_REG_DBG_SELECT, TM_REG_DBG_DWORD_ENABLE, + TM_REG_DBG_SHIFT, TM_REG_DBG_FORCE_VALID, + TM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 17 +}; + +static struct block_defs block_dorq_defs = { + "dorq", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + DORQ_REG_DBG_SELECT, DORQ_REG_DBG_DWORD_ENABLE, + DORQ_REG_DBG_SHIFT, DORQ_REG_DBG_FORCE_VALID, + DORQ_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 18 +}; + +static struct block_defs block_brb_defs = { + "brb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, + BRB_REG_DBG_SELECT, BRB_REG_DBG_DWORD_ENABLE, + BRB_REG_DBG_SHIFT, BRB_REG_DBG_FORCE_VALID, + BRB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 0 +}; + +static struct block_defs block_src_defs = { + "src", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + SRC_REG_DBG_SELECT, SRC_REG_DBG_DWORD_ENABLE, + SRC_REG_DBG_SHIFT, SRC_REG_DBG_FORCE_VALID, + SRC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 2 +}; + +static struct block_defs block_prs_defs = { + "prs", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR}, + PRS_REG_DBG_SELECT, PRS_REG_DBG_DWORD_ENABLE, + PRS_REG_DBG_SHIFT, PRS_REG_DBG_FORCE_VALID, + PRS_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 1 +}; + +static struct block_defs block_tsdm_defs = { + "tsdm", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TSDM_REG_DBG_SELECT, TSDM_REG_DBG_DWORD_ENABLE, + TSDM_REG_DBG_SHIFT, TSDM_REG_DBG_FORCE_VALID, + TSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 3 +}; + +static struct block_defs block_msdm_defs = { + "msdm", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MSDM_REG_DBG_SELECT, MSDM_REG_DBG_DWORD_ENABLE, + MSDM_REG_DBG_SHIFT, MSDM_REG_DBG_FORCE_VALID, + MSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 6 +}; + +static struct block_defs block_usdm_defs = { + "usdm", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + USDM_REG_DBG_SELECT, USDM_REG_DBG_DWORD_ENABLE, + USDM_REG_DBG_SHIFT, USDM_REG_DBG_FORCE_VALID, + USDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 7 +}; + +static struct block_defs block_xsdm_defs = { + "xsdm", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XSDM_REG_DBG_SELECT, XSDM_REG_DBG_DWORD_ENABLE, + XSDM_REG_DBG_SHIFT, XSDM_REG_DBG_FORCE_VALID, + XSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 20 +}; + +static struct block_defs block_ysdm_defs = { + "ysdm", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YSDM_REG_DBG_SELECT, YSDM_REG_DBG_DWORD_ENABLE, + YSDM_REG_DBG_SHIFT, YSDM_REG_DBG_FORCE_VALID, + YSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 8 +}; + +static struct block_defs block_psdm_defs = { + "psdm", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PSDM_REG_DBG_SELECT, PSDM_REG_DBG_DWORD_ENABLE, + PSDM_REG_DBG_SHIFT, PSDM_REG_DBG_FORCE_VALID, + PSDM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 7 +}; + +static struct block_defs block_tsem_defs = { + "tsem", {true, true, true}, true, DBG_TSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + TSEM_REG_DBG_SELECT, TSEM_REG_DBG_DWORD_ENABLE, + TSEM_REG_DBG_SHIFT, TSEM_REG_DBG_FORCE_VALID, + TSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 4 +}; + +static struct block_defs block_msem_defs = { + "msem", {true, true, true}, true, DBG_MSTORM_ID, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + MSEM_REG_DBG_SELECT, MSEM_REG_DBG_DWORD_ENABLE, + MSEM_REG_DBG_SHIFT, MSEM_REG_DBG_FORCE_VALID, + MSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 9 +}; + +static struct block_defs block_usem_defs = { + "usem", {true, true, true}, true, DBG_USTORM_ID, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + USEM_REG_DBG_SELECT, USEM_REG_DBG_DWORD_ENABLE, + USEM_REG_DBG_SHIFT, USEM_REG_DBG_FORCE_VALID, + USEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 9 +}; + +static struct block_defs block_xsem_defs = { + "xsem", {true, true, true}, true, DBG_XSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XSEM_REG_DBG_SELECT, XSEM_REG_DBG_DWORD_ENABLE, + XSEM_REG_DBG_SHIFT, XSEM_REG_DBG_FORCE_VALID, + XSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 21 +}; + +static struct block_defs block_ysem_defs = { + "ysem", {true, true, true}, true, DBG_YSTORM_ID, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCY}, + YSEM_REG_DBG_SELECT, YSEM_REG_DBG_DWORD_ENABLE, + YSEM_REG_DBG_SHIFT, YSEM_REG_DBG_FORCE_VALID, + YSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 11 +}; + +static struct block_defs block_psem_defs = { + "psem", {true, true, true}, true, DBG_PSTORM_ID, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + PSEM_REG_DBG_SELECT, PSEM_REG_DBG_DWORD_ENABLE, + PSEM_REG_DBG_SHIFT, PSEM_REG_DBG_FORCE_VALID, + PSEM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 10 +}; + +static struct block_defs block_rss_defs = { + "rss", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT}, + RSS_REG_DBG_SELECT, RSS_REG_DBG_DWORD_ENABLE, + RSS_REG_DBG_SHIFT, RSS_REG_DBG_FORCE_VALID, + RSS_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 18 +}; + +static struct block_defs block_tmld_defs = { + "tmld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + TMLD_REG_DBG_SELECT, TMLD_REG_DBG_DWORD_ENABLE, + TMLD_REG_DBG_SHIFT, TMLD_REG_DBG_FORCE_VALID, + TMLD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 13 +}; + +static struct block_defs block_muld_defs = { + "muld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + MULD_REG_DBG_SELECT, MULD_REG_DBG_DWORD_ENABLE, + MULD_REG_DBG_SHIFT, MULD_REG_DBG_FORCE_VALID, + MULD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 14 +}; + +static struct block_defs block_yuld_defs = { + "yuld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU, DBG_BUS_CLIENT_RBCU}, + YULD_REG_DBG_SELECT, YULD_REG_DBG_DWORD_ENABLE, + YULD_REG_DBG_SHIFT, YULD_REG_DBG_FORCE_VALID, + YULD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 15 +}; + +static struct block_defs block_xyld_defs = { + "xyld", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX, DBG_BUS_CLIENT_RBCX}, + XYLD_REG_DBG_SELECT, XYLD_REG_DBG_DWORD_ENABLE, + XYLD_REG_DBG_SHIFT, XYLD_REG_DBG_FORCE_VALID, + XYLD_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 12 +}; + +static struct block_defs block_prm_defs = { + "prm", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + PRM_REG_DBG_SELECT, PRM_REG_DBG_DWORD_ENABLE, + PRM_REG_DBG_SHIFT, PRM_REG_DBG_FORCE_VALID, + PRM_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 21 +}; + +static struct block_defs block_pbf_pb1_defs = { + "pbf_pb1", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_PB1_REG_DBG_SELECT, PBF_PB1_REG_DBG_DWORD_ENABLE, + PBF_PB1_REG_DBG_SHIFT, PBF_PB1_REG_DBG_FORCE_VALID, + PBF_PB1_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + 11 +}; + +static struct block_defs block_pbf_pb2_defs = { + "pbf_pb2", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_PB2_REG_DBG_SELECT, PBF_PB2_REG_DBG_DWORD_ENABLE, + PBF_PB2_REG_DBG_SHIFT, PBF_PB2_REG_DBG_FORCE_VALID, + PBF_PB2_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + 12 +}; + +static struct block_defs block_rpb_defs = { + "rpb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + RPB_REG_DBG_SELECT, RPB_REG_DBG_DWORD_ENABLE, + RPB_REG_DBG_SHIFT, RPB_REG_DBG_FORCE_VALID, + RPB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 13 +}; + +static struct block_defs block_btb_defs = { + "btb", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCR, DBG_BUS_CLIENT_RBCV}, + BTB_REG_DBG_SELECT, BTB_REG_DBG_DWORD_ENABLE, + BTB_REG_DBG_SHIFT, BTB_REG_DBG_FORCE_VALID, + BTB_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 10 +}; + +static struct block_defs block_pbf_defs = { + "pbf", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCV}, + PBF_REG_DBG_SELECT, PBF_REG_DBG_DWORD_ENABLE, + PBF_REG_DBG_SHIFT, PBF_REG_DBG_FORCE_VALID, + PBF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 15 +}; + +static struct block_defs block_rdif_defs = { + "rdif", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCT, DBG_BUS_CLIENT_RBCM}, + RDIF_REG_DBG_SELECT, RDIF_REG_DBG_DWORD_ENABLE, + RDIF_REG_DBG_SHIFT, RDIF_REG_DBG_FORCE_VALID, + RDIF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 16 +}; + +static struct block_defs block_tdif_defs = { + "tdif", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS, DBG_BUS_CLIENT_RBCS}, + TDIF_REG_DBG_SELECT, TDIF_REG_DBG_DWORD_ENABLE, + TDIF_REG_DBG_SHIFT, TDIF_REG_DBG_FORCE_VALID, + TDIF_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 17 +}; + +static struct block_defs block_cdu_defs = { + "cdu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + CDU_REG_DBG_SELECT, CDU_REG_DBG_DWORD_ENABLE, + CDU_REG_DBG_SHIFT, CDU_REG_DBG_FORCE_VALID, + CDU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 23 +}; + +static struct block_defs block_ccfc_defs = { + "ccfc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + CCFC_REG_DBG_SELECT, CCFC_REG_DBG_DWORD_ENABLE, + CCFC_REG_DBG_SHIFT, CCFC_REG_DBG_FORCE_VALID, + CCFC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 24 +}; + +static struct block_defs block_tcfc_defs = { + "tcfc", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF, DBG_BUS_CLIENT_RBCF}, + TCFC_REG_DBG_SELECT, TCFC_REG_DBG_DWORD_ENABLE, + TCFC_REG_DBG_SHIFT, TCFC_REG_DBG_FORCE_VALID, + TCFC_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 25 +}; + +static struct block_defs block_igu_defs = { + "igu", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + IGU_REG_DBG_SELECT, IGU_REG_DBG_DWORD_ENABLE, + IGU_REG_DBG_SHIFT, IGU_REG_DBG_FORCE_VALID, + IGU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, 27 +}; + +static struct block_defs block_cau_defs = { + "cau", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP, DBG_BUS_CLIENT_RBCP}, + CAU_REG_DBG_SELECT, CAU_REG_DBG_DWORD_ENABLE, + CAU_REG_DBG_SHIFT, CAU_REG_DBG_FORCE_VALID, + CAU_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, 19 +}; + +static struct block_defs block_umac_defs = { + "umac", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ}, + UMAC_REG_DBG_SELECT, UMAC_REG_DBG_DWORD_ENABLE, + UMAC_REG_DBG_SHIFT, UMAC_REG_DBG_FORCE_VALID, + UMAC_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV, 6 +}; + +static struct block_defs block_xmac_defs = { + "xmac", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_dbg_defs = { + "dbg", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 3 +}; + +static struct block_defs block_nig_defs = { + "nig", {true, true, true}, false, 0, + {DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN, DBG_BUS_CLIENT_RBCN}, + NIG_REG_DBG_SELECT, NIG_REG_DBG_DWORD_ENABLE, + NIG_REG_DBG_SHIFT, NIG_REG_DBG_FORCE_VALID, + NIG_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 0 +}; + +static struct block_defs block_wol_defs = { + "wol", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCZ}, + WOL_REG_DBG_SELECT, WOL_REG_DBG_DWORD_ENABLE, + WOL_REG_DBG_SHIFT, WOL_REG_DBG_FORCE_VALID, + WOL_REG_DBG_FORCE_FRAME, + true, true, DBG_RESET_REG_MISC_PL_PDA_VAUX, 7 +}; + +static struct block_defs block_bmbn_defs = { + "bmbn", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCB}, + BMBN_REG_DBG_SELECT, BMBN_REG_DBG_DWORD_ENABLE, + BMBN_REG_DBG_SHIFT, BMBN_REG_DBG_FORCE_VALID, + BMBN_REG_DBG_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_ipc_defs = { + "ipc", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_UA, 8 +}; + +static struct block_defs block_nwm_defs = { + "nwm", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCW}, + NWM_REG_DBG_SELECT, NWM_REG_DBG_DWORD_ENABLE, + NWM_REG_DBG_SHIFT, NWM_REG_DBG_FORCE_VALID, + NWM_REG_DBG_FORCE_FRAME, + true, false, DBG_RESET_REG_MISCS_PL_HV_2, 0 +}; + +static struct block_defs block_nws_defs = { + "nws", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 12 +}; + +static struct block_defs block_ms_defs = { + "ms", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, false, DBG_RESET_REG_MISCS_PL_HV, 13 +}; + +static struct block_defs block_phy_pcie_defs = { + "phy_pcie", {false, false, true}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, DBG_BUS_CLIENT_RBCH}, + PCIE_REG_DBG_COMMON_SELECT, PCIE_REG_DBG_COMMON_DWORD_ENABLE, + PCIE_REG_DBG_COMMON_SHIFT, PCIE_REG_DBG_COMMON_FORCE_VALID, + PCIE_REG_DBG_COMMON_FORCE_FRAME, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_led_defs = { + "led", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + true, true, DBG_RESET_REG_MISCS_PL_HV, 14 +}; + +static struct block_defs block_misc_aeu_defs = { + "misc_aeu", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs block_bar0_map_defs = { + "bar0_map", {false, false, false}, false, 0, + {MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS, MAX_DBG_BUS_CLIENTS}, + 0, 0, 0, 0, 0, + false, false, MAX_DBG_RESET_REGS, 0 +}; + +static struct block_defs *s_block_defs[MAX_BLOCK_ID] = { + &block_grc_defs, + &block_miscs_defs, + &block_misc_defs, + &block_dbu_defs, + &block_pglue_b_defs, + &block_cnig_defs, + &block_cpmu_defs, + &block_ncsi_defs, + &block_opte_defs, + &block_bmb_defs, + &block_pcie_defs, + &block_mcp_defs, + &block_mcp2_defs, + &block_pswhst_defs, + &block_pswhst2_defs, + &block_pswrd_defs, + &block_pswrd2_defs, + &block_pswwr_defs, + &block_pswwr2_defs, + &block_pswrq_defs, + &block_pswrq2_defs, + &block_pglcs_defs, + &block_dmae_defs, + &block_ptu_defs, + &block_tcm_defs, + &block_mcm_defs, + &block_ucm_defs, + &block_xcm_defs, + &block_ycm_defs, + &block_pcm_defs, + &block_qm_defs, + &block_tm_defs, + &block_dorq_defs, + &block_brb_defs, + &block_src_defs, + &block_prs_defs, + &block_tsdm_defs, + &block_msdm_defs, + &block_usdm_defs, + &block_xsdm_defs, + &block_ysdm_defs, + &block_psdm_defs, + &block_tsem_defs, + &block_msem_defs, + &block_usem_defs, + &block_xsem_defs, + &block_ysem_defs, + &block_psem_defs, + &block_rss_defs, + &block_tmld_defs, + &block_muld_defs, + &block_yuld_defs, + &block_xyld_defs, + &block_prm_defs, + &block_pbf_pb1_defs, + &block_pbf_pb2_defs, + &block_rpb_defs, + &block_btb_defs, + &block_pbf_defs, + &block_rdif_defs, + &block_tdif_defs, + &block_cdu_defs, + &block_ccfc_defs, + &block_tcfc_defs, + &block_igu_defs, + &block_cau_defs, + &block_umac_defs, + &block_xmac_defs, + &block_dbg_defs, + &block_nig_defs, + &block_wol_defs, + &block_bmbn_defs, + &block_ipc_defs, + &block_nwm_defs, + &block_nws_defs, + &block_ms_defs, + &block_phy_pcie_defs, + &block_led_defs, + &block_misc_aeu_defs, + &block_bar0_map_defs, +}; + +static struct platform_defs s_platform_defs[] = { + {"asic", 1}, + {"reserved", 0}, + {"reserved2", 0}, + {"reserved3", 0} +}; + +static struct grc_param_defs s_grc_param_defs[] = { + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_TSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_MSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_USTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_XSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_YSTORM */ + {{1, 1, 1}, 0, 1, false, 1, 1}, /* DBG_GRC_PARAM_DUMP_PSTORM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_REGS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RAM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PBUF */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IOR */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_VFC */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM_CTX */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_ILT */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_RSS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CAU */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_QM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MCP */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_RESERVED */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CFC */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_IGU */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BRB */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BTB */ + {{0, 0, 0}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_BMB */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_NIG */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_MULD */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_PRS */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DMAE */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_TM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_SDM */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_DIF */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_STATIC */ + {{0, 0, 0}, 0, 1, false, 0, 0}, /* DBG_GRC_PARAM_UNSTALL */ + {{MAX_LCIDS, MAX_LCIDS, MAX_LCIDS}, 1, MAX_LCIDS, false, MAX_LCIDS, + MAX_LCIDS}, /* DBG_GRC_PARAM_NUM_LCIDS */ + {{MAX_LTIDS, MAX_LTIDS, MAX_LTIDS}, 1, MAX_LTIDS, false, MAX_LTIDS, + MAX_LTIDS}, /* DBG_GRC_PARAM_NUM_LTIDS */ + {{0, 0, 0}, 0, 1, true, 0, 0}, /* DBG_GRC_PARAM_EXCLUDE_ALL */ + {{0, 0, 0}, 0, 1, true, 0, 0}, /* DBG_GRC_PARAM_CRASH */ + {{0, 0, 0}, 0, 1, false, 1, 0}, /* DBG_GRC_PARAM_PARITY_SAFE */ + {{1, 1, 1}, 0, 1, false, 0, 1}, /* DBG_GRC_PARAM_DUMP_CM */ + {{1, 1, 1}, 0, 1, false, 0, 1} /* DBG_GRC_PARAM_DUMP_PHY */ +}; + +static struct rss_mem_defs s_rss_mem_defs[] = { + { "rss_mem_cid", "rss_cid", 0, + {256, 256, 320}, + {32, 32, 32} }, + { "rss_mem_key_msb", "rss_key", 1024, + {128, 128, 208}, + {256, 256, 256} }, + { "rss_mem_key_lsb", "rss_key", 2048, + {128, 128, 208}, + {64, 64, 64} }, + { "rss_mem_info", "rss_info", 3072, + {128, 128, 208}, + {16, 16, 16} }, + { "rss_mem_ind", "rss_ind", 4096, + {(128 * 128), (128 * 128), (128 * 208)}, + {16, 16, 16} } +}; + +static struct vfc_ram_defs s_vfc_ram_defs[] = { + {"vfc_ram_tt1", "vfc_ram", 0, 512}, + {"vfc_ram_mtt2", "vfc_ram", 512, 128}, + {"vfc_ram_stt2", "vfc_ram", 640, 32}, + {"vfc_ram_ro_vect", "vfc_ram", 672, 32} +}; + +static struct big_ram_defs s_big_ram_defs[] = { + { "BRB", MEM_GROUP_BRB_MEM, MEM_GROUP_BRB_RAM, DBG_GRC_PARAM_DUMP_BRB, + BRB_REG_BIG_RAM_ADDRESS, BRB_REG_BIG_RAM_DATA, + {4800, 4800, 5632} }, + { "BTB", MEM_GROUP_BTB_MEM, MEM_GROUP_BTB_RAM, DBG_GRC_PARAM_DUMP_BTB, + BTB_REG_BIG_RAM_ADDRESS, BTB_REG_BIG_RAM_DATA, + {2880, 2880, 3680} }, + { "BMB", MEM_GROUP_BMB_MEM, MEM_GROUP_BMB_RAM, DBG_GRC_PARAM_DUMP_BMB, + BMB_REG_BIG_RAM_ADDRESS, BMB_REG_BIG_RAM_DATA, + {1152, 1152, 1152} } +}; + +static struct reset_reg_defs s_reset_regs_defs[] = { + { MISCS_REG_RESET_PL_UA, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISCS_PL_UA */ + { MISCS_REG_RESET_PL_HV, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISCS_PL_HV */ + { MISCS_REG_RESET_PL_HV_2, 0x0, + {false, false, true} }, /* DBG_RESET_REG_MISCS_PL_HV_2 */ + { MISC_REG_RESET_PL_UA, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_UA */ + { MISC_REG_RESET_PL_HV, 0x0, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_HV */ + { MISC_REG_RESET_PL_PDA_VMAIN_1, 0x4404040, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_1 */ + { MISC_REG_RESET_PL_PDA_VMAIN_2, 0x7c00007, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VMAIN_2 */ + { MISC_REG_RESET_PL_PDA_VAUX, 0x2, + {true, true, true} }, /* DBG_RESET_REG_MISC_PL_PDA_VAUX */ +}; + +static struct phy_defs s_phy_defs[] = { + {"nw_phy", NWS_REG_NWS_CMU, PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0, + PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8, + PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0, + PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8}, + {"sgmii_phy", MS_REG_MS_CMU, PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130, + PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131}, + {"pcie_phy0", PHY_PCIE_REG_PHY0, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131}, + {"pcie_phy1", PHY_PCIE_REG_PHY1, PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130, + PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131}, +}; + +/**************************** Private Functions ******************************/ + +/* Reads and returns a single dword from the specified unaligned buffer */ +static u32 qed_read_unaligned_dword(u8 *buf) +{ + u32 dword; + + memcpy((u8 *)&dword, buf, sizeof(dword)); + return dword; +} + +/* Initializes debug data for the specified device */ +static enum dbg_status qed_dbg_dev_init(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + if (dev_data->initialized) + return DBG_STATUS_OK; + + if (QED_IS_K2(p_hwfn->cdev)) { + dev_data->chip_id = CHIP_K2; + dev_data->mode_enable[MODE_K2] = 1; + } else if (QED_IS_BB_B0(p_hwfn->cdev)) { + dev_data->chip_id = CHIP_BB_B0; + dev_data->mode_enable[MODE_BB_B0] = 1; + } else { + return DBG_STATUS_UNKNOWN_CHIP; + } + + dev_data->platform_id = PLATFORM_ASIC; + dev_data->mode_enable[MODE_ASIC] = 1; + dev_data->initialized = true; + return DBG_STATUS_OK; +} + +/* Reads the FW info structure for the specified Storm from the chip, + * and writes it to the specified fw_info pointer. + */ +static void qed_read_fw_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u8 storm_id, struct fw_info *fw_info) +{ + /* Read first the address that points to fw_info location. + * The address is located in the last line of the Storm RAM. + */ + u32 addr = s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_INT_RAM + + DWORDS_TO_BYTES(SEM_FAST_REG_INT_RAM_SIZE) - + sizeof(struct fw_info_location); + struct fw_info_location fw_info_location; + u32 *dest = (u32 *)&fw_info_location; + u32 i; + + memset(&fw_info_location, 0, sizeof(fw_info_location)); + memset(fw_info, 0, sizeof(*fw_info)); + for (i = 0; i < BYTES_TO_DWORDS(sizeof(fw_info_location)); + i++, addr += BYTES_IN_DWORD) + dest[i] = qed_rd(p_hwfn, p_ptt, addr); + if (fw_info_location.size > 0 && fw_info_location.size <= + sizeof(*fw_info)) { + /* Read FW version info from Storm RAM */ + addr = fw_info_location.grc_addr; + dest = (u32 *)fw_info; + for (i = 0; i < BYTES_TO_DWORDS(fw_info_location.size); + i++, addr += BYTES_IN_DWORD) + dest[i] = qed_rd(p_hwfn, p_ptt, addr); + } +} + +/* Dumps the specified string to the specified buffer. Returns the dumped size + * in bytes (actual length + 1 for the null character termination). + */ +static u32 qed_dump_str(char *dump_buf, bool dump, const char *str) +{ + if (dump) + strcpy(dump_buf, str); + return (u32)strlen(str) + 1; +} + +/* Dumps zeros to align the specified buffer to dwords. Returns the dumped size + * in bytes. + */ +static u32 qed_dump_align(char *dump_buf, bool dump, u32 byte_offset) +{ + u8 offset_in_dword = (u8)(byte_offset & 0x3), align_size; + + align_size = offset_in_dword ? BYTES_IN_DWORD - offset_in_dword : 0; + + if (dump && align_size) + memset(dump_buf, 0, align_size); + return align_size; +} + +/* Writes the specified string param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_str_param(u32 *dump_buf, + bool dump, + const char *param_name, const char *param_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; + + /* Dump param name */ + offset += qed_dump_str(char_buf + offset, dump, param_name); + + /* Indicate a string param value */ + if (dump) + *(char_buf + offset) = 1; + offset++; + + /* Dump param value */ + offset += qed_dump_str(char_buf + offset, dump, param_val); + + /* Align buffer to next dword */ + offset += qed_dump_align(char_buf + offset, dump, offset); + return BYTES_TO_DWORDS(offset); +} + +/* Writes the specified numeric param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_num_param(u32 *dump_buf, + bool dump, const char *param_name, u32 param_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; + + /* Dump param name */ + offset += qed_dump_str(char_buf + offset, dump, param_name); + + /* Indicate a numeric param value */ + if (dump) + *(char_buf + offset) = 0; + offset++; + + /* Align buffer to next dword */ + offset += qed_dump_align(char_buf + offset, dump, offset); + + /* Dump param value (and change offset from bytes to dwords) */ + offset = BYTES_TO_DWORDS(offset); + if (dump) + *(dump_buf + offset) = param_val; + offset++; + return offset; +} + +/* Reads the FW version and writes it as a param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_fw_ver_param(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char fw_ver_str[16] = EMPTY_FW_VERSION_STR; + char fw_img_str[16] = EMPTY_FW_IMAGE_STR; + struct fw_info fw_info = { {0}, {0} }; + int printed_chars; + u32 offset = 0; + + if (dump) { + /* Read FW image/version from PRAM in a non-reset SEMI */ + bool found = false; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS && !found; + storm_id++) { + /* Read FW version/image */ + if (!dev_data->block_in_reset + [s_storm_defs[storm_id].block_id]) { + /* read FW info for the current Storm */ + qed_read_fw_info(p_hwfn, + p_ptt, storm_id, &fw_info); + + /* Create FW version/image strings */ + printed_chars = + snprintf(fw_ver_str, + sizeof(fw_ver_str), + "%d_%d_%d_%d", + fw_info.ver.num.major, + fw_info.ver.num.minor, + fw_info.ver.num.rev, + fw_info.ver.num.eng); + if (printed_chars < 0 || printed_chars >= + sizeof(fw_ver_str)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid FW version string\n"); + switch (fw_info.ver.image_id) { + case FW_IMG_MAIN: + strcpy(fw_img_str, "main"); + break; + default: + strcpy(fw_img_str, "unknown"); + break; + } + + found = true; + } + } + } + + /* Dump FW version, image and timestamp */ + offset += qed_dump_str_param(dump_buf + offset, + dump, "fw-version", fw_ver_str); + offset += qed_dump_str_param(dump_buf + offset, + dump, "fw-image", fw_img_str); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "fw-timestamp", fw_info.ver.timestamp); + return offset; +} + +/* Reads the MFW version and writes it as a param to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_mfw_ver_param(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + char mfw_ver_str[16] = EMPTY_FW_VERSION_STR; + + if (dump) { + u32 global_section_offsize, global_section_addr, mfw_ver; + u32 public_data_addr, global_section_offsize_addr; + int printed_chars; + + /* Find MCP public data GRC address. + * Needs to be ORed with MCP_REG_SCRATCH due to a HW bug. + */ + public_data_addr = qed_rd(p_hwfn, p_ptt, + MISC_REG_SHARED_MEM_ADDR) | + MCP_REG_SCRATCH; + + /* Find MCP public global section offset */ + global_section_offsize_addr = public_data_addr + + offsetof(struct mcp_public_data, + sections) + + sizeof(offsize_t) * PUBLIC_GLOBAL; + global_section_offsize = qed_rd(p_hwfn, p_ptt, + global_section_offsize_addr); + global_section_addr = MCP_REG_SCRATCH + + (global_section_offsize & + OFFSIZE_OFFSET_MASK) * 4; + + /* Read MFW version from MCP public global section */ + mfw_ver = qed_rd(p_hwfn, p_ptt, + global_section_addr + + offsetof(struct public_global, mfw_ver)); + + /* Dump MFW version param */ + printed_chars = snprintf(mfw_ver_str, sizeof(mfw_ver_str), + "%d_%d_%d_%d", + (u8) (mfw_ver >> 24), + (u8) (mfw_ver >> 16), + (u8) (mfw_ver >> 8), + (u8) mfw_ver); + if (printed_chars < 0 || printed_chars >= sizeof(mfw_ver_str)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid MFW version string\n"); + } + + return qed_dump_str_param(dump_buf, dump, "mfw-version", mfw_ver_str); +} + +/* Writes a section header to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_section_hdr(u32 *dump_buf, + bool dump, const char *name, u32 num_params) +{ + return qed_dump_num_param(dump_buf, dump, name, num_params); +} + +/* Writes the common global params to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_common_global_params(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + u8 num_specific_global_params) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0; + + /* Find platform string and dump global params section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, + "global_params", + NUM_COMMON_GLOBAL_PARAMS + + num_specific_global_params); + + /* Store params */ + offset += qed_dump_fw_ver_param(p_hwfn, p_ptt, dump_buf + offset, dump); + offset += qed_dump_mfw_ver_param(p_hwfn, + p_ptt, dump_buf + offset, dump); + offset += qed_dump_num_param(dump_buf + offset, + dump, "tools-version", TOOLS_VERSION); + offset += qed_dump_str_param(dump_buf + offset, + dump, + "chip", + s_chip_defs[dev_data->chip_id].name); + offset += qed_dump_str_param(dump_buf + offset, + dump, + "platform", + s_platform_defs[dev_data->platform_id]. + name); + offset += + qed_dump_num_param(dump_buf + offset, dump, "pci-func", + p_hwfn->abs_pf_id); + return offset; +} + +/* Writes the last section to the specified buffer at the given offset. + * Returns the dumped size in dwords. + */ +static u32 qed_dump_last_section(u32 *dump_buf, u32 offset, bool dump) +{ + u32 start_offset = offset, crc = ~0; + + /* Dump CRC section header */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0); + + /* Calculate CRC32 and add it to the dword following the "last" section. + */ + if (dump) + *(dump_buf + offset) = ~crc32(crc, (u8 *)dump_buf, + DWORDS_TO_BYTES(offset)); + offset++; + return offset - start_offset; +} + +/* Update blocks reset state */ +static void qed_update_blocks_reset_state(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; + u32 i; + + /* Read reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) + reg_val[i] = qed_rd(p_hwfn, + p_ptt, s_reset_regs_defs[i].addr); + + /* Check if blocks are in reset */ + for (i = 0; i < MAX_BLOCK_ID; i++) + dev_data->block_in_reset[i] = + s_block_defs[i]->has_reset_bit && + !(reg_val[s_block_defs[i]->reset_reg] & + BIT(s_block_defs[i]->reset_bit_offset)); +} + +/* Enable / disable the Debug block */ +static void qed_bus_enable_dbg_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool enable) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_DBG_BLOCK_ON, enable ? 1 : 0); +} + +/* Resets the Debug block */ +static void qed_bus_reset_dbg_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + u32 dbg_reset_reg_addr, old_reset_reg_val, new_reset_reg_val; + + dbg_reset_reg_addr = + s_reset_regs_defs[s_block_defs[BLOCK_DBG]->reset_reg].addr; + old_reset_reg_val = qed_rd(p_hwfn, p_ptt, dbg_reset_reg_addr); + new_reset_reg_val = old_reset_reg_val & + ~BIT(s_block_defs[BLOCK_DBG]->reset_bit_offset); + + qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, new_reset_reg_val); + qed_wr(p_hwfn, p_ptt, dbg_reset_reg_addr, old_reset_reg_val); +} + +static void qed_bus_set_framing_mode(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum dbg_bus_frame_modes mode) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_FRAMING_MODE, (u8)mode); +} + +/* Enable / disable Debug Bus clients according to the specified mask. + * (1 = enable, 0 = disable) + */ +static void qed_bus_enable_clients(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 client_mask) +{ + qed_wr(p_hwfn, p_ptt, DBG_REG_CLIENT_ENABLE, client_mask); +} + +static bool qed_is_mode_match(struct qed_hwfn *p_hwfn, u16 *modes_buf_offset) +{ + const u32 *ptr = s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 tree_val = ((u8 *)ptr)[(*modes_buf_offset)++]; + bool arg1, arg2; + + switch (tree_val) { + case INIT_MODE_OP_NOT: + return !qed_is_mode_match(p_hwfn, modes_buf_offset); + case INIT_MODE_OP_OR: + case INIT_MODE_OP_AND: + arg1 = qed_is_mode_match(p_hwfn, modes_buf_offset); + arg2 = qed_is_mode_match(p_hwfn, modes_buf_offset); + return (tree_val == INIT_MODE_OP_OR) ? (arg1 || + arg2) : (arg1 && arg2); + default: + return dev_data->mode_enable[tree_val - MAX_INIT_MODE_OPS] > 0; + } +} + +/* Returns the value of the specified GRC param */ +static u32 qed_grc_get_param(struct qed_hwfn *p_hwfn, + enum dbg_grc_params grc_param) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + return dev_data->grc.param_val[grc_param]; +} + +/* Clear all GRC params */ +static void qed_dbg_grc_clear_params(struct qed_hwfn *p_hwfn) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i; + + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) + dev_data->grc.param_set_by_user[i] = 0; +} + +/* Assign default GRC param values */ +static void qed_dbg_grc_set_params_default(struct qed_hwfn *p_hwfn) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i; + + for (i = 0; i < MAX_DBG_GRC_PARAMS; i++) + if (!dev_data->grc.param_set_by_user[i]) + dev_data->grc.param_val[i] = + s_grc_param_defs[i].default_val[dev_data->chip_id]; +} + +/* Returns true if the specified entity (indicated by GRC param) should be + * included in the dump, false otherwise. + */ +static bool qed_grc_is_included(struct qed_hwfn *p_hwfn, + enum dbg_grc_params grc_param) +{ + return qed_grc_get_param(p_hwfn, grc_param) > 0; +} + +/* Returns true of the specified Storm should be included in the dump, false + * otherwise. + */ +static bool qed_grc_is_storm_included(struct qed_hwfn *p_hwfn, + enum dbg_storms storm) +{ + return qed_grc_get_param(p_hwfn, (enum dbg_grc_params)storm) > 0; +} + +/* Returns true if the specified memory should be included in the dump, false + * otherwise. + */ +static bool qed_grc_is_mem_included(struct qed_hwfn *p_hwfn, + enum block_id block_id, u8 mem_group_id) +{ + u8 i; + + /* Check Storm match */ + if (s_block_defs[block_id]->associated_to_storm && + !qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)s_block_defs[block_id]->storm_id)) + return false; + + for (i = 0; i < NUM_BIG_RAM_TYPES; i++) + if (mem_group_id == s_big_ram_defs[i].mem_group_id || + mem_group_id == s_big_ram_defs[i].ram_mem_group_id) + return qed_grc_is_included(p_hwfn, + s_big_ram_defs[i].grc_param); + if (mem_group_id == MEM_GROUP_PXP_ILT || mem_group_id == + MEM_GROUP_PXP_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PXP); + if (mem_group_id == MEM_GROUP_RAM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RAM); + if (mem_group_id == MEM_GROUP_PBUF) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PBUF); + if (mem_group_id == MEM_GROUP_CAU_MEM || + mem_group_id == MEM_GROUP_CAU_SB || + mem_group_id == MEM_GROUP_CAU_PI) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CAU); + if (mem_group_id == MEM_GROUP_QM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_QM); + if (mem_group_id == MEM_GROUP_CONN_CFC_MEM || + mem_group_id == MEM_GROUP_TASK_CFC_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CFC); + if (mem_group_id == MEM_GROUP_IGU_MEM || mem_group_id == + MEM_GROUP_IGU_MSIX) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IGU); + if (mem_group_id == MEM_GROUP_MULD_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MULD); + if (mem_group_id == MEM_GROUP_PRS_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_PRS); + if (mem_group_id == MEM_GROUP_DMAE_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DMAE); + if (mem_group_id == MEM_GROUP_TM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_TM); + if (mem_group_id == MEM_GROUP_SDM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_SDM); + if (mem_group_id == MEM_GROUP_TDIF_CTX || mem_group_id == + MEM_GROUP_RDIF_CTX) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_DIF); + if (mem_group_id == MEM_GROUP_CM_MEM) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM); + if (mem_group_id == MEM_GROUP_IOR) + return qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR); + + return true; +} + +/* Stalls all Storms */ +static void qed_grc_stall_storms(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, bool stall) +{ + u8 reg_val = stall ? 1 : 0; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) { + u32 reg_addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_STALL_0; + + qed_wr(p_hwfn, p_ptt, reg_addr, reg_val); + } + } + + msleep(STALL_DELAY_MS); +} + +/* Takes all blocks out of reset */ +static void qed_grc_unreset_blocks(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 reg_val[MAX_DBG_RESET_REGS] = { 0 }; + u32 i; + + /* Fill reset regs values */ + for (i = 0; i < MAX_BLOCK_ID; i++) + if (s_block_defs[i]->has_reset_bit && s_block_defs[i]->unreset) + reg_val[s_block_defs[i]->reset_reg] |= + BIT(s_block_defs[i]->reset_bit_offset); + + /* Write reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) { + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) { + reg_val[i] |= s_reset_regs_defs[i].unreset_val; + if (reg_val[i]) + qed_wr(p_hwfn, + p_ptt, + s_reset_regs_defs[i].addr + + RESET_REG_UNRESET_OFFSET, reg_val[i]); + } + } +} + +/* Returns the attention name offsets of the specified block */ +static const struct dbg_attn_block_type_data * +qed_get_block_attn_data(enum block_id block_id, enum dbg_attn_type attn_type) +{ + const struct dbg_attn_block *base_attn_block_arr = + (const struct dbg_attn_block *) + s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr; + + return &base_attn_block_arr[block_id].per_type_data[attn_type]; +} + +/* Returns the attention registers of the specified block */ +static const struct dbg_attn_reg * +qed_get_block_attn_regs(enum block_id block_id, enum dbg_attn_type attn_type, + u8 *num_attn_regs) +{ + const struct dbg_attn_block_type_data *block_type_data = + qed_get_block_attn_data(block_id, attn_type); + + *num_attn_regs = block_type_data->num_regs; + return &((const struct dbg_attn_reg *) + s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr)[block_type_data-> + regs_offset]; +} + +/* For each block, clear the status of all parities */ +static void qed_grc_clear_all_prty(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 reg_idx, num_attn_regs; + u32 block_id; + + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + const struct dbg_attn_reg *attn_reg_arr; + + if (dev_data->block_in_reset[block_id]) + continue; + + attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + ATTN_TYPE_PARITY, + &num_attn_regs); + for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) { + const struct dbg_attn_reg *reg_data = + &attn_reg_arr[reg_idx]; + + /* Check mode */ + bool eval_mode = GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + u16 modes_buf_offset = + GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + if (!eval_mode || + qed_is_mode_match(p_hwfn, &modes_buf_offset)) + /* Mode match - read parity status read-clear + * register. + */ + qed_rd(p_hwfn, p_ptt, + DWORDS_TO_BYTES(reg_data-> + sts_clr_address)); + } + } +} + +/* Dumps GRC registers section header. Returns the dumped size in dwords. + * The following parameters are dumped: + * - 'count' = num_dumped_entries + * - 'split' = split_type + * - 'id'i = split_id (dumped only if split_id >= 0) + * - 'param_name' = param_val (user param, dumped only if param_name != NULL and + * param_val != NULL) + */ +static u32 qed_grc_dump_regs_hdr(u32 *dump_buf, + bool dump, + u32 num_reg_entries, + const char *split_type, + int split_id, + const char *param_name, const char *param_val) +{ + u8 num_params = 2 + (split_id >= 0 ? 1 : 0) + (param_name ? 1 : 0); + u32 offset = 0; + + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "grc_regs", num_params); + offset += qed_dump_num_param(dump_buf + offset, + dump, "count", num_reg_entries); + offset += qed_dump_str_param(dump_buf + offset, + dump, "split", split_type); + if (split_id >= 0) + offset += qed_dump_num_param(dump_buf + offset, + dump, "id", split_id); + if (param_name && param_val) + offset += qed_dump_str_param(dump_buf + offset, + dump, param_name, param_val); + return offset; +} + +/* Dumps GRC register/memory. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_reg_entry(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, + bool dump, u32 addr, u32 len) +{ + u32 offset = 0, i; + + if (dump) { + *(dump_buf + offset++) = addr | (len << REG_DUMP_LEN_SHIFT); + for (i = 0; i < len; i++, addr++, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, + p_ptt, + DWORDS_TO_BYTES(addr)); + } else { + offset += len + 1; + } + + return offset; +} + +/* Dumps GRC registers entries. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_regs_entries(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_regs_arr, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + u32 *num_dumped_reg_entries) +{ + u32 i, offset = 0, input_offset = 0; + bool mode_match = true; + + *num_dumped_reg_entries = 0; + while (input_offset < input_regs_arr.size_in_dwords) { + const struct dbg_dump_cond_hdr *cond_hdr = + (const struct dbg_dump_cond_hdr *) + &input_regs_arr.ptr[input_offset++]; + bool eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + + /* Check mode/block */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match && block_enable[cond_hdr->block_id]) { + for (i = 0; i < cond_hdr->data_size; + i++, input_offset++) { + const struct dbg_dump_reg *reg = + (const struct dbg_dump_reg *) + &input_regs_arr.ptr[input_offset]; + + offset += + qed_grc_dump_reg_entry(p_hwfn, p_ptt, + dump_buf + offset, dump, + GET_FIELD(reg->data, + DBG_DUMP_REG_ADDRESS), + GET_FIELD(reg->data, + DBG_DUMP_REG_LENGTH)); + (*num_dumped_reg_entries)++; + } + } else { + input_offset += cond_hdr->data_size; + } + } + + return offset; +} + +/* Dumps GRC registers entries. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_split_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_regs_arr, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + const char *split_type_name, + u32 split_id, + const char *param_name, + const char *param_val) +{ + u32 num_dumped_reg_entries, offset; + + /* Calculate register dump header size (and skip it for now) */ + offset = qed_grc_dump_regs_hdr(dump_buf, + false, + 0, + split_type_name, + split_id, param_name, param_val); + + /* Dump registers */ + offset += qed_grc_dump_regs_entries(p_hwfn, + p_ptt, + input_regs_arr, + dump_buf + offset, + dump, + block_enable, + &num_dumped_reg_entries); + + /* Write register dump header */ + if (dump && num_dumped_reg_entries > 0) + qed_grc_dump_regs_hdr(dump_buf, + dump, + num_dumped_reg_entries, + split_type_name, + split_id, param_name, param_val); + + return num_dumped_reg_entries > 0 ? offset : 0; +} + +/* Dumps registers according to the input registers array. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_registers(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + bool block_enable[MAX_BLOCK_ID], + const char *param_name, const char *param_val) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, input_offset = 0; + u8 port_id, pf_id; + + if (dump) + DP_VERBOSE(p_hwfn, QED_MSG_DEBUG, "Dumping registers...\n"); + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].size_in_dwords) { + const struct dbg_dump_split_hdr *split_hdr = + (const struct dbg_dump_split_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset++]; + u8 split_type_id = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + u32 split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); + struct dbg_array curr_input_regs_arr = { + &s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr[input_offset], + split_data_size}; + + switch (split_type_id) { + case SPLIT_TYPE_NONE: + case SPLIT_TYPE_VF: + offset += qed_grc_dump_split_data(p_hwfn, + p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, + block_enable, + "eng", + (u32)(-1), + param_name, + param_val); + break; + case SPLIT_TYPE_PORT: + for (port_id = 0; + port_id < + s_chip_defs[dev_data->chip_id]. + per_platform[dev_data->platform_id].num_ports; + port_id++) { + if (dump) + qed_port_pretend(p_hwfn, p_ptt, + port_id); + offset += + qed_grc_dump_split_data(p_hwfn, p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, block_enable, + "port", port_id, + param_name, + param_val); + } + break; + case SPLIT_TYPE_PF: + case SPLIT_TYPE_PORT_PF: + for (pf_id = 0; + pf_id < + s_chip_defs[dev_data->chip_id]. + per_platform[dev_data->platform_id].num_pfs; + pf_id++) { + if (dump) + qed_fid_pretend(p_hwfn, p_ptt, pf_id); + offset += qed_grc_dump_split_data(p_hwfn, + p_ptt, + curr_input_regs_arr, + dump_buf + offset, + dump, block_enable, + "pf", pf_id, param_name, + param_val); + } + break; + default: + break; + } + + input_offset += split_data_size; + } + + /* Pretend to original PF */ + if (dump) + qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id); + return offset; +} + +/* Dump reset registers. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_reset_regs(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 i, offset = 0, num_regs = 0; + + /* Calculate header size */ + offset += qed_grc_dump_regs_hdr(dump_buf, + false, 0, "eng", -1, NULL, NULL); + + /* Write reset registers */ + for (i = 0; i < MAX_DBG_RESET_REGS; i++) { + if (s_reset_regs_defs[i].exists[dev_data->chip_id]) { + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS + (s_reset_regs_defs + [i].addr), 1); + num_regs++; + } + } + + /* Write header */ + if (dump) + qed_grc_dump_regs_hdr(dump_buf, + true, num_regs, "eng", -1, NULL, NULL); + return offset; +} + +/* Dump registers that are modified during GRC Dump and therefore must be dumped + * first. Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_modified_regs(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, num_reg_entries = 0, block_id; + u8 storm_id, reg_idx, num_attn_regs; + + /* Calculate header size */ + offset += qed_grc_dump_regs_hdr(dump_buf, + false, 0, "eng", -1, NULL, NULL); + + /* Write parity registers */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + const struct dbg_attn_reg *attn_reg_arr; + + if (dev_data->block_in_reset[block_id] && dump) + continue; + + attn_reg_arr = qed_get_block_attn_regs((enum block_id)block_id, + ATTN_TYPE_PARITY, + &num_attn_regs); + for (reg_idx = 0; reg_idx < num_attn_regs; reg_idx++) { + const struct dbg_attn_reg *reg_data = + &attn_reg_arr[reg_idx]; + u16 modes_buf_offset; + bool eval_mode; + + /* Check mode */ + eval_mode = GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + modes_buf_offset = + GET_FIELD(reg_data->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + if (!eval_mode || + qed_is_mode_match(p_hwfn, &modes_buf_offset)) { + /* Mode match - read and dump registers */ + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + reg_data->mask_address, + 1); + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + GET_FIELD(reg_data->data, + DBG_ATTN_REG_STS_ADDRESS), + 1); + num_reg_entries += 2; + } + } + } + + /* Write storm stall status registers */ + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id] && + dump) + continue; + + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS(s_storm_defs[storm_id]. + sem_fast_mem_addr + + SEM_FAST_REG_STALLED), + 1); + num_reg_entries++; + } + + /* Write header */ + if (dump) + qed_grc_dump_regs_hdr(dump_buf, + true, + num_reg_entries, "eng", -1, NULL, NULL); + return offset; +} + +/* Dumps a GRC memory header (section and params). + * The following parameters are dumped: + * name - name is dumped only if it's not NULL. + * addr - byte_addr is dumped only if name is NULL. + * len - dword_len is always dumped. + * width - bit_width is dumped if it's not zero. + * packed - packed=1 is dumped if it's not false. + * mem_group - mem_group is always dumped. + * is_storm - true only if the memory is related to a Storm. + * storm_letter - storm letter (valid only if is_storm is true). + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_mem_hdr(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + bool dump, + const char *name, + u32 byte_addr, + u32 dword_len, + u32 bit_width, + bool packed, + const char *mem_group, + bool is_storm, char storm_letter) +{ + u8 num_params = 3; + u32 offset = 0; + char buf[64]; + + if (!dword_len) + DP_NOTICE(p_hwfn, + "Unexpected GRC Dump error: dumped memory size must be non-zero\n"); + if (bit_width) + num_params++; + if (packed) + num_params++; + + /* Dump section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "grc_mem", num_params); + if (name) { + /* Dump name */ + if (is_storm) { + strcpy(buf, "?STORM_"); + buf[0] = storm_letter; + strcpy(buf + strlen(buf), name); + } else { + strcpy(buf, name); + } + + offset += qed_dump_str_param(dump_buf + offset, + dump, "name", buf); + if (dump) + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Dumping %d registers from %s...\n", + dword_len, buf); + } else { + /* Dump address */ + offset += qed_dump_num_param(dump_buf + offset, + dump, "addr", byte_addr); + if (dump && dword_len > 64) + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "Dumping %d registers from address 0x%x...\n", + dword_len, byte_addr); + } + + /* Dump len */ + offset += qed_dump_num_param(dump_buf + offset, dump, "len", dword_len); + + /* Dump bit width */ + if (bit_width) + offset += qed_dump_num_param(dump_buf + offset, + dump, "width", bit_width); + + /* Dump packed */ + if (packed) + offset += qed_dump_num_param(dump_buf + offset, + dump, "packed", 1); + + /* Dump reg type */ + if (is_storm) { + strcpy(buf, "?STORM_"); + buf[0] = storm_letter; + strcpy(buf + strlen(buf), mem_group); + } else { + strcpy(buf, mem_group); + } + + offset += qed_dump_str_param(dump_buf + offset, dump, "type", buf); + return offset; +} + +/* Dumps a single GRC memory. If name is NULL, the memory is stored by address. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_mem(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + const char *name, + u32 byte_addr, + u32 dword_len, + u32 bit_width, + bool packed, + const char *mem_group, + bool is_storm, char storm_letter) +{ + u32 offset = 0; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + name, + byte_addr, + dword_len, + bit_width, + packed, + mem_group, is_storm, storm_letter); + if (dump) { + u32 i; + + for (i = 0; i < dword_len; + i++, byte_addr += BYTES_IN_DWORD, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, byte_addr); + } else { + offset += dword_len; + } + + return offset; +} + +/* Dumps GRC memories entries. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_mem_entries(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct dbg_array input_mems_arr, + u32 *dump_buf, bool dump) +{ + u32 i, offset = 0, input_offset = 0; + bool mode_match = true; + + while (input_offset < input_mems_arr.size_in_dwords) { + const struct dbg_dump_cond_hdr *cond_hdr; + u32 num_entries; + bool eval_mode; + + cond_hdr = (const struct dbg_dump_cond_hdr *) + &input_mems_arr.ptr[input_offset++]; + eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + + /* Check required mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (!mode_match) { + input_offset += cond_hdr->data_size; + continue; + } + + num_entries = cond_hdr->data_size / MEM_DUMP_ENTRY_SIZE_DWORDS; + for (i = 0; i < num_entries; + i++, input_offset += MEM_DUMP_ENTRY_SIZE_DWORDS) { + const struct dbg_dump_mem *mem = + (const struct dbg_dump_mem *) + &input_mems_arr.ptr[input_offset]; + u8 mem_group_id; + + mem_group_id = GET_FIELD(mem->dword0, + DBG_DUMP_MEM_MEM_GROUP_ID); + if (mem_group_id >= MEM_GROUPS_NUM) { + DP_NOTICE(p_hwfn, "Invalid mem_group_id\n"); + return 0; + } + + if (qed_grc_is_mem_included(p_hwfn, + (enum block_id)cond_hdr->block_id, + mem_group_id)) { + u32 mem_byte_addr = + DWORDS_TO_BYTES(GET_FIELD(mem->dword0, + DBG_DUMP_MEM_ADDRESS)); + u32 mem_len = GET_FIELD(mem->dword1, + DBG_DUMP_MEM_LENGTH); + char storm_letter = 'a'; + bool is_storm = false; + + /* Update memory length for CCFC/TCFC memories + * according to number of LCIDs/LTIDs. + */ + if (mem_group_id == MEM_GROUP_CONN_CFC_MEM) + mem_len = qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS) + * (mem_len / MAX_LCIDS); + else if (mem_group_id == MEM_GROUP_TASK_CFC_MEM) + mem_len = qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS) + * (mem_len / MAX_LTIDS); + + /* If memory is associated with Storm, update + * Storm details. + */ + if (s_block_defs[cond_hdr->block_id]-> + associated_to_storm) { + is_storm = true; + storm_letter = + s_storm_defs[s_block_defs[ + cond_hdr->block_id]-> + storm_id].letter; + } + + /* Dump memory */ + offset += qed_grc_dump_mem(p_hwfn, p_ptt, + dump_buf + offset, dump, NULL, + mem_byte_addr, mem_len, 0, + false, + s_mem_group_names[mem_group_id], + is_storm, storm_letter); + } + } + } + + return offset; +} + +/* Dumps GRC memories according to the input array dump_mem. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_memories(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + u32 offset = 0, input_offset = 0; + + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].size_in_dwords) { + const struct dbg_dump_split_hdr *split_hdr = + (const struct dbg_dump_split_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset++]; + u8 split_type_id = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID); + u32 split_data_size = GET_FIELD(split_hdr->hdr, + DBG_DUMP_SPLIT_HDR_DATA_SIZE); + struct dbg_array curr_input_mems_arr = { + &s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr[input_offset], + split_data_size}; + + switch (split_type_id) { + case SPLIT_TYPE_NONE: + offset += qed_grc_dump_mem_entries(p_hwfn, + p_ptt, + curr_input_mems_arr, + dump_buf + offset, + dump); + break; + default: + DP_NOTICE(p_hwfn, + "Dumping split memories is currently not supported\n"); + break; + } + + input_offset += split_data_size; + } + + return offset; +} + +/* Dumps GRC context data for the specified Storm. + * Returns the dumped size in dwords. + */ +static u32 qed_grc_dump_ctx_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + const char *name, + u32 num_lids, + u32 lid_size, + u32 rd_reg_addr, + u8 storm_id) +{ + u32 i, lid, total_size; + u32 offset = 0; + + if (!lid_size) + return 0; + lid_size *= BYTES_IN_DWORD; + total_size = num_lids * lid_size; + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + name, + 0, + total_size, + lid_size * 32, + false, + name, + true, s_storm_defs[storm_id].letter); + + /* Dump context data */ + if (dump) { + for (lid = 0; lid < num_lids; lid++) { + for (i = 0; i < lid_size; i++, offset++) { + qed_wr(p_hwfn, + p_ptt, + s_storm_defs[storm_id].cm_ctx_wr_addr, + BIT(9) | lid); + *(dump_buf + offset) = qed_rd(p_hwfn, + p_ptt, + rd_reg_addr); + } + } + } else { + offset += total_size; + } + + return offset; +} + +/* Dumps GRC contexts. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_ctx(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0; + u8 storm_id; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (!qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) + continue; + + /* Dump Conn AG context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_AG_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS), + s_storm_defs[storm_id]. + cm_conn_ag_ctx_lid_size, + s_storm_defs[storm_id]. + cm_conn_ag_ctx_rd_addr, + storm_id); + + /* Dump Conn ST context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "CONN_ST_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS), + s_storm_defs[storm_id]. + cm_conn_st_ctx_lid_size, + s_storm_defs[storm_id]. + cm_conn_st_ctx_rd_addr, + storm_id); + + /* Dump Task AG context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_AG_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS), + s_storm_defs[storm_id]. + cm_task_ag_ctx_lid_size, + s_storm_defs[storm_id]. + cm_task_ag_ctx_rd_addr, + storm_id); + + /* Dump Task ST context size */ + offset += + qed_grc_dump_ctx_data(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + "TASK_ST_CTX", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS), + s_storm_defs[storm_id]. + cm_task_st_ctx_lid_size, + s_storm_defs[storm_id]. + cm_task_st_ctx_rd_addr, + storm_id); + } + + return offset; +} + +/* Dumps GRC IORs data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_iors(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + char buf[10] = "IOR_SET_?"; + u8 storm_id, set_id; + u32 offset = 0; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id)) { + for (set_id = 0; set_id < NUM_IOR_SETS; set_id++) { + u32 addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_STORM_REG_FILE + + DWORDS_TO_BYTES(IOR_SET_OFFSET(set_id)); + + buf[strlen(buf) - 1] = '0' + set_id; + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + buf, + addr, + IORS_PER_SET, + 32, + false, + "ior", + true, + s_storm_defs + [storm_id].letter); + } + } + } + + return offset; +} + +/* Dump VFC CAM. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_vfc_cam(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, u8 storm_id) +{ + u32 total_size = VFC_CAM_NUM_ROWS * VFC_CAM_RESP_DWORDS; + u32 cam_addr[VFC_CAM_ADDR_DWORDS] = { 0 }; + u32 cam_cmd[VFC_CAM_CMD_DWORDS] = { 0 }; + u32 offset = 0; + u32 row, i; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + "vfc_cam", + 0, + total_size, + 256, + false, + "vfc_cam", + true, s_storm_defs[storm_id].letter); + if (dump) { + /* Prepare CAM address */ + SET_VAR_FIELD(cam_addr, VFC_CAM_ADDR, OP, VFC_OPCODE_CAM_RD); + for (row = 0; row < VFC_CAM_NUM_ROWS; + row++, offset += VFC_CAM_RESP_DWORDS) { + /* Write VFC CAM command */ + SET_VAR_FIELD(cam_cmd, VFC_CAM_CMD, ROW, row); + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_WR, + cam_cmd, VFC_CAM_CMD_DWORDS); + + /* Write VFC CAM address */ + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_ADDR, + cam_addr, VFC_CAM_ADDR_DWORDS); + + /* Read VFC CAM read response */ + ARR_REG_RD(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_RD, + dump_buf + offset, VFC_CAM_RESP_DWORDS); + } + } else { + offset += total_size; + } + + return offset; +} + +/* Dump VFC RAM. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_vfc_ram(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, + u8 storm_id, struct vfc_ram_defs *ram_defs) +{ + u32 total_size = ram_defs->num_rows * VFC_RAM_RESP_DWORDS; + u32 ram_addr[VFC_RAM_ADDR_DWORDS] = { 0 }; + u32 ram_cmd[VFC_RAM_CMD_DWORDS] = { 0 }; + u32 offset = 0; + u32 row, i; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + ram_defs->mem_name, + 0, + total_size, + 256, + false, + ram_defs->type_name, + true, s_storm_defs[storm_id].letter); + + /* Prepare RAM address */ + SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, OP, VFC_OPCODE_RAM_RD); + + if (!dump) + return offset + total_size; + + for (row = ram_defs->base_row; + row < ram_defs->base_row + ram_defs->num_rows; + row++, offset += VFC_RAM_RESP_DWORDS) { + /* Write VFC RAM command */ + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_WR, + ram_cmd, VFC_RAM_CMD_DWORDS); + + /* Write VFC RAM address */ + SET_VAR_FIELD(ram_addr, VFC_RAM_ADDR, ROW, row); + ARR_REG_WR(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_ADDR, + ram_addr, VFC_RAM_ADDR_DWORDS); + + /* Read VFC RAM read response */ + ARR_REG_RD(p_hwfn, + p_ptt, + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_VFC_DATA_RD, + dump_buf + offset, VFC_RAM_RESP_DWORDS); + } + + return offset; +} + +/* Dumps GRC VFC data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_vfc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u8 storm_id, i; + u32 offset = 0; + + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + if (qed_grc_is_storm_included(p_hwfn, + (enum dbg_storms)storm_id) && + s_storm_defs[storm_id].has_vfc && + (storm_id != DBG_PSTORM_ID || + dev_data->platform_id == PLATFORM_ASIC)) { + /* Read CAM */ + offset += qed_grc_dump_vfc_cam(p_hwfn, + p_ptt, + dump_buf + offset, + dump, storm_id); + + /* Read RAM */ + for (i = 0; i < NUM_VFC_RAM_TYPES; i++) + offset += qed_grc_dump_vfc_ram(p_hwfn, + p_ptt, + dump_buf + + offset, + dump, + storm_id, + &s_vfc_ram_defs + [i]); + } + } + + return offset; +} + +/* Dumps GRC RSS data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_rss(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0; + u8 rss_mem_id; + + for (rss_mem_id = 0; rss_mem_id < NUM_RSS_MEM_TYPES; rss_mem_id++) { + struct rss_mem_defs *rss_defs = &s_rss_mem_defs[rss_mem_id]; + u32 num_entries = rss_defs->num_entries[dev_data->chip_id]; + u32 entry_width = rss_defs->entry_width[dev_data->chip_id]; + u32 total_size = (num_entries * entry_width) / 32; + bool packed = (entry_width == 16); + u32 addr = rss_defs->addr; + u32 i, j; + + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + rss_defs->mem_name, + addr, + total_size, + entry_width, + packed, + rss_defs->type_name, false, 0); + + if (!dump) { + offset += total_size; + continue; + } + + /* Dump RSS data */ + for (i = 0; i < BYTES_TO_DWORDS(total_size); i++, addr++) { + qed_wr(p_hwfn, p_ptt, RSS_REG_RSS_RAM_ADDR, addr); + for (j = 0; j < BYTES_IN_DWORD; j++, offset++) + *(dump_buf + offset) = + qed_rd(p_hwfn, p_ptt, + RSS_REG_RSS_RAM_DATA + + DWORDS_TO_BYTES(j)); + } + } + + return offset; +} + +/* Dumps GRC Big RAM. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_big_ram(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, u8 big_ram_id) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char mem_name[12] = "???_BIG_RAM"; + char type_name[8] = "???_RAM"; + u32 ram_size, total_blocks; + u32 offset = 0, i, j; + + total_blocks = + s_big_ram_defs[big_ram_id].num_of_blocks[dev_data->chip_id]; + ram_size = total_blocks * BIG_RAM_BLOCK_SIZE_DWORDS; + + strncpy(type_name, s_big_ram_defs[big_ram_id].instance_name, + strlen(s_big_ram_defs[big_ram_id].instance_name)); + strncpy(mem_name, s_big_ram_defs[big_ram_id].instance_name, + strlen(s_big_ram_defs[big_ram_id].instance_name)); + + /* Dump memory header */ + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + mem_name, + 0, + ram_size, + BIG_RAM_BLOCK_SIZE_BYTES * 8, + false, type_name, false, 0); + + if (!dump) + return offset + ram_size; + + /* Read and dump Big RAM data */ + for (i = 0; i < total_blocks / 2; i++) { + qed_wr(p_hwfn, p_ptt, s_big_ram_defs[big_ram_id].addr_reg_addr, + i); + for (j = 0; j < 2 * BIG_RAM_BLOCK_SIZE_DWORDS; j++, offset++) + *(dump_buf + offset) = qed_rd(p_hwfn, p_ptt, + s_big_ram_defs[big_ram_id]. + data_reg_addr + + DWORDS_TO_BYTES(j)); + } + + return offset; +} + +static u32 qed_grc_dump_mcp(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + bool block_enable[MAX_BLOCK_ID] = { 0 }; + bool halted = false; + u32 offset = 0; + + /* Halt MCP */ + if (dump) { + halted = !qed_mcp_halt(p_hwfn, p_ptt); + if (!halted) + DP_NOTICE(p_hwfn, "MCP halt failed!\n"); + } + + /* Dump MCP scratchpad */ + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + NULL, + MCP_REG_SCRATCH, + MCP_REG_SCRATCH_SIZE, + 0, false, "MCP", false, 0); + + /* Dump MCP cpu_reg_file */ + offset += qed_grc_dump_mem(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + NULL, + MCP_REG_CPU_REG_FILE, + MCP_REG_CPU_REG_FILE_SIZE, + 0, false, "MCP", false, 0); + + /* Dump MCP registers */ + block_enable[BLOCK_MCP] = true; + offset += qed_grc_dump_registers(p_hwfn, + p_ptt, + dump_buf + offset, + dump, block_enable, "block", "MCP"); + + /* Dump required non-MCP registers */ + offset += qed_grc_dump_regs_hdr(dump_buf + offset, + dump, 1, "eng", -1, "block", "MCP"); + offset += qed_grc_dump_reg_entry(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + BYTES_TO_DWORDS + (MISC_REG_SHARED_MEM_ADDR), 1); + + /* Release MCP */ + if (halted && qed_mcp_resume(p_hwfn, p_ptt)) + DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n"); + return offset; +} + +/* Dumps the tbus indirect memory for all PHYs. */ +static u32 qed_grc_dump_phy(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0, tbus_lo_offset, tbus_hi_offset; + char mem_name[32]; + u8 phy_id; + + for (phy_id = 0; phy_id < ARRAY_SIZE(s_phy_defs); phy_id++) { + struct phy_defs *phy_defs = &s_phy_defs[phy_id]; + int printed_chars; + + printed_chars = snprintf(mem_name, sizeof(mem_name), "tbus_%s", + phy_defs->phy_name); + if (printed_chars < 0 || printed_chars >= sizeof(mem_name)) + DP_NOTICE(p_hwfn, + "Unexpected debug error: invalid PHY memory name\n"); + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + mem_name, + 0, + PHY_DUMP_SIZE_DWORDS, + 16, true, mem_name, false, 0); + if (dump) { + u32 addr_lo_addr = phy_defs->base_addr + + phy_defs->tbus_addr_lo_addr; + u32 addr_hi_addr = phy_defs->base_addr + + phy_defs->tbus_addr_hi_addr; + u32 data_lo_addr = phy_defs->base_addr + + phy_defs->tbus_data_lo_addr; + u32 data_hi_addr = phy_defs->base_addr + + phy_defs->tbus_data_hi_addr; + u8 *bytes_buf = (u8 *)(dump_buf + offset); + + for (tbus_hi_offset = 0; + tbus_hi_offset < (NUM_PHY_TBUS_ADDRESSES >> 8); + tbus_hi_offset++) { + qed_wr(p_hwfn, + p_ptt, addr_hi_addr, tbus_hi_offset); + for (tbus_lo_offset = 0; tbus_lo_offset < 256; + tbus_lo_offset++) { + qed_wr(p_hwfn, + p_ptt, + addr_lo_addr, tbus_lo_offset); + *(bytes_buf++) = + (u8)qed_rd(p_hwfn, p_ptt, + data_lo_addr); + *(bytes_buf++) = + (u8)qed_rd(p_hwfn, p_ptt, + data_hi_addr); + } + } + } + + offset += PHY_DUMP_SIZE_DWORDS; + } + + return offset; +} + +static void qed_config_dbg_line(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum block_id block_id, + u8 line_id, + u8 cycle_en, + u8 right_shift, u8 force_valid, u8 force_frame) +{ + struct block_defs *p_block_defs = s_block_defs[block_id]; + + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_select_addr, line_id); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_cycle_enable_addr, cycle_en); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_shift_addr, right_shift); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_valid_addr, force_valid); + qed_wr(p_hwfn, p_ptt, p_block_defs->dbg_force_frame_addr, force_frame); +} + +/* Dumps Static Debug data. Returns the dumped size in dwords. */ +static u32 qed_grc_dump_static_debug(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump) +{ + u32 block_dwords = NUM_DBG_BUS_LINES * STATIC_DEBUG_LINE_DWORDS; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 offset = 0, block_id, line_id, addr, i; + struct block_defs *p_block_defs; + + if (dump) { + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, "Dumping static debug data...\n"); + + /* Disable all blocks debug output */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + p_block_defs = s_block_defs[block_id]; + + if (p_block_defs->has_dbg_bus[dev_data->chip_id]) + qed_wr(p_hwfn, p_ptt, + p_block_defs->dbg_cycle_enable_addr, 0); + } + + qed_bus_reset_dbg_block(p_hwfn, p_ptt); + qed_bus_set_framing_mode(p_hwfn, + p_ptt, DBG_BUS_FRAME_MODE_8HW_0ST); + qed_wr(p_hwfn, + p_ptt, DBG_REG_DEBUG_TARGET, DBG_BUS_TARGET_ID_INT_BUF); + qed_wr(p_hwfn, p_ptt, DBG_REG_FULL_MODE, 1); + qed_bus_enable_dbg_block(p_hwfn, p_ptt, true); + } + + /* Dump all static debug lines for each relevant block */ + for (block_id = 0; block_id < MAX_BLOCK_ID; block_id++) { + p_block_defs = s_block_defs[block_id]; + + if (!p_block_defs->has_dbg_bus[dev_data->chip_id]) + continue; + + /* Dump static section params */ + offset += qed_grc_dump_mem_hdr(p_hwfn, + dump_buf + offset, + dump, + p_block_defs->name, 0, + block_dwords, 32, false, + "STATIC", false, 0); + + if (dump && !dev_data->block_in_reset[block_id]) { + u8 dbg_client_id = + p_block_defs->dbg_client_id[dev_data->chip_id]; + + /* Enable block's client */ + qed_bus_enable_clients(p_hwfn, p_ptt, + BIT(dbg_client_id)); + + for (line_id = 0; line_id < NUM_DBG_BUS_LINES; + line_id++) { + /* Configure debug line ID */ + qed_config_dbg_line(p_hwfn, + p_ptt, + (enum block_id)block_id, + (u8)line_id, + 0xf, 0, 0, 0); + + /* Read debug line info */ + for (i = 0, addr = DBG_REG_CALENDAR_OUT_DATA; + i < STATIC_DEBUG_LINE_DWORDS; + i++, offset++, addr += BYTES_IN_DWORD) + dump_buf[offset] = qed_rd(p_hwfn, p_ptt, + addr); + } + + /* Disable block's client and debug output */ + qed_bus_enable_clients(p_hwfn, p_ptt, 0); + qed_wr(p_hwfn, p_ptt, + p_block_defs->dbg_cycle_enable_addr, 0); + } else { + /* All lines are invalid - dump zeros */ + if (dump) + memset(dump_buf + offset, 0, + DWORDS_TO_BYTES(block_dwords)); + offset += block_dwords; + } + } + + if (dump) { + qed_bus_enable_dbg_block(p_hwfn, p_ptt, false); + qed_bus_enable_clients(p_hwfn, p_ptt, 0); + } + + return offset; +} + +/* Performs GRC Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static enum dbg_status qed_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + bool parities_masked = false; + u8 i, port_mode = 0; + u32 offset = 0; + + /* Check if emulation platform */ + *num_dumped_dwords = 0; + + /* Fill GRC parameters that were not set by the user with their default + * value. + */ + qed_dbg_grc_set_params_default(p_hwfn); + + /* Find port mode */ + if (dump) { + switch (qed_rd(p_hwfn, p_ptt, MISC_REG_PORT_MODE)) { + case 0: + port_mode = 1; + break; + case 1: + port_mode = 2; + break; + case 2: + port_mode = 4; + break; + } + } + + /* Update reset state */ + if (dump) + qed_update_blocks_reset_state(p_hwfn, p_ptt); + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 4); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "grc-dump"); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "num-lcids", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LCIDS)); + offset += qed_dump_num_param(dump_buf + offset, + dump, + "num-ltids", + qed_grc_get_param(p_hwfn, + DBG_GRC_PARAM_NUM_LTIDS)); + offset += qed_dump_num_param(dump_buf + offset, + dump, "num-ports", port_mode); + + /* Dump reset registers (dumped before taking blocks out of reset ) */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) + offset += qed_grc_dump_reset_regs(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Take all blocks out of reset (using reset registers) */ + if (dump) { + qed_grc_unreset_blocks(p_hwfn, p_ptt); + qed_update_blocks_reset_state(p_hwfn, p_ptt); + } + + /* Disable all parities using MFW command */ + if (dump) { + parities_masked = !qed_mcp_mask_parities(p_hwfn, p_ptt, 1); + if (!parities_masked) { + if (qed_grc_get_param + (p_hwfn, DBG_GRC_PARAM_PARITY_SAFE)) + return DBG_STATUS_MCP_COULD_NOT_MASK_PRTY; + else + DP_NOTICE(p_hwfn, + "Failed to mask parities using MFW\n"); + } + } + + /* Dump modified registers (dumped before modifying them) */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) + offset += qed_grc_dump_modified_regs(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Stall storms */ + if (dump && + (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_IOR) || + qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC))) + qed_grc_stall_storms(p_hwfn, p_ptt, true); + + /* Dump all regs */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_REGS)) { + /* Dump all blocks except MCP */ + bool block_enable[MAX_BLOCK_ID]; + + for (i = 0; i < MAX_BLOCK_ID; i++) + block_enable[i] = true; + block_enable[BLOCK_MCP] = false; + offset += qed_grc_dump_registers(p_hwfn, + p_ptt, + dump_buf + + offset, + dump, + block_enable, NULL, NULL); + } + + /* Dump memories */ + offset += qed_grc_dump_memories(p_hwfn, p_ptt, dump_buf + offset, dump); + + /* Dump MCP */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_MCP)) + offset += qed_grc_dump_mcp(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump context */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_CM_CTX)) + offset += qed_grc_dump_ctx(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump RSS memories */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_RSS)) + offset += qed_grc_dump_rss(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump Big RAM */ + for (i = 0; i < NUM_BIG_RAM_TYPES; i++) + if (qed_grc_is_included(p_hwfn, s_big_ram_defs[i].grc_param)) + offset += qed_grc_dump_big_ram(p_hwfn, + p_ptt, + dump_buf + offset, + dump, i); + + /* Dump IORs */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_IOR)) + offset += qed_grc_dump_iors(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump VFC */ + if (qed_grc_is_included(p_hwfn, DBG_GRC_PARAM_DUMP_VFC)) + offset += qed_grc_dump_vfc(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump PHY tbus */ + if (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_PHY) && dev_data->chip_id == + CHIP_K2 && dev_data->platform_id == PLATFORM_ASIC) + offset += qed_grc_dump_phy(p_hwfn, + p_ptt, dump_buf + offset, dump); + + /* Dump static debug data */ + if (qed_grc_is_included(p_hwfn, + DBG_GRC_PARAM_DUMP_STATIC) && + dev_data->bus.state == DBG_BUS_STATE_IDLE) + offset += qed_grc_dump_static_debug(p_hwfn, + p_ptt, + dump_buf + offset, dump); + + /* Dump last section */ + offset += qed_dump_last_section(dump_buf, offset, dump); + if (dump) { + /* Unstall storms */ + if (qed_grc_get_param(p_hwfn, DBG_GRC_PARAM_UNSTALL)) + qed_grc_stall_storms(p_hwfn, p_ptt, false); + + /* Clear parity status */ + qed_grc_clear_all_prty(p_hwfn, p_ptt); + + /* Enable all parities using MFW command */ + if (parities_masked) + qed_mcp_mask_parities(p_hwfn, p_ptt, 0); + } + + *num_dumped_dwords = offset; + + return DBG_STATUS_OK; +} + +/* Writes the specified failing Idle Check rule to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_idle_chk_dump_failure(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 * + dump_buf, + bool dump, + u16 rule_id, + const struct dbg_idle_chk_rule *rule, + u16 fail_entry_id, u32 *cond_reg_values) +{ + const union dbg_idle_chk_reg *regs = &((const union dbg_idle_chk_reg *) + s_dbg_arrays + [BIN_BUF_DBG_IDLE_CHK_REGS]. + ptr)[rule->reg_offset]; + const struct dbg_idle_chk_cond_reg *cond_regs = ®s[0].cond_reg; + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + struct dbg_idle_chk_result_hdr *hdr = + (struct dbg_idle_chk_result_hdr *)dump_buf; + const struct dbg_idle_chk_info_reg *info_regs = + ®s[rule->num_cond_regs].info_reg; + u32 next_reg_offset = 0, i, offset = 0; + u8 reg_id; + + /* Dump rule data */ + if (dump) { + memset(hdr, 0, sizeof(*hdr)); + hdr->rule_id = rule_id; + hdr->mem_entry_id = fail_entry_id; + hdr->severity = rule->severity; + hdr->num_dumped_cond_regs = rule->num_cond_regs; + } + + offset += IDLE_CHK_RESULT_HDR_DWORDS; + + /* Dump condition register values */ + for (reg_id = 0; reg_id < rule->num_cond_regs; reg_id++) { + const struct dbg_idle_chk_cond_reg *reg = &cond_regs[reg_id]; + + /* Write register header */ + if (dump) { + struct dbg_idle_chk_result_reg_hdr *reg_hdr = + (struct dbg_idle_chk_result_reg_hdr *)(dump_buf + + offset); + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS; + memset(reg_hdr, 0, + sizeof(struct dbg_idle_chk_result_reg_hdr)); + reg_hdr->start_entry = reg->start_entry; + reg_hdr->size = reg->entry_size; + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM, + reg->num_entries > 1 || reg->start_entry > 0 + ? 1 : 0); + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, reg_id); + + /* Write register values */ + for (i = 0; i < reg_hdr->size; + i++, next_reg_offset++, offset++) + dump_buf[offset] = + cond_reg_values[next_reg_offset]; + } else { + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + + reg->entry_size; + } + } + + /* Dump info register values */ + for (reg_id = 0; reg_id < rule->num_info_regs; reg_id++) { + const struct dbg_idle_chk_info_reg *reg = &info_regs[reg_id]; + u32 block_id; + + if (!dump) { + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS + reg->size; + continue; + } + + /* Check if register's block is in reset */ + block_id = GET_FIELD(reg->data, DBG_IDLE_CHK_INFO_REG_BLOCK_ID); + if (block_id >= MAX_BLOCK_ID) { + DP_NOTICE(p_hwfn, "Invalid block_id\n"); + return 0; + } + + if (!dev_data->block_in_reset[block_id]) { + bool eval_mode = GET_FIELD(reg->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + bool mode_match = true; + + /* Check mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(reg->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + mode_match = + qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match) { + u32 grc_addr = + DWORDS_TO_BYTES(GET_FIELD(reg->data, + DBG_IDLE_CHK_INFO_REG_ADDRESS)); + + /* Write register header */ + struct dbg_idle_chk_result_reg_hdr *reg_hdr = + (struct dbg_idle_chk_result_reg_hdr *) + (dump_buf + offset); + + offset += IDLE_CHK_RESULT_REG_HDR_DWORDS; + hdr->num_dumped_info_regs++; + memset(reg_hdr, 0, sizeof(*reg_hdr)); + reg_hdr->size = reg->size; + SET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID, + rule->num_cond_regs + reg_id); + + /* Write register values */ + for (i = 0; i < reg->size; + i++, offset++, grc_addr += 4) + dump_buf[offset] = + qed_rd(p_hwfn, p_ptt, grc_addr); + } + } + } + + return offset; +} + +/* Dumps idle check rule entries. Returns the dumped size in dwords. */ +static u32 +qed_idle_chk_dump_rule_entries(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + u32 *dump_buf, bool dump, + const struct dbg_idle_chk_rule *input_rules, + u32 num_input_rules, u32 *num_failing_rules) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + u32 cond_reg_values[IDLE_CHK_MAX_ENTRIES_SIZE]; + u32 i, j, offset = 0; + u16 entry_id; + u8 reg_id; + + *num_failing_rules = 0; + for (i = 0; i < num_input_rules; i++) { + const struct dbg_idle_chk_cond_reg *cond_regs; + const struct dbg_idle_chk_rule *rule; + const union dbg_idle_chk_reg *regs; + u16 num_reg_entries = 1; + bool check_rule = true; + const u32 *imm_values; + + rule = &input_rules[i]; + regs = &((const union dbg_idle_chk_reg *) + s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr) + [rule->reg_offset]; + cond_regs = ®s[0].cond_reg; + imm_values = &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr + [rule->imm_offset]; + + /* Check if all condition register blocks are out of reset, and + * find maximal number of entries (all condition registers that + * are memories must have the same size, which is > 1). + */ + for (reg_id = 0; reg_id < rule->num_cond_regs && check_rule; + reg_id++) { + u32 block_id = GET_FIELD(cond_regs[reg_id].data, + DBG_IDLE_CHK_COND_REG_BLOCK_ID); + + if (block_id >= MAX_BLOCK_ID) { + DP_NOTICE(p_hwfn, "Invalid block_id\n"); + return 0; + } + + check_rule = !dev_data->block_in_reset[block_id]; + if (cond_regs[reg_id].num_entries > num_reg_entries) + num_reg_entries = cond_regs[reg_id].num_entries; + } + + if (!check_rule && dump) + continue; + + /* Go over all register entries (number of entries is the same + * for all condition registers). + */ + for (entry_id = 0; entry_id < num_reg_entries; entry_id++) { + /* Read current entry of all condition registers */ + if (dump) { + u32 next_reg_offset = 0; + + for (reg_id = 0; + reg_id < rule->num_cond_regs; + reg_id++) { + const struct dbg_idle_chk_cond_reg + *reg = &cond_regs[reg_id]; + + /* Find GRC address (if it's a memory, + * the address of the specific entry is + * calculated). + */ + u32 grc_addr = + DWORDS_TO_BYTES( + GET_FIELD(reg->data, + DBG_IDLE_CHK_COND_REG_ADDRESS)); + + if (reg->num_entries > 1 || + reg->start_entry > 0) { + u32 padded_entry_size = + reg->entry_size > 1 ? + roundup_pow_of_two + (reg->entry_size) : 1; + + grc_addr += + DWORDS_TO_BYTES( + (reg->start_entry + + entry_id) + * padded_entry_size); + } + + /* Read registers */ + if (next_reg_offset + reg->entry_size >= + IDLE_CHK_MAX_ENTRIES_SIZE) { + DP_NOTICE(p_hwfn, + "idle check registers entry is too large\n"); + return 0; + } + + for (j = 0; j < reg->entry_size; + j++, next_reg_offset++, + grc_addr += 4) + cond_reg_values[next_reg_offset] = + qed_rd(p_hwfn, p_ptt, grc_addr); + } + } + + /* Call rule's condition function - a return value of + * true indicates failure. + */ + if ((*cond_arr[rule->cond_id])(cond_reg_values, + imm_values) || !dump) { + offset += + qed_idle_chk_dump_failure(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + rule->rule_id, + rule, + entry_id, + cond_reg_values); + (*num_failing_rules)++; + break; + } + } + } + + return offset; +} + +/* Performs Idle Check Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + u32 offset = 0, input_offset = 0, num_failing_rules = 0; + u32 num_failing_rules_offset; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "idle-chk"); + + /* Dump idle check section header with a single parameter */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "idle_chk", 1); + num_failing_rules_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "num_rules", 0); + while (input_offset < + s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].size_in_dwords) { + const struct dbg_idle_chk_cond_hdr *cond_hdr = + (const struct dbg_idle_chk_cond_hdr *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr + [input_offset++]; + bool eval_mode = GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_EVAL_MODE) > 0; + bool mode_match = true; + + /* Check mode */ + if (eval_mode) { + u16 modes_buf_offset = + GET_FIELD(cond_hdr->mode.data, + DBG_MODE_HDR_MODES_BUF_OFFSET); + + mode_match = qed_is_mode_match(p_hwfn, + &modes_buf_offset); + } + + if (mode_match) { + u32 curr_failing_rules; + + offset += + qed_idle_chk_dump_rule_entries(p_hwfn, + p_ptt, + dump_buf + offset, + dump, + (const struct dbg_idle_chk_rule *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES]. + ptr[input_offset], + cond_hdr->data_size / IDLE_CHK_RULE_SIZE_DWORDS, + &curr_failing_rules); + num_failing_rules += curr_failing_rules; + } + + input_offset += cond_hdr->data_size; + } + + /* Overwrite num_rules parameter */ + if (dump) + qed_dump_num_param(dump_buf + num_failing_rules_offset, + dump, "num_rules", num_failing_rules); + + return offset; +} + +/* Finds the meta data image in NVRAM. */ +static enum dbg_status qed_find_nvram_image(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 image_type, + u32 *nvram_offset_bytes, + u32 *nvram_size_bytes) +{ + u32 ret_mcp_resp, ret_mcp_param, ret_txn_size; + struct mcp_file_att file_att; + + /* Call NVRAM get file command */ + if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_NVM_GET_FILE_ATT, + image_type, &ret_mcp_resp, &ret_mcp_param, + &ret_txn_size, (u32 *)&file_att) != 0) + return DBG_STATUS_NVRAM_GET_IMAGE_FAILED; + + /* Check response */ + if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK) + return DBG_STATUS_NVRAM_GET_IMAGE_FAILED; + + /* Update return values */ + *nvram_offset_bytes = file_att.nvm_start_addr; + *nvram_size_bytes = file_att.len; + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "find_nvram_image: found NVRAM image of type %d in NVRAM offset %d bytes with size %d bytes\n", + image_type, *nvram_offset_bytes, *nvram_size_bytes); + + /* Check alignment */ + if (*nvram_size_bytes & 0x3) + return DBG_STATUS_NON_ALIGNED_NVRAM_IMAGE; + return DBG_STATUS_OK; +} + +static enum dbg_status qed_nvram_read(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_bytes, + u32 nvram_size_bytes, u32 *ret_buf) +{ + u32 ret_mcp_resp, ret_mcp_param, ret_read_size; + u32 bytes_to_copy, read_offset = 0; + s32 bytes_left = nvram_size_bytes; + + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "nvram_read: reading image of size %d bytes from NVRAM\n", + nvram_size_bytes); + do { + bytes_to_copy = + (bytes_left > + MCP_DRV_NVM_BUF_LEN) ? MCP_DRV_NVM_BUF_LEN : bytes_left; + + /* Call NVRAM read command */ + if (qed_mcp_nvm_rd_cmd(p_hwfn, p_ptt, + DRV_MSG_CODE_NVM_READ_NVRAM, + (nvram_offset_bytes + + read_offset) | + (bytes_to_copy << + DRV_MB_PARAM_NVM_LEN_SHIFT), + &ret_mcp_resp, &ret_mcp_param, + &ret_read_size, + (u32 *)((u8 *)ret_buf + + read_offset)) != 0) + return DBG_STATUS_NVRAM_READ_FAILED; + + /* Check response */ + if ((ret_mcp_resp & FW_MSG_CODE_MASK) != FW_MSG_CODE_NVM_OK) + return DBG_STATUS_NVRAM_READ_FAILED; + + /* Update read offset */ + read_offset += ret_read_size; + bytes_left -= ret_read_size; + } while (bytes_left > 0); + + return DBG_STATUS_OK; +} + +/* Get info on the MCP Trace data in the scratchpad: + * - trace_data_grc_addr - the GRC address of the trace data + * - trace_data_size_bytes - the size in bytes of the MCP Trace data (without + * the header) + */ +static enum dbg_status qed_mcp_trace_get_data_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *trace_data_grc_addr, + u32 *trace_data_size_bytes) +{ + /* Read MCP trace section offsize structure from MCP scratchpad */ + u32 spad_trace_offsize = qed_rd(p_hwfn, + p_ptt, + MCP_SPAD_TRACE_OFFSIZE_ADDR); + u32 signature; + + /* Extract MCP trace section GRC address from offsize structure (within + * scratchpad). + */ + *trace_data_grc_addr = + MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize); + + /* Read signature from MCP trace section */ + signature = qed_rd(p_hwfn, p_ptt, + *trace_data_grc_addr + + offsetof(struct mcp_trace, signature)); + if (signature != MFW_TRACE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Read trace size from MCP trace section */ + *trace_data_size_bytes = qed_rd(p_hwfn, + p_ptt, + *trace_data_grc_addr + + offsetof(struct mcp_trace, size)); + return DBG_STATUS_OK; +} + +/* Reads MCP trace meta data image from NVRAM. + * - running_bundle_id (OUT) - the running bundle ID (invalid when loaded from + * file) + * - trace_meta_offset_bytes (OUT) - the NVRAM offset in bytes in which the MCP + * Trace meta data starts (invalid when loaded from file) + * - trace_meta_size_bytes (OUT) - the size in bytes of the MCP Trace meta data + */ +static enum dbg_status qed_mcp_trace_get_meta_info(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 trace_data_size_bytes, + u32 *running_bundle_id, + u32 *trace_meta_offset_bytes, + u32 *trace_meta_size_bytes) +{ + /* Read MCP trace section offsize structure from MCP scratchpad */ + u32 spad_trace_offsize = qed_rd(p_hwfn, + p_ptt, + MCP_SPAD_TRACE_OFFSIZE_ADDR); + + /* Find running bundle ID */ + u32 running_mfw_addr = + MCP_REG_SCRATCH + SECTION_OFFSET(spad_trace_offsize) + + QED_SECTION_SIZE(spad_trace_offsize) + trace_data_size_bytes; + enum dbg_status status; + u32 nvram_image_type; + + *running_bundle_id = qed_rd(p_hwfn, p_ptt, running_mfw_addr); + if (*running_bundle_id > 1) + return DBG_STATUS_INVALID_NVRAM_BUNDLE; + + /* Find image in NVRAM */ + nvram_image_type = + (*running_bundle_id == + DIR_ID_1) ? NVM_TYPE_MFW_TRACE1 : NVM_TYPE_MFW_TRACE2; + status = qed_find_nvram_image(p_hwfn, + p_ptt, + nvram_image_type, + trace_meta_offset_bytes, + trace_meta_size_bytes); + + return status; +} + +/* Reads the MCP Trace data from the specified GRC address into the specified + * buffer. + */ +static void qed_mcp_trace_read_data(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 grc_addr, u32 size_in_dwords, u32 *buf) +{ + u32 i; + + DP_VERBOSE(p_hwfn, + QED_MSG_DEBUG, + "mcp_trace_read_data: reading trace data of size %d dwords from GRC address 0x%x\n", + size_in_dwords, grc_addr); + for (i = 0; i < size_in_dwords; i++, grc_addr += BYTES_IN_DWORD) + buf[i] = qed_rd(p_hwfn, p_ptt, grc_addr); +} + +/* Reads the MCP Trace meta data (from NVRAM or buffer) into the specified + * buffer. + */ +static enum dbg_status qed_mcp_trace_read_meta(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 nvram_offset_in_bytes, + u32 size_in_bytes, u32 *buf) +{ + u8 *byte_buf = (u8 *)buf; + u8 modules_num, i; + u32 signature; + + /* Read meta data from NVRAM */ + enum dbg_status status = qed_nvram_read(p_hwfn, + p_ptt, + nvram_offset_in_bytes, + size_in_bytes, + buf); + + if (status != DBG_STATUS_OK) + return status; + + /* Extract and check first signature */ + signature = qed_read_unaligned_dword(byte_buf); + byte_buf += sizeof(u32); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Extract number of modules */ + modules_num = *(byte_buf++); + + /* Skip all modules */ + for (i = 0; i < modules_num; i++) { + u8 module_len = *(byte_buf++); + + byte_buf += module_len; + } + + /* Extract and check second signature */ + signature = qed_read_unaligned_dword(byte_buf); + byte_buf += sizeof(u32); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + return DBG_STATUS_OK; +} + +/* Dump MCP Trace */ +enum dbg_status qed_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 trace_data_grc_addr, trace_data_size_bytes, trace_data_size_dwords; + u32 trace_meta_size_dwords, running_bundle_id, offset = 0; + u32 trace_meta_offset_bytes, trace_meta_size_bytes; + enum dbg_status status; + int halted = 0; + + *num_dumped_dwords = 0; + + /* Get trace data info */ + status = qed_mcp_trace_get_data_info(p_hwfn, + p_ptt, + &trace_data_grc_addr, + &trace_data_size_bytes); + if (status != DBG_STATUS_OK) + return status; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "mcp-trace"); + + /* Halt MCP while reading from scratchpad so the read data will be + * consistent if halt fails, MCP trace is taken anyway, with a small + * risk that it may be corrupt. + */ + if (dump) { + halted = !qed_mcp_halt(p_hwfn, p_ptt); + if (!halted) + DP_NOTICE(p_hwfn, "MCP halt failed!\n"); + } + + /* Find trace data size */ + trace_data_size_dwords = + DIV_ROUND_UP(trace_data_size_bytes + sizeof(struct mcp_trace), + BYTES_IN_DWORD); + + /* Dump trace data section header and param */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "mcp_trace_data", 1); + offset += qed_dump_num_param(dump_buf + offset, + dump, "size", trace_data_size_dwords); + + /* Read trace data from scratchpad into dump buffer */ + if (dump) + qed_mcp_trace_read_data(p_hwfn, + p_ptt, + trace_data_grc_addr, + trace_data_size_dwords, + dump_buf + offset); + offset += trace_data_size_dwords; + + /* Resume MCP (only if halt succeeded) */ + if (halted && qed_mcp_resume(p_hwfn, p_ptt) != 0) + DP_NOTICE(p_hwfn, "Failed to resume MCP after halt!\n"); + + /* Dump trace meta section header */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "mcp_trace_meta", 1); + + /* Read trace meta info */ + status = qed_mcp_trace_get_meta_info(p_hwfn, + p_ptt, + trace_data_size_bytes, + &running_bundle_id, + &trace_meta_offset_bytes, + &trace_meta_size_bytes); + if (status != DBG_STATUS_OK) + return status; + + /* Dump trace meta size param (trace_meta_size_bytes is always + * dword-aligned). + */ + trace_meta_size_dwords = BYTES_TO_DWORDS(trace_meta_size_bytes); + offset += qed_dump_num_param(dump_buf + offset, dump, "size", + trace_meta_size_dwords); + + /* Read trace meta image into dump buffer */ + if (dump) { + status = qed_mcp_trace_read_meta(p_hwfn, + p_ptt, + trace_meta_offset_bytes, + trace_meta_size_bytes, + dump_buf + offset); + if (status != DBG_STATUS_OK) + return status; + } + + offset += trace_meta_size_dwords; + + *num_dumped_dwords = offset; + + return DBG_STATUS_OK; +} + +/* Dump GRC FIFO */ +enum dbg_status qed_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, dwords_read, size_param_offset; + bool fifo_has_data; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "reg-fifo"); + + /* Dump fifo data section header and param. The size param is 0 for now, + * and is overwritten after reading the FIFO. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "reg_fifo_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + /* FIFO max size is REG_FIFO_DEPTH_DWORDS. There is no way to + * test how much data is available, except for reading it. + */ + offset += REG_FIFO_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + fifo_has_data = qed_rd(p_hwfn, p_ptt, + GRC_REG_TRACE_FIFO_VALID_DATA) > 0; + + /* Pull available data from fifo. Use DMAE since this is widebus memory + * and must be accessed atomically. Test for dwords_read not passing + * buffer size since more entries could be added to the buffer as we are + * emptying it. + */ + for (dwords_read = 0; + fifo_has_data && dwords_read < REG_FIFO_DEPTH_DWORDS; + dwords_read += REG_FIFO_ELEMENT_DWORDS, offset += + REG_FIFO_ELEMENT_DWORDS) { + if (qed_dmae_grc2host(p_hwfn, p_ptt, GRC_REG_TRACE_FIFO, + (u64)(uintptr_t)(&dump_buf[offset]), + REG_FIFO_ELEMENT_DWORDS, 0)) + return DBG_STATUS_DMAE_FAILED; + fifo_has_data = qed_rd(p_hwfn, p_ptt, + GRC_REG_TRACE_FIFO_VALID_DATA) > 0; + } + + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + dwords_read); + + *num_dumped_dwords = offset; + return DBG_STATUS_OK; +} + +/* Dump IGU FIFO */ +enum dbg_status qed_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, dwords_read, size_param_offset; + bool fifo_has_data; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "igu-fifo"); + + /* Dump fifo data section header and param. The size param is 0 for now, + * and is overwritten after reading the FIFO. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "igu_fifo_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + /* FIFO max size is IGU_FIFO_DEPTH_DWORDS. There is no way to + * test how much data is available, except for reading it. + */ + offset += IGU_FIFO_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + fifo_has_data = qed_rd(p_hwfn, p_ptt, + IGU_REG_ERROR_HANDLING_DATA_VALID) > 0; + + /* Pull available data from fifo. Use DMAE since this is widebus memory + * and must be accessed atomically. Test for dwords_read not passing + * buffer size since more entries could be added to the buffer as we are + * emptying it. + */ + for (dwords_read = 0; + fifo_has_data && dwords_read < IGU_FIFO_DEPTH_DWORDS; + dwords_read += IGU_FIFO_ELEMENT_DWORDS, offset += + IGU_FIFO_ELEMENT_DWORDS) { + if (qed_dmae_grc2host(p_hwfn, p_ptt, + IGU_REG_ERROR_HANDLING_MEMORY, + (u64)(uintptr_t)(&dump_buf[offset]), + IGU_FIFO_ELEMENT_DWORDS, 0)) + return DBG_STATUS_DMAE_FAILED; + fifo_has_data = qed_rd(p_hwfn, p_ptt, + IGU_REG_ERROR_HANDLING_DATA_VALID) > 0; + } + + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + dwords_read); + + *num_dumped_dwords = offset; + return DBG_STATUS_OK; +} + +/* Protection Override dump */ +enum dbg_status qed_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + bool dump, u32 *num_dumped_dwords) +{ + u32 offset = 0, size_param_offset, override_window_dwords; + + *num_dumped_dwords = 0; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "protection-override"); + + /* Dump data section header and param. The size param is 0 for now, and + * is overwritten after reading the data. + */ + offset += qed_dump_section_hdr(dump_buf + offset, + dump, "protection_override_data", 1); + size_param_offset = offset; + offset += qed_dump_num_param(dump_buf + offset, dump, "size", 0); + + if (!dump) { + offset += PROTECTION_OVERRIDE_DEPTH_DWORDS; + *num_dumped_dwords = offset; + return DBG_STATUS_OK; + } + + /* Add override window info to buffer */ + override_window_dwords = + qed_rd(p_hwfn, p_ptt, + GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW) * + PROTECTION_OVERRIDE_ELEMENT_DWORDS; + if (qed_dmae_grc2host(p_hwfn, p_ptt, + GRC_REG_PROTECTION_OVERRIDE_WINDOW, + (u64)(uintptr_t)(dump_buf + offset), + override_window_dwords, 0)) + return DBG_STATUS_DMAE_FAILED; + offset += override_window_dwords; + qed_dump_num_param(dump_buf + size_param_offset, dump, "size", + override_window_dwords); + + *num_dumped_dwords = offset; + return DBG_STATUS_OK; +} + +/* Performs FW Asserts Dump to the specified buffer. + * Returns the dumped size in dwords. + */ +static u32 qed_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, bool dump) +{ + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + char storm_letter_str[2] = "?"; + struct fw_info fw_info; + u32 offset = 0, i; + u8 storm_id; + + /* Dump global params */ + offset += qed_dump_common_global_params(p_hwfn, + p_ptt, + dump_buf + offset, dump, 1); + offset += qed_dump_str_param(dump_buf + offset, + dump, "dump-type", "fw-asserts"); + for (storm_id = 0; storm_id < MAX_DBG_STORMS; storm_id++) { + u32 fw_asserts_section_addr, next_list_idx_addr, next_list_idx, + last_list_idx, element_addr; + + if (dev_data->block_in_reset[s_storm_defs[storm_id].block_id]) + continue; + + /* Read FW info for the current Storm */ + qed_read_fw_info(p_hwfn, p_ptt, storm_id, &fw_info); + + /* Dump FW Asserts section header and params */ + storm_letter_str[0] = s_storm_defs[storm_id].letter; + offset += qed_dump_section_hdr(dump_buf + offset, dump, + "fw_asserts", 2); + offset += qed_dump_str_param(dump_buf + offset, dump, "storm", + storm_letter_str); + offset += qed_dump_num_param(dump_buf + offset, dump, "size", + fw_info.fw_asserts_section. + list_element_dword_size); + + if (!dump) { + offset += fw_info.fw_asserts_section. + list_element_dword_size; + continue; + } + + /* Read and dump FW Asserts data */ + fw_asserts_section_addr = + s_storm_defs[storm_id].sem_fast_mem_addr + + SEM_FAST_REG_INT_RAM + + RAM_LINES_TO_BYTES(fw_info.fw_asserts_section. + section_ram_line_offset); + next_list_idx_addr = + fw_asserts_section_addr + + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_next_index_dword_offset); + next_list_idx = qed_rd(p_hwfn, p_ptt, next_list_idx_addr); + last_list_idx = (next_list_idx > 0 + ? next_list_idx + : fw_info.fw_asserts_section.list_num_elements) + - 1; + element_addr = + fw_asserts_section_addr + + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_dword_offset) + + last_list_idx * + DWORDS_TO_BYTES(fw_info.fw_asserts_section. + list_element_dword_size); + for (i = 0; + i < fw_info.fw_asserts_section.list_element_dword_size; + i++, offset++, element_addr += BYTES_IN_DWORD) + dump_buf[offset] = qed_rd(p_hwfn, p_ptt, element_addr); + } + + /* Dump last section */ + offset += qed_dump_section_hdr(dump_buf + offset, dump, "last", 0); + return offset; +} + +/***************************** Public Functions *******************************/ + +enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr) +{ + /* Convert binary data to debug arrays */ + u32 num_of_buffers = *(u32 *)bin_ptr; + struct bin_buffer_hdr *buf_array; + u8 buf_id; + + buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1); + + for (buf_id = 0; buf_id < num_of_buffers; buf_id++) { + s_dbg_arrays[buf_id].ptr = + (u32 *)(bin_ptr + buf_array[buf_id].offset); + s_dbg_arrays[buf_id].size_in_dwords = + BYTES_TO_DWORDS(buf_array[buf_id].length); + } + + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !s_dbg_arrays[BIN_BUF_DBG_DUMP_REG].ptr || + !s_dbg_arrays[BIN_BUF_DBG_DUMP_MEM].ptr || + !s_dbg_arrays[BIN_BUF_DBG_ATTN_BLOCKS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_ATTN_REGS].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + return qed_grc_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_grc_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* GRC Dump */ + status = qed_grc_dump(p_hwfn, p_ptt, dump_buf, true, num_dumped_dwords); + + /* Clear all GRC params */ + qed_dbg_grc_clear_params(p_hwfn); + return status; +} + +enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + struct dbg_tools_data *dev_data = &p_hwfn->dbg_info; + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + if (!s_dbg_arrays[BIN_BUF_DBG_MODE_TREE].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_REGS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_IMMS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_RULES].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + if (!dev_data->idle_chk.buf_size_set) { + dev_data->idle_chk.buf_size = qed_idle_chk_dump(p_hwfn, + p_ptt, + NULL, false); + dev_data->idle_chk.buf_size_set = true; + } + + *buf_size = dev_data->idle_chk.buf_size; + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_idle_chk_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + + /* Idle Check Dump */ + *num_dumped_dwords = qed_idle_chk_dump(p_hwfn, p_ptt, dump_buf, true); + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_mcp_trace_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_mcp_trace_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + + /* Perform dump */ + return qed_mcp_trace_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_reg_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_reg_fifo_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_reg_fifo_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_igu_fifo_dump(p_hwfn, p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_igu_fifo_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_igu_fifo_dump(p_hwfn, + p_ptt, dump_buf, true, num_dumped_dwords); +} + +enum dbg_status +qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + return qed_protection_override_dump(p_hwfn, + p_ptt, NULL, false, buf_size); +} + +enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_protection_override_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + return qed_protection_override_dump(p_hwfn, + p_ptt, + dump_buf, true, num_dumped_dwords); +} + +enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size) +{ + enum dbg_status status = qed_dbg_dev_init(p_hwfn, p_ptt); + + *buf_size = 0; + if (status != DBG_STATUS_OK) + return status; + + /* Update reset state */ + qed_update_blocks_reset_state(p_hwfn, p_ptt); + *buf_size = qed_fw_asserts_dump(p_hwfn, p_ptt, NULL, false); + return DBG_STATUS_OK; +} + +enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords) +{ + u32 needed_buf_size_in_dwords; + enum dbg_status status; + + status = qed_dbg_fw_asserts_get_dump_buf_size(p_hwfn, p_ptt, + &needed_buf_size_in_dwords); + + *num_dumped_dwords = 0; + if (status != DBG_STATUS_OK) + return status; + if (buf_size_in_dwords < needed_buf_size_in_dwords) + return DBG_STATUS_DUMP_BUF_TOO_SMALL; + + *num_dumped_dwords = qed_fw_asserts_dump(p_hwfn, p_ptt, dump_buf, true); + return DBG_STATUS_OK; +} + +/******************************* Data Types **********************************/ + +struct mcp_trace_format { + u32 data; +#define MCP_TRACE_FORMAT_MODULE_MASK 0x0000ffff +#define MCP_TRACE_FORMAT_MODULE_SHIFT 0 +#define MCP_TRACE_FORMAT_LEVEL_MASK 0x00030000 +#define MCP_TRACE_FORMAT_LEVEL_SHIFT 16 +#define MCP_TRACE_FORMAT_P1_SIZE_MASK 0x000c0000 +#define MCP_TRACE_FORMAT_P1_SIZE_SHIFT 18 +#define MCP_TRACE_FORMAT_P2_SIZE_MASK 0x00300000 +#define MCP_TRACE_FORMAT_P2_SIZE_SHIFT 20 +#define MCP_TRACE_FORMAT_P3_SIZE_MASK 0x00c00000 +#define MCP_TRACE_FORMAT_P3_SIZE_SHIFT 22 +#define MCP_TRACE_FORMAT_LEN_MASK 0xff000000 +#define MCP_TRACE_FORMAT_LEN_SHIFT 24 + char *format_str; +}; + +struct mcp_trace_meta { + u32 modules_num; + char **modules; + u32 formats_num; + struct mcp_trace_format *formats; +}; + +/* Reg fifo element */ +struct reg_fifo_element { + u64 data; +#define REG_FIFO_ELEMENT_ADDRESS_SHIFT 0 +#define REG_FIFO_ELEMENT_ADDRESS_MASK 0x7fffff +#define REG_FIFO_ELEMENT_ACCESS_SHIFT 23 +#define REG_FIFO_ELEMENT_ACCESS_MASK 0x1 +#define REG_FIFO_ELEMENT_PF_SHIFT 24 +#define REG_FIFO_ELEMENT_PF_MASK 0xf +#define REG_FIFO_ELEMENT_VF_SHIFT 28 +#define REG_FIFO_ELEMENT_VF_MASK 0xff +#define REG_FIFO_ELEMENT_PORT_SHIFT 36 +#define REG_FIFO_ELEMENT_PORT_MASK 0x3 +#define REG_FIFO_ELEMENT_PRIVILEGE_SHIFT 38 +#define REG_FIFO_ELEMENT_PRIVILEGE_MASK 0x3 +#define REG_FIFO_ELEMENT_PROTECTION_SHIFT 40 +#define REG_FIFO_ELEMENT_PROTECTION_MASK 0x7 +#define REG_FIFO_ELEMENT_MASTER_SHIFT 43 +#define REG_FIFO_ELEMENT_MASTER_MASK 0xf +#define REG_FIFO_ELEMENT_ERROR_SHIFT 47 +#define REG_FIFO_ELEMENT_ERROR_MASK 0x1f +}; + +/* IGU fifo element */ +struct igu_fifo_element { + u32 dword0; +#define IGU_FIFO_ELEMENT_DWORD0_FID_SHIFT 0 +#define IGU_FIFO_ELEMENT_DWORD0_FID_MASK 0xff +#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_SHIFT 8 +#define IGU_FIFO_ELEMENT_DWORD0_IS_PF_MASK 0x1 +#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_SHIFT 9 +#define IGU_FIFO_ELEMENT_DWORD0_SOURCE_MASK 0xf +#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_SHIFT 13 +#define IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE_MASK 0xf +#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_SHIFT 17 +#define IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR_MASK 0x7fff + u32 dword1; + u32 dword2; +#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_SHIFT 0 +#define IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD_MASK 0x1 +#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_SHIFT 1 +#define IGU_FIFO_ELEMENT_DWORD12_WR_DATA_MASK 0xffffffff + u32 reserved; +}; + +struct igu_fifo_wr_data { + u32 data; +#define IGU_FIFO_WR_DATA_PROD_CONS_SHIFT 0 +#define IGU_FIFO_WR_DATA_PROD_CONS_MASK 0xffffff +#define IGU_FIFO_WR_DATA_UPDATE_FLAG_SHIFT 24 +#define IGU_FIFO_WR_DATA_UPDATE_FLAG_MASK 0x1 +#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_SHIFT 25 +#define IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB_MASK 0x3 +#define IGU_FIFO_WR_DATA_SEGMENT_SHIFT 27 +#define IGU_FIFO_WR_DATA_SEGMENT_MASK 0x1 +#define IGU_FIFO_WR_DATA_TIMER_MASK_SHIFT 28 +#define IGU_FIFO_WR_DATA_TIMER_MASK_MASK 0x1 +#define IGU_FIFO_WR_DATA_CMD_TYPE_SHIFT 31 +#define IGU_FIFO_WR_DATA_CMD_TYPE_MASK 0x1 +}; + +struct igu_fifo_cleanup_wr_data { + u32 data; +#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_SHIFT 0 +#define IGU_FIFO_CLEANUP_WR_DATA_RESERVED_MASK 0x7ffffff +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_SHIFT 27 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL_MASK 0x1 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_SHIFT 28 +#define IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE_MASK 0x7 +#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_SHIFT 31 +#define IGU_FIFO_CLEANUP_WR_DATA_CMD_TYPE_MASK 0x1 +}; + +/* Protection override element */ +struct protection_override_element { + u64 data; +#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_SHIFT 0 +#define PROTECTION_OVERRIDE_ELEMENT_ADDRESS_MASK 0x7fffff +#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_SHIFT 23 +#define PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE_MASK 0xffffff +#define PROTECTION_OVERRIDE_ELEMENT_READ_SHIFT 47 +#define PROTECTION_OVERRIDE_ELEMENT_READ_MASK 0x1 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_SHIFT 48 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_MASK 0x1 +#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_SHIFT 49 +#define PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION_MASK 0x7 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_SHIFT 52 +#define PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION_MASK 0x7 +}; + +enum igu_fifo_sources { + IGU_SRC_PXP0, + IGU_SRC_PXP1, + IGU_SRC_PXP2, + IGU_SRC_PXP3, + IGU_SRC_PXP4, + IGU_SRC_PXP5, + IGU_SRC_PXP6, + IGU_SRC_PXP7, + IGU_SRC_CAU, + IGU_SRC_ATTN, + IGU_SRC_GRC +}; + +enum igu_fifo_addr_types { + IGU_ADDR_TYPE_MSIX_MEM, + IGU_ADDR_TYPE_WRITE_PBA, + IGU_ADDR_TYPE_WRITE_INT_ACK, + IGU_ADDR_TYPE_WRITE_ATTN_BITS, + IGU_ADDR_TYPE_READ_INT, + IGU_ADDR_TYPE_WRITE_PROD_UPDATE, + IGU_ADDR_TYPE_RESERVED +}; + +struct igu_fifo_addr_data { + u16 start_addr; + u16 end_addr; + char *desc; + char *vf_desc; + enum igu_fifo_addr_types type; +}; + +/******************************** Constants **********************************/ + +#define MAX_MSG_LEN 1024 +#define MCP_TRACE_MAX_MODULE_LEN 8 +#define MCP_TRACE_FORMAT_MAX_PARAMS 3 +#define MCP_TRACE_FORMAT_PARAM_WIDTH \ + (MCP_TRACE_FORMAT_P2_SIZE_SHIFT - MCP_TRACE_FORMAT_P1_SIZE_SHIFT) +#define REG_FIFO_ELEMENT_ADDR_FACTOR 4 +#define REG_FIFO_ELEMENT_IS_PF_VF_VAL 127 +#define PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR 4 + +/********************************* Macros ************************************/ + +#define BYTES_TO_DWORDS(bytes) ((bytes) / BYTES_IN_DWORD) + +/***************************** Constant Arrays *******************************/ + +/* Status string array */ +static const char * const s_status_str[] = { + "Operation completed successfully", + "Debug application version wasn't set", + "Unsupported debug application version", + "The debug block wasn't reset since the last recording", + "Invalid arguments", + "The debug output was already set", + "Invalid PCI buffer size", + "PCI buffer allocation failed", + "A PCI buffer wasn't allocated", + "Too many inputs were enabled. Enabled less inputs, or set 'unifyInputs' to true", + "GRC/Timestamp input overlap in cycle dword 0", + "Cannot record Storm data since the entire recording cycle is used by HW", + "The Storm was already enabled", + "The specified Storm wasn't enabled", + "The block was already enabled", + "The specified block wasn't enabled", + "No input was enabled for recording", + "Filters and triggers are not allowed when recording in 64b units", + "The filter was already enabled", + "The trigger was already enabled", + "The trigger wasn't enabled", + "A constraint can be added only after a filter was enabled or a trigger state was added", + "Cannot add more than 3 trigger states", + "Cannot add more than 4 constraints per filter or trigger state", + "The recording wasn't started", + "A trigger was configured, but it didn't trigger", + "No data was recorded", + "Dump buffer is too small", + "Dumped data is not aligned to chunks", + "Unknown chip", + "Failed allocating virtual memory", + "The input block is in reset", + "Invalid MCP trace signature found in NVRAM", + "Invalid bundle ID found in NVRAM", + "Failed getting NVRAM image", + "NVRAM image is not dword-aligned", + "Failed reading from NVRAM", + "Idle check parsing failed", + "MCP Trace data is corrupt", + "Dump doesn't contain meta data - it must be provided in an image file", + "Failed to halt MCP", + "Failed to resume MCP after halt", + "DMAE transaction failed", + "Failed to empty SEMI sync FIFO", + "IGU FIFO data is corrupt", + "MCP failed to mask parities", + "FW Asserts parsing failed", + "GRC FIFO data is corrupt", + "Protection Override data is corrupt", + "Debug arrays were not set (when using binary files, dbg_set_bin_ptr must be called)", + "When a block is filtered, no other blocks can be recorded unless inputs are unified (due to a HW bug)" +}; + +/* Idle check severity names array */ +static const char * const s_idle_chk_severity_str[] = { + "Error", + "Error if no traffic", + "Warning" +}; + +/* MCP Trace level names array */ +static const char * const s_mcp_trace_level_str[] = { + "ERROR", + "TRACE", + "DEBUG" +}; + +/* Parsing strings */ +static const char * const s_access_strs[] = { + "read", + "write" +}; + +static const char * const s_privilege_strs[] = { + "VF", + "PDA", + "HV", + "UA" +}; + +static const char * const s_protection_strs[] = { + "(default)", + "(default)", + "(default)", + "(default)", + "override VF", + "override PDA", + "override HV", + "override UA" +}; + +static const char * const s_master_strs[] = { + "???", + "pxp", + "mcp", + "msdm", + "psdm", + "ysdm", + "usdm", + "tsdm", + "xsdm", + "dbu", + "dmae", + "???", + "???", + "???", + "???", + "???" +}; + +static const char * const s_reg_fifo_error_strs[] = { + "grc timeout", + "address doesn't belong to any block", + "reserved address in block or write to read-only address", + "privilege/protection mismatch", + "path isolation error" +}; + +static const char * const s_igu_fifo_source_strs[] = { + "TSTORM", + "MSTORM", + "USTORM", + "XSTORM", + "YSTORM", + "PSTORM", + "PCIE", + "NIG_QM_PBF", + "CAU", + "ATTN", + "GRC", +}; + +static const char * const s_igu_fifo_error_strs[] = { + "no error", + "length error", + "function disabled", + "VF sent command to attnetion address", + "host sent prod update command", + "read of during interrupt register while in MIMD mode", + "access to PXP BAR reserved address", + "producer update command to attention index", + "unknown error", + "SB index not valid", + "SB relative index and FID not found", + "FID not match", + "command with error flag asserted (PCI error or CAU discard)", + "VF sent cleanup and RF cleanup is disabled", + "cleanup command on type bigger than 4" +}; + +/* IGU FIFO address data */ +static const struct igu_fifo_addr_data s_igu_fifo_addr_data[] = { + {0x0, 0x101, "MSI-X Memory", NULL, IGU_ADDR_TYPE_MSIX_MEM}, + {0x102, 0x1ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x200, 0x200, "Write PBA[0:63]", NULL, IGU_ADDR_TYPE_WRITE_PBA}, + {0x201, 0x201, "Write PBA[64:127]", "reserved", + IGU_ADDR_TYPE_WRITE_PBA}, + {0x202, 0x202, "Write PBA[128]", "reserved", IGU_ADDR_TYPE_WRITE_PBA}, + {0x203, 0x3ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x400, 0x5ef, "Write interrupt acknowledgment", NULL, + IGU_ADDR_TYPE_WRITE_INT_ACK}, + {0x5f0, 0x5f0, "Attention bits update", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f1, 0x5f1, "Attention bits set", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f2, 0x5f2, "Attention bits clear", NULL, + IGU_ADDR_TYPE_WRITE_ATTN_BITS}, + {0x5f3, 0x5f3, "Read interrupt 0:63 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f4, 0x5f4, "Read interrupt 0:31 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f5, 0x5f5, "Read interrupt 32:63 with mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f6, 0x5f6, "Read interrupt 0:63 without mask", NULL, + IGU_ADDR_TYPE_READ_INT}, + {0x5f7, 0x5ff, "reserved", NULL, IGU_ADDR_TYPE_RESERVED}, + {0x600, 0x7ff, "Producer update", NULL, IGU_ADDR_TYPE_WRITE_PROD_UPDATE} +}; + +/******************************** Variables **********************************/ + +/* MCP Trace meta data - used in case the dump doesn't contain the meta data + * (e.g. due to no NVRAM access). + */ +static struct dbg_array s_mcp_trace_meta = { NULL, 0 }; + +/* Temporary buffer, used for print size calculations */ +static char s_temp_buf[MAX_MSG_LEN]; + +/***************************** Public Functions *******************************/ + +enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr) +{ + /* Convert binary data to debug arrays */ + u32 num_of_buffers = *(u32 *)bin_ptr; + struct bin_buffer_hdr *buf_array; + u8 buf_id; + + buf_array = (struct bin_buffer_hdr *)((u32 *)bin_ptr + 1); + + for (buf_id = 0; buf_id < num_of_buffers; buf_id++) { + s_dbg_arrays[buf_id].ptr = + (u32 *)(bin_ptr + buf_array[buf_id].offset); + s_dbg_arrays[buf_id].size_in_dwords = + BYTES_TO_DWORDS(buf_array[buf_id].length); + } + + return DBG_STATUS_OK; +} + +static u32 qed_cyclic_add(u32 a, u32 b, u32 size) +{ + return (a + b) % size; +} + +static u32 qed_cyclic_sub(u32 a, u32 b, u32 size) +{ + return (size + a - b) % size; +} + +/* Reads the specified number of bytes from the specified cyclic buffer (up to 4 + * bytes) and returns them as a dword value. the specified buffer offset is + * updated. + */ +static u32 qed_read_from_cyclic_buf(void *buf, + u32 *offset, + u32 buf_size, u8 num_bytes_to_read) +{ + u8 *bytes_buf = (u8 *)buf; + u8 *val_ptr; + u32 val = 0; + u8 i; + + val_ptr = (u8 *)&val; + + for (i = 0; i < num_bytes_to_read; i++) { + val_ptr[i] = bytes_buf[*offset]; + *offset = qed_cyclic_add(*offset, 1, buf_size); + } + + return val; +} + +/* Reads and returns the next byte from the specified buffer. + * The specified buffer offset is updated. + */ +static u8 qed_read_byte_from_buf(void *buf, u32 *offset) +{ + return ((u8 *)buf)[(*offset)++]; +} + +/* Reads and returns the next dword from the specified buffer. + * The specified buffer offset is updated. + */ +static u32 qed_read_dword_from_buf(void *buf, u32 *offset) +{ + u32 dword_val = *(u32 *)&((u8 *)buf)[*offset]; + + *offset += 4; + return dword_val; +} + +/* Reads the next string from the specified buffer, and copies it to the + * specified pointer. The specified buffer offset is updated. + */ +static void qed_read_str_from_buf(void *buf, u32 *offset, u32 size, char *dest) +{ + const char *source_str = &((const char *)buf)[*offset]; + + strncpy(dest, source_str, size); + dest[size - 1] = '\0'; + *offset += size; +} + +/* Returns a pointer to the specified offset (in bytes) of the specified buffer. + * If the specified buffer in NULL, a temporary buffer pointer is returned. + */ +static char *qed_get_buf_ptr(void *buf, u32 offset) +{ + return buf ? (char *)buf + offset : s_temp_buf; +} + +/* Reads a param from the specified buffer. Returns the number of dwords read. + * If the returned str_param is NULL, the param is numeric and its value is + * returned in num_param. + * Otheriwise, the param is a string and its pointer is returned in str_param. + */ +static u32 qed_read_param(u32 *dump_buf, + const char **param_name, + const char **param_str_val, u32 *param_num_val) +{ + char *char_buf = (char *)dump_buf; + u32 offset = 0; /* In bytes */ + + /* Extract param name */ + *param_name = char_buf; + offset += strlen(*param_name) + 1; + + /* Check param type */ + if (*(char_buf + offset++)) { + /* String param */ + *param_str_val = char_buf + offset; + offset += strlen(*param_str_val) + 1; + if (offset & 0x3) + offset += (4 - (offset & 0x3)); + } else { + /* Numeric param */ + *param_str_val = NULL; + if (offset & 0x3) + offset += (4 - (offset & 0x3)); + *param_num_val = *(u32 *)(char_buf + offset); + offset += 4; + } + + return offset / 4; +} + +/* Reads a section header from the specified buffer. + * Returns the number of dwords read. + */ +static u32 qed_read_section_hdr(u32 *dump_buf, + const char **section_name, + u32 *num_section_params) +{ + const char *param_str_val; + + return qed_read_param(dump_buf, + section_name, ¶m_str_val, num_section_params); +} + +/* Reads section params from the specified buffer and prints them to the results + * buffer. Returns the number of dwords read. + */ +static u32 qed_print_section_params(u32 *dump_buf, + u32 num_section_params, + char *results_buf, u32 *num_chars_printed) +{ + u32 i, dump_offset = 0, results_offset = 0; + + for (i = 0; i < num_section_params; i++) { + const char *param_name; + const char *param_str_val; + u32 param_num_val = 0; + + dump_offset += qed_read_param(dump_buf + dump_offset, + ¶m_name, + ¶m_str_val, ¶m_num_val); + if (param_str_val) + /* String param */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "%s: %s\n", param_name, param_str_val); + else if (strcmp(param_name, "fw-timestamp")) + /* Numeric param */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "%s: %d\n", param_name, param_num_val); + } + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n"); + *num_chars_printed = results_offset; + return dump_offset; +} + +const char *qed_dbg_get_status_str(enum dbg_status status) +{ + return (status < + MAX_DBG_STATUS) ? s_status_str[status] : "Invalid debug status"; +} + +/* Parses the idle check rules and returns the number of characters printed. + * In case of parsing error, returns 0. + */ +static u32 qed_parse_idle_chk_dump_rules(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 *dump_buf_end, + u32 num_rules, + bool print_fw_idle_chk, + char *results_buf, + u32 *num_errors, u32 *num_warnings) +{ + u32 rule_idx, results_offset = 0; /* Offset in results_buf in bytes */ + u16 i, j; + + *num_errors = 0; + *num_warnings = 0; + + /* Go over dumped results */ + for (rule_idx = 0; rule_idx < num_rules && dump_buf < dump_buf_end; + rule_idx++) { + const struct dbg_idle_chk_rule_parsing_data *rule_parsing_data; + struct dbg_idle_chk_result_hdr *hdr; + const char *parsing_str; + u32 parsing_str_offset; + const char *lsi_msg; + u8 curr_reg_id = 0; + bool has_fw_msg; + + hdr = (struct dbg_idle_chk_result_hdr *)dump_buf; + rule_parsing_data = + (const struct dbg_idle_chk_rule_parsing_data *) + &s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA]. + ptr[hdr->rule_id]; + parsing_str_offset = + GET_FIELD(rule_parsing_data->data, + DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET); + has_fw_msg = + GET_FIELD(rule_parsing_data->data, + DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG) > 0; + parsing_str = &((const char *) + s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr) + [parsing_str_offset]; + lsi_msg = parsing_str; + + if (hdr->severity >= MAX_DBG_IDLE_CHK_SEVERITY_TYPES) + return 0; + + /* Skip rule header */ + dump_buf += (sizeof(struct dbg_idle_chk_result_hdr) / 4); + + /* Update errors/warnings count */ + if (hdr->severity == IDLE_CHK_SEVERITY_ERROR || + hdr->severity == IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC) + (*num_errors)++; + else + (*num_warnings)++; + + /* Print rule severity */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s: ", + s_idle_chk_severity_str[hdr->severity]); + + /* Print rule message */ + if (has_fw_msg) + parsing_str += strlen(parsing_str) + 1; + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s.", + has_fw_msg && + print_fw_idle_chk ? parsing_str : lsi_msg); + parsing_str += strlen(parsing_str) + 1; + + /* Print register values */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), " Registers:"); + for (i = 0; + i < hdr->num_dumped_cond_regs + hdr->num_dumped_info_regs; + i++) { + struct dbg_idle_chk_result_reg_hdr *reg_hdr + = (struct dbg_idle_chk_result_reg_hdr *) + dump_buf; + bool is_mem = + GET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM); + u8 reg_id = + GET_FIELD(reg_hdr->data, + DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID); + + /* Skip reg header */ + dump_buf += + (sizeof(struct dbg_idle_chk_result_reg_hdr) / 4); + + /* Skip register names until the required reg_id is + * reached. + */ + for (; reg_id > curr_reg_id; + curr_reg_id++, + parsing_str += strlen(parsing_str) + 1); + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), " %s", + parsing_str); + if (i < hdr->num_dumped_cond_regs && is_mem) + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "[%d]", hdr->mem_entry_id + + reg_hdr->start_entry); + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "="); + for (j = 0; j < reg_hdr->size; j++, dump_buf++) { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "0x%x", *dump_buf); + if (j < reg_hdr->size - 1) + results_offset += + sprintf(qed_get_buf_ptr + (results_buf, + results_offset), ","); + } + } + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n"); + } + + /* Check if end of dump buffer was exceeded */ + if (dump_buf > dump_buf_end) + return 0; + return results_offset; +} + +/* Parses an idle check dump buffer. + * If result_buf is not NULL, the idle check results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_idle_chk_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes, + u32 *num_errors, + u32 *num_warnings) +{ + const char *section_name, *param_name, *param_str_val; + u32 *dump_buf_end = dump_buf + num_dumped_dwords; + u32 num_section_params = 0, num_rules; + u32 results_offset = 0; /* Offset in results_buf in bytes */ + + *parsed_results_bytes = 0; + *num_errors = 0; + *num_warnings = 0; + if (!s_dbg_arrays[BIN_BUF_DBG_PARSING_STRINGS].ptr || + !s_dbg_arrays[BIN_BUF_DBG_IDLE_CHK_PARSING_DATA].ptr) + return DBG_STATUS_DBG_ARRAY_NOT_SET; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read idle_chk section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "idle_chk") || num_section_params != 1) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, &num_rules); + if (strcmp(param_name, "num_rules") != 0) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + if (num_rules) { + u32 rules_print_size; + + /* Print FW output */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "FW_IDLE_CHECK:\n"); + rules_print_size = + qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf, + dump_buf_end, num_rules, + true, + results_buf ? + results_buf + + results_offset : NULL, + num_errors, num_warnings); + results_offset += rules_print_size; + if (rules_print_size == 0) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + + /* Print LSI output */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nLSI_IDLE_CHECK:\n"); + rules_print_size = + qed_parse_idle_chk_dump_rules(p_hwfn, dump_buf, + dump_buf_end, num_rules, + false, + results_buf ? + results_buf + + results_offset : NULL, + num_errors, num_warnings); + results_offset += rules_print_size; + if (rules_print_size == 0) + return DBG_STATUS_IDLE_CHK_PARSE_FAILED; + } + + /* Print errors/warnings count */ + if (*num_errors) { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nIdle Check failed!!! (with %d errors and %d warnings)\n", + *num_errors, *num_warnings); + } else if (*num_warnings) { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nIdle Check completed successfuly (with %d warnings)\n", + *num_warnings); + } else { + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\nIdle Check completed successfuly\n"); + } + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + u32 num_errors, num_warnings; + + return qed_parse_idle_chk_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, + results_buf_size, + &num_errors, &num_warnings); +} + +enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *num_errors, u32 *num_warnings) +{ + u32 parsed_buf_size; + + return qed_parse_idle_chk_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, + &parsed_buf_size, + num_errors, num_warnings); +} + +/* Frees the specified MCP Trace meta data */ +static void qed_mcp_trace_free_meta(struct qed_hwfn *p_hwfn, + struct mcp_trace_meta *meta) +{ + u32 i; + + /* Release modules */ + if (meta->modules) { + for (i = 0; i < meta->modules_num; i++) + kfree(meta->modules[i]); + kfree(meta->modules); + } + + /* Release formats */ + if (meta->formats) { + for (i = 0; i < meta->formats_num; i++) + kfree(meta->formats[i].format_str); + kfree(meta->formats); + } +} + +/* Allocates and fills MCP Trace meta data based on the specified meta data + * dump buffer. + * Returns debug status code. + */ +static enum dbg_status qed_mcp_trace_alloc_meta(struct qed_hwfn *p_hwfn, + const u32 *meta_buf, + struct mcp_trace_meta *meta) +{ + u8 *meta_buf_bytes = (u8 *)meta_buf; + u32 offset = 0, signature, i; + + memset(meta, 0, sizeof(*meta)); + + /* Read first signature */ + signature = qed_read_dword_from_buf(meta_buf_bytes, &offset); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Read number of modules and allocate memory for all the modules + * pointers. + */ + meta->modules_num = qed_read_byte_from_buf(meta_buf_bytes, &offset); + meta->modules = kzalloc(meta->modules_num * sizeof(char *), GFP_KERNEL); + if (!meta->modules) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + /* Allocate and read all module strings */ + for (i = 0; i < meta->modules_num; i++) { + u8 module_len = qed_read_byte_from_buf(meta_buf_bytes, &offset); + + *(meta->modules + i) = kzalloc(module_len, GFP_KERNEL); + if (!(*(meta->modules + i))) { + /* Update number of modules to be released */ + meta->modules_num = i ? i - 1 : 0; + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + } + + qed_read_str_from_buf(meta_buf_bytes, &offset, module_len, + *(meta->modules + i)); + if (module_len > MCP_TRACE_MAX_MODULE_LEN) + (*(meta->modules + i))[MCP_TRACE_MAX_MODULE_LEN] = '\0'; + } + + /* Read second signature */ + signature = qed_read_dword_from_buf(meta_buf_bytes, &offset); + if (signature != MCP_TRACE_META_IMAGE_SIGNATURE) + return DBG_STATUS_INVALID_TRACE_SIGNATURE; + + /* Read number of formats and allocate memory for all formats */ + meta->formats_num = qed_read_dword_from_buf(meta_buf_bytes, &offset); + meta->formats = kzalloc(meta->formats_num * + sizeof(struct mcp_trace_format), + GFP_KERNEL); + if (!meta->formats) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + /* Allocate and read all strings */ + for (i = 0; i < meta->formats_num; i++) { + struct mcp_trace_format *format_ptr = &meta->formats[i]; + u8 format_len; + + format_ptr->data = qed_read_dword_from_buf(meta_buf_bytes, + &offset); + format_len = + (format_ptr->data & + MCP_TRACE_FORMAT_LEN_MASK) >> MCP_TRACE_FORMAT_LEN_SHIFT; + format_ptr->format_str = kzalloc(format_len, GFP_KERNEL); + if (!format_ptr->format_str) { + /* Update number of modules to be released */ + meta->formats_num = i ? i - 1 : 0; + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + } + + qed_read_str_from_buf(meta_buf_bytes, + &offset, + format_len, format_ptr->format_str); + } + + return DBG_STATUS_OK; +} + +/* Parses an MCP Trace dump buffer. + * If result_buf is not NULL, the MCP Trace results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_mcp_trace_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_mask, param_shift, param_num_val; + u32 num_section_params, offset, end_offset, bytes_left; + const char *section_name, *param_name, *param_str_val; + u32 trace_data_dwords, trace_meta_dwords; + struct mcp_trace_meta meta; + struct mcp_trace *trace; + enum dbg_status status; + const u32 *meta_buf; + u8 *trace_buf; + + *parsed_results_bytes = 0; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read trace_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "mcp_trace_data") || num_section_params != 1) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + trace_data_dwords = param_num_val; + + /* Prepare trace info */ + trace = (struct mcp_trace *)dump_buf; + trace_buf = (u8 *)dump_buf + sizeof(struct mcp_trace); + offset = trace->trace_oldest; + end_offset = trace->trace_prod; + bytes_left = qed_cyclic_sub(end_offset, offset, trace->size); + dump_buf += trace_data_dwords; + + /* Read meta_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "mcp_trace_meta")) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size") != 0) + return DBG_STATUS_MCP_TRACE_BAD_DATA; + trace_meta_dwords = param_num_val; + + /* Choose meta data buffer */ + if (!trace_meta_dwords) { + /* Dump doesn't include meta data */ + if (!s_mcp_trace_meta.ptr) + return DBG_STATUS_MCP_TRACE_NO_META; + meta_buf = s_mcp_trace_meta.ptr; + } else { + /* Dump includes meta data */ + meta_buf = dump_buf; + } + + /* Allocate meta data memory */ + status = qed_mcp_trace_alloc_meta(p_hwfn, meta_buf, &meta); + if (status != DBG_STATUS_OK) + goto free_mem; + + /* Ignore the level and modules masks - just print everything that is + * already in the buffer. + */ + while (bytes_left) { + struct mcp_trace_format *format_ptr; + u8 format_level, format_module; + u32 params[3] = { 0, 0, 0 }; + u32 header, format_idx, i; + + if (bytes_left < MFW_TRACE_ENTRY_SIZE) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + header = qed_read_from_cyclic_buf(trace_buf, + &offset, + trace->size, + MFW_TRACE_ENTRY_SIZE); + bytes_left -= MFW_TRACE_ENTRY_SIZE; + format_idx = header & MFW_TRACE_EVENTID_MASK; + + /* Skip message if its index doesn't exist in the meta data */ + if (format_idx > meta.formats_num) { + u8 format_size = + (u8)((header & + MFW_TRACE_PRM_SIZE_MASK) >> + MFW_TRACE_PRM_SIZE_SHIFT); + + if (bytes_left < format_size) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + offset = qed_cyclic_add(offset, + format_size, trace->size); + bytes_left -= format_size; + continue; + } + + format_ptr = &meta.formats[format_idx]; + for (i = 0, + param_mask = MCP_TRACE_FORMAT_P1_SIZE_MASK, param_shift = + MCP_TRACE_FORMAT_P1_SIZE_SHIFT; + i < MCP_TRACE_FORMAT_MAX_PARAMS; + i++, param_mask <<= MCP_TRACE_FORMAT_PARAM_WIDTH, + param_shift += MCP_TRACE_FORMAT_PARAM_WIDTH) { + /* Extract param size (0..3) */ + u8 param_size = + (u8)((format_ptr->data & + param_mask) >> param_shift); + + /* If the param size is zero, there are no other + * parameters. + */ + if (!param_size) + break; + + /* Size is encoded using 2 bits, where 3 is used to + * encode 4. + */ + if (param_size == 3) + param_size = 4; + if (bytes_left < param_size) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + params[i] = qed_read_from_cyclic_buf(trace_buf, + &offset, + trace->size, + param_size); + bytes_left -= param_size; + } + + format_level = + (u8)((format_ptr->data & + MCP_TRACE_FORMAT_LEVEL_MASK) >> + MCP_TRACE_FORMAT_LEVEL_SHIFT); + format_module = + (u8)((format_ptr->data & + MCP_TRACE_FORMAT_MODULE_MASK) >> + MCP_TRACE_FORMAT_MODULE_SHIFT); + if (format_level >= ARRAY_SIZE(s_mcp_trace_level_str)) { + status = DBG_STATUS_MCP_TRACE_BAD_DATA; + goto free_mem; + } + + /* Print current message to results buffer */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s %-8s: ", + s_mcp_trace_level_str[format_level], + meta.modules[format_module]); + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + format_ptr->format_str, params[0], params[1], + params[2]); + } + +free_mem: + *parsed_results_bytes = results_offset + 1; + qed_mcp_trace_free_meta(p_hwfn, &meta); + return status; +} + +enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_mcp_trace_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_mcp_trace_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +/* Parses a Reg FIFO dump buffer. + * If result_buf is not NULL, the Reg FIFO results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_reg_fifo_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct reg_fifo_element *elements; + u8 i, j, err_val, vf_val; + char vf_str[4]; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_REG_FIFO_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read reg_fifo_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "reg_fifo_data")) + return DBG_STATUS_REG_FIFO_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_REG_FIFO_BAD_DATA; + if (param_num_val % REG_FIFO_ELEMENT_DWORDS) + return DBG_STATUS_REG_FIFO_BAD_DATA; + num_elements = param_num_val / REG_FIFO_ELEMENT_DWORDS; + elements = (struct reg_fifo_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + bool err_printed = false; + + /* Discover if element belongs to a VF or a PF */ + vf_val = GET_FIELD(elements[i].data, REG_FIFO_ELEMENT_VF); + if (vf_val == REG_FIFO_ELEMENT_IS_PF_VF_VAL) + sprintf(vf_str, "%s", "N/A"); + else + sprintf(vf_str, "%d", vf_val); + + /* Add parsed element to parsed buffer */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "raw: 0x%016llx, address: 0x%07llx, access: %-5s, pf: %2lld, vf: %s, port: %lld, privilege: %-3s, protection: %-12s, master: %-4s, errors: ", + elements[i].data, + GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_ADDRESS) * + REG_FIFO_ELEMENT_ADDR_FACTOR, + s_access_strs[GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_ACCESS)], + GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_PF), vf_str, + GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_PORT), + s_privilege_strs[GET_FIELD(elements[i]. + data, + REG_FIFO_ELEMENT_PRIVILEGE)], + s_protection_strs[GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_PROTECTION)], + s_master_strs[GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_MASTER)]); + + /* Print errors */ + for (j = 0, + err_val = GET_FIELD(elements[i].data, + REG_FIFO_ELEMENT_ERROR); + j < ARRAY_SIZE(s_reg_fifo_error_strs); + j++, err_val >>= 1) { + if (!(err_val & 0x1)) + continue; + if (err_printed) + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + ", "); + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), "%s", + s_reg_fifo_error_strs[j]); + err_printed = true; + } + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, results_offset), "\n"); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "fifo contained %d elements", num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_reg_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_reg_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +/* Parses an IGU FIFO dump buffer. + * If result_buf is not NULL, the IGU FIFO results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_igu_fifo_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct igu_fifo_element *elements; + char parsed_addr_data[32]; + char parsed_wr_data[256]; + u8 i, j; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read igu_fifo_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "igu_fifo_data")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + if (param_num_val % IGU_FIFO_ELEMENT_DWORDS) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + num_elements = param_num_val / IGU_FIFO_ELEMENT_DWORDS; + elements = (struct igu_fifo_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + /* dword12 (dword index 1 and 2) contains bits 32..95 of the + * FIFO element. + */ + u64 dword12 = + ((u64)elements[i].dword2 << 32) | elements[i].dword1; + bool is_wr_cmd = GET_FIELD(dword12, + IGU_FIFO_ELEMENT_DWORD12_IS_WR_CMD); + bool is_pf = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_IS_PF); + u16 cmd_addr = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_CMD_ADDR); + u8 source = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_SOURCE); + u8 err_type = GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_ERR_TYPE); + const struct igu_fifo_addr_data *addr_data = NULL; + + if (source >= ARRAY_SIZE(s_igu_fifo_source_strs)) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + if (err_type >= ARRAY_SIZE(s_igu_fifo_error_strs)) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Find address data */ + for (j = 0; j < ARRAY_SIZE(s_igu_fifo_addr_data) && !addr_data; + j++) + if (cmd_addr >= s_igu_fifo_addr_data[j].start_addr && + cmd_addr <= s_igu_fifo_addr_data[j].end_addr) + addr_data = &s_igu_fifo_addr_data[j]; + if (!addr_data) + return DBG_STATUS_IGU_FIFO_BAD_DATA; + + /* Prepare parsed address data */ + switch (addr_data->type) { + case IGU_ADDR_TYPE_MSIX_MEM: + sprintf(parsed_addr_data, + " vector_num=0x%x", cmd_addr / 2); + break; + case IGU_ADDR_TYPE_WRITE_INT_ACK: + case IGU_ADDR_TYPE_WRITE_PROD_UPDATE: + sprintf(parsed_addr_data, + " SB=0x%x", cmd_addr - addr_data->start_addr); + break; + default: + parsed_addr_data[0] = '\0'; + } + + /* Prepare parsed write data */ + if (is_wr_cmd) { + u32 wr_data = GET_FIELD(dword12, + IGU_FIFO_ELEMENT_DWORD12_WR_DATA); + u32 prod_cons = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_PROD_CONS); + u8 is_cleanup = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_CMD_TYPE); + + if (source == IGU_SRC_ATTN) { + sprintf(parsed_wr_data, + "prod: 0x%x, ", prod_cons); + } else { + if (is_cleanup) { + u8 cleanup_val = GET_FIELD(wr_data, + IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_VAL); + u8 cleanup_type = GET_FIELD(wr_data, + IGU_FIFO_CLEANUP_WR_DATA_CLEANUP_TYPE); + + sprintf(parsed_wr_data, + "cmd_type: cleanup, cleanup_val: %s, cleanup_type: %d, ", + cleanup_val ? "set" : "clear", + cleanup_type); + } else { + u8 update_flag = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_UPDATE_FLAG); + u8 en_dis_int_for_sb = + GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_EN_DIS_INT_FOR_SB); + u8 segment = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_SEGMENT); + u8 timer_mask = GET_FIELD(wr_data, + IGU_FIFO_WR_DATA_TIMER_MASK); + + sprintf(parsed_wr_data, + "cmd_type: prod/cons update, prod/cons: 0x%x, update_flag: %s, en_dis_int_for_sb: %s, segment: %s, timer_mask=%d, ", + prod_cons, + update_flag ? "update" : "nop", + en_dis_int_for_sb + ? (en_dis_int_for_sb == + 1 ? "disable" : "nop") : + "enable", + segment ? "attn" : "regular", + timer_mask); + } + } + } else { + parsed_wr_data[0] = '\0'; + } + + /* Add parsed element to parsed buffer */ + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "raw: 0x%01x%08x%08x, %s: %d, source: %s, type: %s, cmd_addr: 0x%x (%s%s), %serror: %s\n", + elements[i].dword2, elements[i].dword1, + elements[i].dword0, + is_pf ? "pf" : "vf", + GET_FIELD(elements[i].dword0, + IGU_FIFO_ELEMENT_DWORD0_FID), + s_igu_fifo_source_strs[source], + is_wr_cmd ? "wr" : "rd", cmd_addr, + (!is_pf && addr_data->vf_desc) + ? addr_data->vf_desc : addr_data->desc, + parsed_addr_data, parsed_wr_data, + s_igu_fifo_error_strs[err_type]); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "fifo contained %d elements", num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_igu_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_igu_fifo_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +static enum dbg_status +qed_parse_protection_override_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, param_num_val, num_section_params, num_elements; + const char *section_name, *param_name, *param_str_val; + struct protection_override_element *elements; + u8 i; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + + /* Read protection_override_data section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "protection_override_data")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + dump_buf += qed_read_param(dump_buf, + ¶m_name, ¶m_str_val, ¶m_num_val); + if (strcmp(param_name, "size")) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + if (param_num_val % PROTECTION_OVERRIDE_ELEMENT_DWORDS != 0) + return DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA; + num_elements = param_num_val / PROTECTION_OVERRIDE_ELEMENT_DWORDS; + elements = (struct protection_override_element *)dump_buf; + + /* Decode elements */ + for (i = 0; i < num_elements; i++) { + u32 address = GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_ADDRESS) * + PROTECTION_OVERRIDE_ELEMENT_ADDR_FACTOR; + + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "window %2d, address: 0x%07x, size: %7lld regs, read: %lld, write: %lld, read protection: %-12s, write protection: %-12s\n", + i, address, + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WINDOW_SIZE), + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_READ), + GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WRITE), + s_protection_strs[GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_READ_PROTECTION)], + s_protection_strs[GET_FIELD(elements[i].data, + PROTECTION_OVERRIDE_ELEMENT_WRITE_PROTECTION)]); + } + + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "protection override contained %d elements", + num_elements); + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status +qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_protection_override_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_protection_override_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, + &parsed_buf_size); +} + +/* Parses a FW Asserts dump buffer. + * If result_buf is not NULL, the FW Asserts results are printed to it. + * In any case, the required results buffer size is assigned to + * parsed_results_bytes. + * The parsing status is returned. + */ +static enum dbg_status qed_parse_fw_asserts_dump(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *parsed_results_bytes) +{ + u32 results_offset = 0, num_section_params, param_num_val, i; + const char *param_name, *param_str_val, *section_name; + bool last_section_found = false; + + *parsed_results_bytes = 0; + + /* Read global_params section */ + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, &num_section_params); + if (strcmp(section_name, "global_params")) + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + + /* Print global params */ + dump_buf += qed_print_section_params(dump_buf, + num_section_params, + results_buf, &results_offset); + while (!last_section_found) { + const char *storm_letter = NULL; + u32 storm_dump_size = 0; + + dump_buf += qed_read_section_hdr(dump_buf, + §ion_name, + &num_section_params); + if (!strcmp(section_name, "last")) { + last_section_found = true; + continue; + } else if (strcmp(section_name, "fw_asserts")) { + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + } + + /* Extract params */ + for (i = 0; i < num_section_params; i++) { + dump_buf += qed_read_param(dump_buf, + ¶m_name, + ¶m_str_val, + ¶m_num_val); + if (!strcmp(param_name, "storm")) + storm_letter = param_str_val; + else if (!strcmp(param_name, "size")) + storm_dump_size = param_num_val; + else + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + } + + if (!storm_letter || !storm_dump_size) + return DBG_STATUS_FW_ASSERTS_PARSE_FAILED; + + /* Print data */ + results_offset += sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "\n%sSTORM_ASSERT: size=%d\n", + storm_letter, storm_dump_size); + for (i = 0; i < storm_dump_size; i++, dump_buf++) + results_offset += + sprintf(qed_get_buf_ptr(results_buf, + results_offset), + "%08x\n", *dump_buf); + } + + /* Add 1 for string NULL termination */ + *parsed_results_bytes = results_offset + 1; + return DBG_STATUS_OK; +} + +enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size) +{ + return qed_parse_fw_asserts_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + NULL, results_buf_size); +} + +enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 parsed_buf_size; + + return qed_parse_fw_asserts_dump(p_hwfn, + dump_buf, + num_dumped_dwords, + results_buf, &parsed_buf_size); +} + +/* Wrapper for unifying the idle_chk and mcp_trace api */ +enum dbg_status qed_print_idle_chk_results_wrapper(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf) +{ + u32 num_errors, num_warnnings; + + return qed_print_idle_chk_results(p_hwfn, dump_buf, num_dumped_dwords, + results_buf, &num_errors, + &num_warnnings); +} + +/* Feature meta data lookup table */ +static struct { + char *name; + enum dbg_status (*get_size)(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *size); + enum dbg_status (*perform_dump)(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 *dump_buf, + u32 buf_size, u32 *dumped_dwords); + enum dbg_status (*print_results)(struct qed_hwfn *p_hwfn, + u32 *dump_buf, u32 num_dumped_dwords, + char *results_buf); + enum dbg_status (*results_buf_size)(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +} qed_features_lookup[] = { + { + "grc", qed_dbg_grc_get_dump_buf_size, + qed_dbg_grc_dump, NULL, NULL}, { + "idle_chk", + qed_dbg_idle_chk_get_dump_buf_size, + qed_dbg_idle_chk_dump, + qed_print_idle_chk_results_wrapper, + qed_get_idle_chk_results_buf_size}, { + "mcp_trace", + qed_dbg_mcp_trace_get_dump_buf_size, + qed_dbg_mcp_trace_dump, qed_print_mcp_trace_results, + qed_get_mcp_trace_results_buf_size}, { + "reg_fifo", + qed_dbg_reg_fifo_get_dump_buf_size, + qed_dbg_reg_fifo_dump, qed_print_reg_fifo_results, + qed_get_reg_fifo_results_buf_size}, { + "igu_fifo", + qed_dbg_igu_fifo_get_dump_buf_size, + qed_dbg_igu_fifo_dump, qed_print_igu_fifo_results, + qed_get_igu_fifo_results_buf_size}, { + "protection_override", + qed_dbg_protection_override_get_dump_buf_size, + qed_dbg_protection_override_dump, + qed_print_protection_override_results, + qed_get_protection_override_results_buf_size}, { + "fw_asserts", + qed_dbg_fw_asserts_get_dump_buf_size, + qed_dbg_fw_asserts_dump, + qed_print_fw_asserts_results, + qed_get_fw_asserts_results_buf_size},}; + +static void qed_dbg_print_feature(u8 *p_text_buf, u32 text_size) +{ + u32 i, precision = 80; + + if (!p_text_buf) + return; + + pr_notice("\n%.*s", precision, p_text_buf); + for (i = precision; i < text_size; i += precision) + pr_cont("%.*s", precision, p_text_buf + i); + pr_cont("\n"); +} + +#define QED_RESULTS_BUF_MIN_SIZE 16 +/* Generic function for decoding debug feature info */ +enum dbg_status format_feature(struct qed_hwfn *p_hwfn, + enum qed_dbg_features feature_idx) +{ + struct qed_dbg_feature *feature = + &p_hwfn->cdev->dbg_params.features[feature_idx]; + u32 text_size_bytes, null_char_pos, i; + enum dbg_status rc; + char *text_buf; + + /* Check if feature supports formatting capability */ + if (!qed_features_lookup[feature_idx].results_buf_size) + return DBG_STATUS_OK; + + /* Obtain size of formatted output */ + rc = qed_features_lookup[feature_idx]. + results_buf_size(p_hwfn, (u32 *)feature->dump_buf, + feature->dumped_dwords, &text_size_bytes); + if (rc != DBG_STATUS_OK) + return rc; + + /* Make sure that the allocated size is a multiple of dword (4 bytes) */ + null_char_pos = text_size_bytes - 1; + text_size_bytes = (text_size_bytes + 3) & ~0x3; + + if (text_size_bytes < QED_RESULTS_BUF_MIN_SIZE) { + DP_NOTICE(p_hwfn->cdev, + "formatted size of feature was too small %d. Aborting\n", + text_size_bytes); + return DBG_STATUS_INVALID_ARGS; + } + + /* Allocate temp text buf */ + text_buf = vzalloc(text_size_bytes); + if (!text_buf) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + /* Decode feature opcodes to string on temp buf */ + rc = qed_features_lookup[feature_idx]. + print_results(p_hwfn, (u32 *)feature->dump_buf, + feature->dumped_dwords, text_buf); + if (rc != DBG_STATUS_OK) { + vfree(text_buf); + return rc; + } + + /* Replace the original null character with a '\n' character. + * The bytes that were added as a result of the dword alignment are also + * padded with '\n' characters. + */ + for (i = null_char_pos; i < text_size_bytes; i++) + text_buf[i] = '\n'; + + /* Dump printable feature to log */ + if (p_hwfn->cdev->dbg_params.print_data) + qed_dbg_print_feature(text_buf, text_size_bytes); + + /* Free the old dump_buf and point the dump_buf to the newly allocagted + * and formatted text buffer. + */ + vfree(feature->dump_buf); + feature->dump_buf = text_buf; + feature->buf_size = text_size_bytes; + feature->dumped_dwords = text_size_bytes / 4; + return rc; +} + +/* Generic function for performing the dump of a debug feature. */ +enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, + enum qed_dbg_features feature_idx) +{ + struct qed_dbg_feature *feature = + &p_hwfn->cdev->dbg_params.features[feature_idx]; + u32 buf_size_dwords; + enum dbg_status rc; + + DP_NOTICE(p_hwfn->cdev, "Collecting a debug feature [\"%s\"]\n", + qed_features_lookup[feature_idx].name); + + /* Dump_buf was already allocated need to free (this can happen if dump + * was called but file was never read). + * We can't use the buffer as is since size may have changed. + */ + if (feature->dump_buf) { + vfree(feature->dump_buf); + feature->dump_buf = NULL; + } + + /* Get buffer size from hsi, allocate accordingly, and perform the + * dump. + */ + rc = qed_features_lookup[feature_idx].get_size(p_hwfn, p_ptt, + &buf_size_dwords); + if (rc != DBG_STATUS_OK) + return rc; + feature->buf_size = buf_size_dwords * sizeof(u32); + feature->dump_buf = vmalloc(feature->buf_size); + if (!feature->dump_buf) + return DBG_STATUS_VIRT_MEM_ALLOC_FAILED; + + rc = qed_features_lookup[feature_idx]. + perform_dump(p_hwfn, p_ptt, (u32 *)feature->dump_buf, + feature->buf_size / sizeof(u32), + &feature->dumped_dwords); + + /* If mcp is stuck we get DBG_STATUS_NVRAM_GET_IMAGE_FAILED error. + * In this case the buffer holds valid binary data, but we wont able + * to parse it (since parsing relies on data in NVRAM which is only + * accessible when MFW is responsive). skip the formatting but return + * success so that binary data is provided. + */ + if (rc == DBG_STATUS_NVRAM_GET_IMAGE_FAILED) + return DBG_STATUS_OK; + + if (rc != DBG_STATUS_OK) + return rc; + + /* Format output */ + rc = format_feature(p_hwfn, feature_idx); + return rc; +} + +int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_GRC, num_dumped_bytes); +} + +int qed_dbg_grc_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_GRC); +} + +int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IDLE_CHK, + num_dumped_bytes); +} + +int qed_dbg_idle_chk_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_IDLE_CHK); +} + +int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_REG_FIFO, + num_dumped_bytes); +} + +int qed_dbg_reg_fifo_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_REG_FIFO); +} + +int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_IGU_FIFO, + num_dumped_bytes); +} + +int qed_dbg_igu_fifo_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_IGU_FIFO); +} + +int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_PROTECTION_OVERRIDE, + num_dumped_bytes); +} + +int qed_dbg_protection_override_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_PROTECTION_OVERRIDE); +} + +int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_FW_ASSERTS, + num_dumped_bytes); +} + +int qed_dbg_fw_asserts_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_FW_ASSERTS); +} + +int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes) +{ + return qed_dbg_feature(cdev, buffer, DBG_FEATURE_MCP_TRACE, + num_dumped_bytes); +} + +int qed_dbg_mcp_trace_size(struct qed_dev *cdev) +{ + return qed_dbg_feature_size(cdev, DBG_FEATURE_MCP_TRACE); +} + +/* Defines the amount of bytes allocated for recording the length of debugfs + * feature buffer. + */ +#define REGDUMP_HEADER_SIZE sizeof(u32) +#define REGDUMP_HEADER_FEATURE_SHIFT 24 +#define REGDUMP_HEADER_ENGINE_SHIFT 31 +#define REGDUMP_HEADER_OMIT_ENGINE_SHIFT 30 +enum debug_print_features { + OLD_MODE = 0, + IDLE_CHK = 1, + GRC_DUMP = 2, + MCP_TRACE = 3, + REG_FIFO = 4, + PROTECTION_OVERRIDE = 5, + IGU_FIFO = 6, + PHY = 7, + FW_ASSERTS = 8, +}; + +static u32 qed_calc_regdump_header(enum debug_print_features feature, + int engine, u32 feature_size, u8 omit_engine) +{ + /* Insert the engine, feature and mode inside the header and combine it + * with feature size. + */ + return feature_size | (feature << REGDUMP_HEADER_FEATURE_SHIFT) | + (omit_engine << REGDUMP_HEADER_OMIT_ENGINE_SHIFT) | + (engine << REGDUMP_HEADER_ENGINE_SHIFT); +} + +int qed_dbg_all_data(struct qed_dev *cdev, void *buffer) +{ + u8 cur_engine, omit_engine = 0, org_engine; + u32 offset = 0, feature_size; + int rc; + + if (cdev->num_hwfns == 1) + omit_engine = 1; + + org_engine = qed_get_debug_engine(cdev); + for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { + /* Collect idle_chks and grcDump for each hw function */ + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "obtaining idle_chk and grcdump for current engine\n"); + qed_set_debug_engine(cdev, cur_engine); + + /* First idle_chk */ + rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IDLE_CHK, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc); + } + + /* Second idle_chk */ + rc = qed_dbg_idle_chk(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IDLE_CHK, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_idle_chk failed. rc = %d\n", rc); + } + + /* reg_fifo dump */ + rc = qed_dbg_reg_fifo(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(REG_FIFO, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_reg_fifo failed. rc = %d\n", rc); + } + + /* igu_fifo dump */ + rc = qed_dbg_igu_fifo(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(IGU_FIFO, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_igu_fifo failed. rc = %d", rc); + } + + /* protection_override dump */ + rc = qed_dbg_protection_override(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, + &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(PROTECTION_OVERRIDE, + cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, + "qed_dbg_protection_override failed. rc = %d\n", + rc); + } + + /* fw_asserts dump */ + rc = qed_dbg_fw_asserts(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(FW_ASSERTS, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_fw_asserts failed. rc = %d\n", + rc); + } + + /* GRC dump - must be last because when mcp stuck it will + * clutter idle_chk, reg_fifo, ... + */ + rc = qed_dbg_grc(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(GRC_DUMP, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_grc failed. rc = %d", rc); + } + } + + /* mcp_trace */ + rc = qed_dbg_mcp_trace(cdev, (u8 *)buffer + offset + + REGDUMP_HEADER_SIZE, &feature_size); + if (!rc) { + *(u32 *)((u8 *)buffer + offset) = + qed_calc_regdump_header(MCP_TRACE, cur_engine, + feature_size, omit_engine); + offset += (feature_size + REGDUMP_HEADER_SIZE); + } else { + DP_ERR(cdev, "qed_dbg_mcp_trace failed. rc = %d\n", rc); + } + + qed_set_debug_engine(cdev, org_engine); + + return 0; +} + +int qed_dbg_all_data_size(struct qed_dev *cdev) +{ + u8 cur_engine, org_engine; + u32 regs_len = 0; + + org_engine = qed_get_debug_engine(cdev); + for (cur_engine = 0; cur_engine < cdev->num_hwfns; cur_engine++) { + /* Engine specific */ + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "calculating idle_chk and grcdump register length for current engine\n"); + qed_set_debug_engine(cdev, cur_engine); + regs_len += REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_idle_chk_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_grc_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_reg_fifo_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_igu_fifo_size(cdev) + + REGDUMP_HEADER_SIZE + + qed_dbg_protection_override_size(cdev) + + REGDUMP_HEADER_SIZE + qed_dbg_fw_asserts_size(cdev); + } + + /* Engine common */ + regs_len += REGDUMP_HEADER_SIZE + qed_dbg_mcp_trace_size(cdev); + qed_set_debug_engine(cdev, org_engine); + + return regs_len; +} + +int qed_dbg_feature(struct qed_dev *cdev, void *buffer, + enum qed_dbg_features feature, u32 *num_dumped_bytes) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + struct qed_dbg_feature *qed_feature = + &cdev->dbg_params.features[feature]; + enum dbg_status dbg_rc; + struct qed_ptt *p_ptt; + int rc = 0; + + /* Acquire ptt */ + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + return -EINVAL; + + /* Get dump */ + dbg_rc = qed_dbg_dump(p_hwfn, p_ptt, feature); + if (dbg_rc != DBG_STATUS_OK) { + DP_VERBOSE(cdev, QED_MSG_DEBUG, "%s\n", + qed_dbg_get_status_str(dbg_rc)); + *num_dumped_bytes = 0; + rc = -EINVAL; + goto out; + } + + DP_VERBOSE(cdev, QED_MSG_DEBUG, + "copying debugfs feature to external buffer\n"); + memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size); + *num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords * + 4; + +out: + qed_ptt_release(p_hwfn, p_ptt); + return rc; +} + +int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature) +{ + struct qed_hwfn *p_hwfn = + &cdev->hwfns[cdev->dbg_params.engine_for_debug]; + struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn); + struct qed_dbg_feature *qed_feature = + &cdev->dbg_params.features[feature]; + u32 buf_size_dwords; + enum dbg_status rc; + + if (!p_ptt) + return -EINVAL; + + rc = qed_features_lookup[feature].get_size(p_hwfn, p_ptt, + &buf_size_dwords); + if (rc != DBG_STATUS_OK) + buf_size_dwords = 0; + + qed_ptt_release(p_hwfn, p_ptt); + qed_feature->buf_size = buf_size_dwords * sizeof(u32); + return qed_feature->buf_size; +} + +u8 qed_get_debug_engine(struct qed_dev *cdev) +{ + return cdev->dbg_params.engine_for_debug; +} + +void qed_set_debug_engine(struct qed_dev *cdev, int engine_number) +{ + DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n", + engine_number); + cdev->dbg_params.engine_for_debug = engine_number; +} + +void qed_dbg_pf_init(struct qed_dev *cdev) +{ + const u8 *dbg_values; + + /* Debug values are after init values. + * The offset is the first dword of the file. + */ + dbg_values = cdev->firmware->data + *(u32 *)cdev->firmware->data; + qed_dbg_set_bin_ptr((u8 *)dbg_values); + qed_dbg_user_set_bin_ptr((u8 *)dbg_values); +} + +void qed_dbg_pf_exit(struct qed_dev *cdev) +{ + struct qed_dbg_feature *feature = NULL; + enum qed_dbg_features feature_idx; + + /* Debug features' buffers may be allocated if debug feature was used + * but dump wasn't called. + */ + for (feature_idx = 0; feature_idx < DBG_FEATURE_NUM; feature_idx++) { + feature = &cdev->dbg_params.features[feature_idx]; + if (feature->dump_buf) { + vfree(feature->dump_buf); + feature->dump_buf = NULL; + } + } +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.h b/drivers/net/ethernet/qlogic/qed/qed_debug.h new file mode 100644 index 000000000000..f872d7324814 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_debug.h @@ -0,0 +1,54 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_DEBUGFS_H +#define _QED_DEBUGFS_H + +enum qed_dbg_features { + DBG_FEATURE_GRC, + DBG_FEATURE_IDLE_CHK, + DBG_FEATURE_MCP_TRACE, + DBG_FEATURE_REG_FIFO, + DBG_FEATURE_IGU_FIFO, + DBG_FEATURE_PROTECTION_OVERRIDE, + DBG_FEATURE_FW_ASSERTS, + DBG_FEATURE_NUM +}; + +int qed_dbg_grc(struct qed_dev *cdev, void *buffer, u32 *num_dumped_bytes); +int qed_dbg_grc_size(struct qed_dev *cdev); +int qed_dbg_idle_chk(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_idle_chk_size(struct qed_dev *cdev); +int qed_dbg_reg_fifo(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_reg_fifo_size(struct qed_dev *cdev); +int qed_dbg_igu_fifo(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_igu_fifo_size(struct qed_dev *cdev); +int qed_dbg_protection_override(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_protection_override_size(struct qed_dev *cdev); +int qed_dbg_fw_asserts(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_fw_asserts_size(struct qed_dev *cdev); +int qed_dbg_mcp_trace(struct qed_dev *cdev, void *buffer, + u32 *num_dumped_bytes); +int qed_dbg_mcp_trace_size(struct qed_dev *cdev); +int qed_dbg_all_data(struct qed_dev *cdev, void *buffer); +int qed_dbg_all_data_size(struct qed_dev *cdev); +u8 qed_get_debug_engine(struct qed_dev *cdev); +void qed_set_debug_engine(struct qed_dev *cdev, int engine_number); +int qed_dbg_feature(struct qed_dev *cdev, void *buffer, + enum qed_dbg_features feature, u32 *num_dumped_bytes); +int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature); + +void qed_dbg_pf_init(struct qed_dev *cdev); +void qed_dbg_pf_exit(struct qed_dev *cdev); + +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 0e4f4a9306b5..754f6a908858 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -29,14 +29,18 @@ #include "qed_hw.h" #include "qed_init_ops.h" #include "qed_int.h" +#include "qed_ll2.h" #include "qed_mcp.h" #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_sriov.h" #include "qed_vf.h" +#include "qed_roce.h" -static spinlock_t qm_lock; -static bool qm_lock_init = false; +static DEFINE_SPINLOCK(qm_lock); + +#define QED_MIN_DPIS (4) +#define QED_MIN_PWM_REGION (QED_WID_SIZE * QED_MIN_DPIS) /* API common to all protocols */ enum BAR_ID { @@ -44,8 +48,7 @@ enum BAR_ID { BAR_ID_1 /* Used for doorbells */ }; -static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, - enum BAR_ID bar_id) +static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, enum BAR_ID bar_id) { u32 bar_reg = (bar_id == BAR_ID_0 ? PGLUE_B_REG_PF_BAR0_SIZE : PGLUE_B_REG_PF_BAR1_SIZE); @@ -70,8 +73,7 @@ static u32 qed_hw_bar_size(struct qed_hwfn *p_hwfn, } } -void qed_init_dp(struct qed_dev *cdev, - u32 dp_module, u8 dp_level) +void qed_init_dp(struct qed_dev *cdev, u32 dp_module, u8 dp_level) { u32 i; @@ -150,6 +152,9 @@ void qed_resc_free(struct qed_dev *cdev) qed_eq_free(p_hwfn, p_hwfn->p_eq); qed_consq_free(p_hwfn, p_hwfn->p_consq); qed_int_free(p_hwfn); +#ifdef CONFIG_QED_LL2 + qed_ll2_free(p_hwfn, p_hwfn->p_ll2_info); +#endif qed_iov_free(p_hwfn); qed_dmae_info_free(p_hwfn); qed_dcbx_info_free(p_hwfn, p_hwfn->p_dcbx_info); @@ -343,7 +348,6 @@ static int qed_init_qm_info(struct qed_hwfn *p_hwfn, bool b_sleepable) return 0; alloc_err: - DP_NOTICE(p_hwfn, "Failed to allocate memory for QM params\n"); qed_qm_info_free(p_hwfn); return -ENOMEM; } @@ -407,6 +411,9 @@ int qed_qm_reconf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) int qed_resc_alloc(struct qed_dev *cdev) { +#ifdef CONFIG_QED_LL2 + struct qed_ll2_info *p_ll2_info; +#endif struct qed_consq *p_consq; struct qed_eq *p_eq; int i, rc = 0; @@ -427,18 +434,12 @@ int qed_resc_alloc(struct qed_dev *cdev) RESC_NUM(p_hwfn, QED_L2_QUEUE); p_hwfn->p_tx_cids = kzalloc(tx_size, GFP_KERNEL); - if (!p_hwfn->p_tx_cids) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for Tx Cids\n"); + if (!p_hwfn->p_tx_cids) goto alloc_no_mem; - } p_hwfn->p_rx_cids = kzalloc(rx_size, GFP_KERNEL); - if (!p_hwfn->p_rx_cids) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for Rx Cids\n"); + if (!p_hwfn->p_rx_cids) goto alloc_no_mem; - } } for_each_hwfn(cdev, i) { @@ -523,29 +524,29 @@ int qed_resc_alloc(struct qed_dev *cdev) goto alloc_no_mem; p_hwfn->p_consq = p_consq; +#ifdef CONFIG_QED_LL2 + if (p_hwfn->using_ll2) { + p_ll2_info = qed_ll2_alloc(p_hwfn); + if (!p_ll2_info) + goto alloc_no_mem; + p_hwfn->p_ll2_info = p_ll2_info; + } +#endif + /* DMA info initialization */ rc = qed_dmae_info_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for dmae_info structure\n"); + if (rc) goto alloc_err; - } /* DCBX initialization */ rc = qed_dcbx_info_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, - "Failed to allocate memory for dcbx structure\n"); + if (rc) goto alloc_err; - } } cdev->reset_stats = kzalloc(sizeof(*cdev->reset_stats), GFP_KERNEL); - if (!cdev->reset_stats) { - DP_NOTICE(cdev, "Failed to allocate reset statistics\n"); - rc = -ENOMEM; - goto alloc_err; - } + if (!cdev->reset_stats) + goto alloc_no_mem; return 0; @@ -580,6 +581,10 @@ void qed_resc_setup(struct qed_dev *cdev) qed_int_setup(p_hwfn, p_hwfn->p_main_ptt); qed_iov_setup(p_hwfn, p_hwfn->p_main_ptt); +#ifdef CONFIG_QED_LL2 + if (p_hwfn->using_ll2) + qed_ll2_setup(p_hwfn, p_hwfn->p_ll2_info); +#endif } } @@ -605,9 +610,8 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn, /* Make sure notification is not set before initiating final cleanup */ if (REG_RD(p_hwfn, addr)) { - DP_NOTICE( - p_hwfn, - "Unexpected; Found final cleanup notification before initiating final cleanup\n"); + DP_NOTICE(p_hwfn, + "Unexpected; Found final cleanup notification before initiating final cleanup\n"); REG_WR(p_hwfn, addr, 0); } @@ -701,17 +705,14 @@ static void qed_init_cau_rt_data(struct qed_dev *cdev) continue; qed_init_cau_sb_entry(p_hwfn, &sb_entry, - p_block->function_id, - 0, 0); - STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, - sb_entry); + p_block->function_id, 0, 0); + STORE_RT_REG_AGG(p_hwfn, offset + sb_id * 2, sb_entry); } } } static int qed_hw_init_common(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - int hw_mode) + struct qed_ptt *p_ptt, int hw_mode) { struct qed_qm_info *qm_info = &p_hwfn->qm_info; struct qed_qm_common_rt_init_params params; @@ -759,7 +760,7 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, qed_port_unpretend(p_hwfn, p_ptt); rc = qed_init_run(p_hwfn, p_ptt, PHASE_ENGINE, ANY_PHASE_ID, hw_mode); - if (rc != 0) + if (rc) return rc; qed_wr(p_hwfn, p_ptt, PSWRQ2_REG_L2P_VALIDATE_VFID, 0); @@ -780,6 +781,9 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, concrete_fid = qed_vfid_to_concrete(p_hwfn, vf_id); qed_fid_pretend(p_hwfn, p_ptt, (u16) concrete_fid); qed_wr(p_hwfn, p_ptt, CCFC_REG_STRONG_ENABLE_VF, 0x1); + qed_wr(p_hwfn, p_ptt, CCFC_REG_WEAK_ENABLE_VF, 0x0); + qed_wr(p_hwfn, p_ptt, TCFC_REG_STRONG_ENABLE_VF, 0x1); + qed_wr(p_hwfn, p_ptt, TCFC_REG_WEAK_ENABLE_VF, 0x0); } /* pretend to original PF */ qed_fid_pretend(p_hwfn, p_ptt, p_hwfn->rel_pf_id); @@ -787,38 +791,141 @@ static int qed_hw_init_common(struct qed_hwfn *p_hwfn, return rc; } -static int qed_hw_init_port(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - int hw_mode) +static int +qed_hw_init_dpi_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 pwm_region_size, u32 n_cpus) { + u32 dpi_page_size_1, dpi_page_size_2, dpi_page_size; + u32 dpi_bit_shift, dpi_count; + u32 min_dpis; + + /* Calculate DPI size */ + dpi_page_size_1 = QED_WID_SIZE * n_cpus; + dpi_page_size_2 = max_t(u32, QED_WID_SIZE, PAGE_SIZE); + dpi_page_size = max_t(u32, dpi_page_size_1, dpi_page_size_2); + dpi_page_size = roundup_pow_of_two(dpi_page_size); + dpi_bit_shift = ilog2(dpi_page_size / 4096); + + dpi_count = pwm_region_size / dpi_page_size; + + min_dpis = p_hwfn->pf_params.rdma_pf_params.min_dpis; + min_dpis = max_t(u32, QED_MIN_DPIS, min_dpis); + + p_hwfn->dpi_size = dpi_page_size; + p_hwfn->dpi_count = dpi_count; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPI_BIT_SHIFT, dpi_bit_shift); + + if (dpi_count < min_dpis) + return -EINVAL; + + return 0; +} + +enum QED_ROCE_EDPM_MODE { + QED_ROCE_EDPM_MODE_ENABLE = 0, + QED_ROCE_EDPM_MODE_FORCE_ON = 1, + QED_ROCE_EDPM_MODE_DISABLE = 2, +}; + +static int +qed_hw_init_pf_doorbell_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 pwm_regsize, norm_regsize; + u32 non_pwm_conn, min_addr_reg1; + u32 db_bar_size, n_cpus; + u32 roce_edpm_mode; + u32 pf_dems_shift; int rc = 0; + u8 cond; - rc = qed_init_run(p_hwfn, p_ptt, PHASE_PORT, p_hwfn->port_id, hw_mode); - if (rc != 0) - return rc; + db_bar_size = qed_hw_bar_size(p_hwfn, BAR_ID_1); + if (p_hwfn->cdev->num_hwfns > 1) + db_bar_size /= 2; - if (hw_mode & (1 << MODE_MF_SI)) { - u8 pf_id = 0; + /* Calculate doorbell regions */ + non_pwm_conn = qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_CORE) + + qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_CORE, + NULL) + + qed_cxt_get_proto_cid_count(p_hwfn, PROTOCOLID_ETH, + NULL); + norm_regsize = roundup(QED_PF_DEMS_SIZE * non_pwm_conn, 4096); + min_addr_reg1 = norm_regsize / 4096; + pwm_regsize = db_bar_size - norm_regsize; - if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) { - DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, - "PF[%08x] is first eth on engine\n", pf_id); - - /* We should have configured BIT for ppfid, i.e., the - * relative function number in the port. But there's a - * bug in LLH in BB where the ppfid is actually engine - * based, so we need to take this into account. - */ - qed_wr(p_hwfn, p_ptt, - NIG_REG_LLH_TAGMAC_DEF_PF_VECTOR, 1 << pf_id); - } - - /* Take the protocol-based hit vector if there is a hit, - * otherwise take the other vector. - */ - qed_wr(p_hwfn, p_ptt, NIG_REG_LLH_CLS_TYPE_DUALMODE, 0x2); + /* Check that the normal and PWM sizes are valid */ + if (db_bar_size < norm_regsize) { + DP_ERR(p_hwfn->cdev, + "Doorbell BAR size 0x%x is too small (normal region is 0x%0x )\n", + db_bar_size, norm_regsize); + return -EINVAL; } - return rc; + + if (pwm_regsize < QED_MIN_PWM_REGION) { + DP_ERR(p_hwfn->cdev, + "PWM region size 0x%0x is too small. Should be at least 0x%0x (Doorbell BAR size is 0x%x and normal region size is 0x%0x)\n", + pwm_regsize, + QED_MIN_PWM_REGION, db_bar_size, norm_regsize); + return -EINVAL; + } + + /* Calculate number of DPIs */ + roce_edpm_mode = p_hwfn->pf_params.rdma_pf_params.roce_edpm_mode; + if ((roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE) || + ((roce_edpm_mode == QED_ROCE_EDPM_MODE_FORCE_ON))) { + /* Either EDPM is mandatory, or we are attempting to allocate a + * WID per CPU. + */ + n_cpus = num_active_cpus(); + rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus); + } + + cond = (rc && (roce_edpm_mode == QED_ROCE_EDPM_MODE_ENABLE)) || + (roce_edpm_mode == QED_ROCE_EDPM_MODE_DISABLE); + if (cond || p_hwfn->dcbx_no_edpm) { + /* Either EDPM is disabled from user configuration, or it is + * disabled via DCBx, or it is not mandatory and we failed to + * allocated a WID per CPU. + */ + n_cpus = 1; + rc = qed_hw_init_dpi_size(p_hwfn, p_ptt, pwm_regsize, n_cpus); + + if (cond) + qed_rdma_dpm_bar(p_hwfn, p_ptt); + } + + DP_INFO(p_hwfn, + "doorbell bar: normal_region_size=%d, pwm_region_size=%d, dpi_size=%d, dpi_count=%d, roce_edpm=%s\n", + norm_regsize, + pwm_regsize, + p_hwfn->dpi_size, + p_hwfn->dpi_count, + ((p_hwfn->dcbx_no_edpm) || (p_hwfn->db_bar_no_edpm)) ? + "disabled" : "enabled"); + + if (rc) { + DP_ERR(p_hwfn, + "Failed to allocate enough DPIs. Allocated %d but the current minimum is %d.\n", + p_hwfn->dpi_count, + p_hwfn->pf_params.rdma_pf_params.min_dpis); + return -EINVAL; + } + + p_hwfn->dpi_start_offset = norm_regsize; + + /* DEMS size is configured log2 of DWORDs, hence the division by 4 */ + pf_dems_shift = ilog2(QED_PF_DEMS_SIZE / 4); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_ICID_BIT_SHIFT_NORM, pf_dems_shift); + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_MIN_ADDR_REG1, min_addr_reg1); + + return 0; +} + +static int qed_hw_init_port(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, int hw_mode) +{ + return qed_init_run(p_hwfn, p_ptt, PHASE_PORT, + p_hwfn->port_id, hw_mode); } static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, @@ -848,7 +955,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, qed_int_igu_init_rt(p_hwfn); /* Set VLAN in NIG if needed */ - if (hw_mode & (1 << MODE_MF_SD)) { + if (hw_mode & BIT(MODE_MF_SD)) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring LLH_FUNC_TAG\n"); STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET, 1); STORE_RT_REG(p_hwfn, NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET, @@ -856,7 +963,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, } /* Enable classification by MAC if needed */ - if (hw_mode & (1 << MODE_MF_SI)) { + if (hw_mode & BIT(MODE_MF_SI)) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "Configuring TAGMAC_CLS_TYPE\n"); STORE_RT_REG(p_hwfn, @@ -871,7 +978,7 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, /* Cleanup chip from previous driver if such remains exist */ rc = qed_final_cleanup(p_hwfn, p_ptt, rel_pf_id, false); - if (rc != 0) + if (rc) return rc; /* PF Init sequence */ @@ -887,20 +994,9 @@ static int qed_hw_init_pf(struct qed_hwfn *p_hwfn, /* Pure runtime initializations - directly to the HW */ qed_int_igu_init_pure_rt(p_hwfn, p_ptt, true, true); - if (hw_mode & (1 << MODE_MF_SI)) { - u8 pf_id = 0; - u32 val = 0; - - if (!qed_hw_init_first_eth(p_hwfn, p_ptt, &pf_id)) { - if (p_hwfn->rel_pf_id == pf_id) { - DP_VERBOSE(p_hwfn, NETIF_MSG_IFUP, - "PF[%d] is first ETH on engine\n", - pf_id); - val = 1; - } - qed_wr(p_hwfn, p_ptt, PRS_REG_MSG_INFO, val); - } - } + rc = qed_hw_init_pf_doorbell_bar(p_hwfn, p_ptt); + if (rc) + return rc; if (b_hw_start) { /* enable interrupts */ @@ -950,8 +1046,7 @@ static void qed_reset_mb_shadow(struct qed_hwfn *p_hwfn, /* Read shadow of current MFW mailbox */ qed_mcp_read_mb(p_hwfn, p_main_ptt); memcpy(p_hwfn->mcp_info->mfw_mb_shadow, - p_hwfn->mcp_info->mfw_mb_cur, - p_hwfn->mcp_info->mfw_mb_length); + p_hwfn->mcp_info->mfw_mb_cur, p_hwfn->mcp_info->mfw_mb_length); } int qed_hw_init(struct qed_dev *cdev, @@ -971,7 +1066,7 @@ int qed_hw_init(struct qed_dev *cdev, if (IS_PF(cdev)) { rc = qed_init_fw_data(cdev, bin_fw_data); - if (rc != 0) + if (rc) return rc; } @@ -988,8 +1083,7 @@ int qed_hw_init(struct qed_dev *cdev, qed_calc_hw_mode(p_hwfn); - rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, - &load_code); + rc = qed_mcp_load_req(p_hwfn, p_hwfn->p_main_ptt, &load_code); if (rc) { DP_NOTICE(p_hwfn, "Failed sending LOAD_REQ command\n"); return rc; @@ -1004,11 +1098,6 @@ int qed_hw_init(struct qed_dev *cdev, p_hwfn->first_on_engine = (load_code == FW_MSG_CODE_DRV_LOAD_ENGINE); - if (!qm_lock_init) { - spin_lock_init(&qm_lock); - qm_lock_init = true; - } - switch (load_code) { case FW_MSG_CODE_DRV_LOAD_ENGINE: rc = qed_hw_init_common(p_hwfn, p_hwfn->p_main_ptt, @@ -1071,9 +1160,8 @@ int qed_hw_init(struct qed_dev *cdev, } #define QED_HW_STOP_RETRY_LIMIT (10) -static inline void qed_hw_timers_stop(struct qed_dev *cdev, - struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static void qed_hw_timers_stop(struct qed_dev *cdev, + struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { int i; @@ -1084,8 +1172,7 @@ static inline void qed_hw_timers_stop(struct qed_dev *cdev, for (i = 0; i < QED_HW_STOP_RETRY_LIMIT; i++) { if ((!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_CONN)) && - (!qed_rd(p_hwfn, p_ptt, - TM_REG_PF_SCAN_ACTIVE_TASK))) + (!qed_rd(p_hwfn, p_ptt, TM_REG_PF_SCAN_ACTIVE_TASK))) break; /* Dependent on number of connection/tasks, possibly @@ -1190,8 +1277,7 @@ void qed_hw_stop_fastpath(struct qed_dev *cdev) } DP_VERBOSE(p_hwfn, - NETIF_MSG_IFDOWN, - "Shutting down the fastpath\n"); + NETIF_MSG_IFDOWN, "Shutting down the fastpath\n"); qed_wr(p_hwfn, p_ptt, NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x1); @@ -1219,14 +1305,13 @@ void qed_hw_start_fastpath(struct qed_hwfn *p_hwfn) NIG_REG_RX_LLH_BRB_GATE_DNTFWD_PERPF, 0x0); } -static int qed_reg_assert(struct qed_hwfn *hwfn, - struct qed_ptt *ptt, u32 reg, - bool expected) +static int qed_reg_assert(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 reg, bool expected) { - u32 assert_val = qed_rd(hwfn, ptt, reg); + u32 assert_val = qed_rd(p_hwfn, p_ptt, reg); if (assert_val != expected) { - DP_NOTICE(hwfn, "Value at address 0x%x != 0x%08x\n", + DP_NOTICE(p_hwfn, "Value at address 0x%08x != 0x%08x\n", reg, expected); return -EINVAL; } @@ -1306,8 +1391,7 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn) /* Clean Previous errors if such exist */ qed_wr(p_hwfn, p_hwfn->p_main_ptt, - PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, - 1 << p_hwfn->abs_pf_id); + PGLUE_B_REG_WAS_ERROR_PF_31_0_CLR, 1 << p_hwfn->abs_pf_id); /* enable internal target-read */ qed_wr(p_hwfn, p_hwfn->p_main_ptt, @@ -1317,7 +1401,8 @@ static void qed_hw_hwfn_prepare(struct qed_hwfn *p_hwfn) static void get_function_id(struct qed_hwfn *p_hwfn) { /* ME Register */ - p_hwfn->hw_info.opaque_fid = (u16)REG_RD(p_hwfn, PXP_PF_ME_OPAQUE_ADDR); + p_hwfn->hw_info.opaque_fid = (u16) REG_RD(p_hwfn, + PXP_PF_ME_OPAQUE_ADDR); p_hwfn->hw_info.concrete_fid = REG_RD(p_hwfn, PXP_PF_ME_CONCRETE_ADDR); @@ -1326,6 +1411,10 @@ static void get_function_id(struct qed_hwfn *p_hwfn) PXP_CONCRETE_FID_PFID); p_hwfn->port_id = GET_FIELD(p_hwfn->hw_info.concrete_fid, PXP_CONCRETE_FID_PORT); + + DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, + "Read ME register: Concrete 0x%08x Opaque 0x%04x\n", + p_hwfn->hw_info.concrete_fid, p_hwfn->hw_info.opaque_fid); } static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) @@ -1333,6 +1422,19 @@ static void qed_hw_set_feat(struct qed_hwfn *p_hwfn) u32 *feat_num = p_hwfn->hw_info.feat_num; int num_features = 1; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + /* Roce CNQ each requires: 1 status block + 1 CNQ. We divide the + * status blocks equally between L2 / RoCE but with consideration as + * to how many l2 queues / cnqs we have + */ + if (p_hwfn->hw_info.personality == QED_PCI_ETH_ROCE) { + num_features++; + + feat_num[QED_RDMA_CNQ] = + min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, + RESC_NUM(p_hwfn, QED_RDMA_CNQ_RAM)); + } +#endif feat_num[QED_PF_L2_QUE] = min_t(u32, RESC_NUM(p_hwfn, QED_SB) / num_features, RESC_NUM(p_hwfn, QED_L2_QUEUE)); @@ -1373,6 +1475,10 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) resc_num[QED_VLAN] = (ETH_NUM_VLAN_FILTERS - 1 /*For vlan0*/) / num_funcs; resc_num[QED_ILT] = PXP_NUM_ILT_RECORDS_BB / num_funcs; + resc_num[QED_LL2_QUEUE] = MAX_NUM_LL2_RX_QUEUES / num_funcs; + resc_num[QED_RDMA_CNQ_RAM] = NUM_OF_CMDQS_CQS / num_funcs; + resc_num[QED_RDMA_STATS_QUEUE] = RDMA_NUM_STATISTIC_COUNTERS_BB / + num_funcs; for (i = 0; i < QED_MAX_RESC; i++) resc_start[i] = resc_num[i] * enabled_func_idx; @@ -1396,7 +1502,8 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) "RL = %d start = %d\n" "MAC = %d start = %d\n" "VLAN = %d start = %d\n" - "ILT = %d start = %d\n", + "ILT = %d start = %d\n" + "LL2_QUEUE = %d start = %d\n", p_hwfn->hw_info.resc_num[QED_SB], p_hwfn->hw_info.resc_start[QED_SB], p_hwfn->hw_info.resc_num[QED_L2_QUEUE], @@ -1412,13 +1519,14 @@ static int qed_hw_get_resc(struct qed_hwfn *p_hwfn) p_hwfn->hw_info.resc_num[QED_VLAN], p_hwfn->hw_info.resc_start[QED_VLAN], p_hwfn->hw_info.resc_num[QED_ILT], - p_hwfn->hw_info.resc_start[QED_ILT]); + p_hwfn->hw_info.resc_start[QED_ILT], + RESC_NUM(p_hwfn, QED_LL2_QUEUE), + RESC_START(p_hwfn, QED_LL2_QUEUE)); return 0; } -static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 nvm_cfg1_offset, mf_mode, addr, generic_cont0, core_cfg; u32 port_cfg_addr, link_temp, nvm_cfg_addr, device_capabilities; @@ -1472,8 +1580,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, p_hwfn->hw_info.port_mode = QED_PORT_MODE_DE_1X25G; break; default: - DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", - core_cfg); + DP_NOTICE(p_hwfn, "Unknown port mode in 0x%08x\n", core_cfg); break; } @@ -1484,11 +1591,11 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, link_temp = qed_rd(p_hwfn, p_ptt, port_cfg_addr + offsetof(struct nvm_cfg1_port, speed_cap_mask)); - link->speed.advertised_speeds = - link_temp & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK; + link_temp &= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_MASK; + link->speed.advertised_speeds = link_temp; - p_hwfn->mcp_info->link_capabilities.speed_capabilities = - link->speed.advertised_speeds; + link_temp = link->speed.advertised_speeds; + p_hwfn->mcp_info->link_capabilities.speed_capabilities = link_temp; link_temp = qed_rd(p_hwfn, p_ptt, port_cfg_addr + @@ -1517,8 +1624,7 @@ static int qed_hw_get_nvm_info(struct qed_hwfn *p_hwfn, link->speed.forced_speed = 100000; break; default: - DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", - link_temp); + DP_NOTICE(p_hwfn, "Unknown Speed in 0x%08x\n", link_temp); } link_temp &= NVM_CFG1_PORT_DRV_FLOW_CONTROL_MASK; @@ -1628,10 +1734,10 @@ static void qed_get_num_funcs(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) DP_VERBOSE(p_hwfn, NETIF_MSG_PROBE, - "PF [rel_id %d, abs_id %d] within the %d enabled functions on the engine\n", + "PF [rel_id %d, abs_id %d] occupies index %d within the %d enabled functions on the engine\n", p_hwfn->rel_pf_id, p_hwfn->abs_pf_id, - p_hwfn->num_funcs_on_engine); + p_hwfn->enabled_func_idx, p_hwfn->num_funcs_on_engine); } static int @@ -1703,10 +1809,9 @@ static int qed_get_dev_info(struct qed_dev *cdev) u32 tmp; /* Read Vendor Id / Device Id */ - pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, - &cdev->vendor_id); - pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, - &cdev->device_id); + pci_read_config_word(cdev->pdev, PCI_VENDOR_ID, &cdev->vendor_id); + pci_read_config_word(cdev->pdev, PCI_DEVICE_ID, &cdev->device_id); + cdev->chip_num = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, MISCS_REG_CHIP_NUM); cdev->chip_rev = (u16)qed_rd(p_hwfn, p_hwfn->p_main_ptt, @@ -1771,10 +1876,8 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* Allocate PTT pool */ rc = qed_ptt_pool_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to prepare hwfn's hw\n"); + if (rc) goto err0; - } /* Allocate the main PTT */ p_hwfn->p_main_ptt = qed_get_reserved_ptt(p_hwfn, RESERVED_PTT_MAIN); @@ -1782,7 +1885,7 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* First hwfn learns basic information, e.g., number of hwfns */ if (!p_hwfn->my_id) { rc = qed_get_dev_info(p_hwfn->cdev); - if (rc != 0) + if (rc) goto err1; } @@ -1804,10 +1907,8 @@ static int qed_hw_prepare_single(struct qed_hwfn *p_hwfn, /* Allocate the init RT array and initialize the init-ops engine */ rc = qed_init_alloc(p_hwfn); - if (rc) { - DP_NOTICE(p_hwfn, "Failed to allocate the init array\n"); + if (rc) goto err2; - } return rc; err2: @@ -2015,10 +2116,8 @@ qed_chain_alloc_next_ptr(struct qed_dev *cdev, struct qed_chain *p_chain) p_virt = dma_alloc_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL); - if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate chain memory\n"); + if (!p_virt) return -ENOMEM; - } if (i == 0) { qed_chain_init_mem(p_chain, p_virt, p_phys); @@ -2048,10 +2147,8 @@ qed_chain_alloc_single(struct qed_dev *cdev, struct qed_chain *p_chain) p_virt = dma_alloc_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL); - if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate chain memory\n"); + if (!p_virt) return -ENOMEM; - } qed_chain_init_mem(p_chain, p_virt, p_phys); qed_chain_reset(p_chain); @@ -2068,13 +2165,9 @@ static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) void *p_virt = NULL; size = page_cnt * sizeof(*pp_virt_addr_tbl); - pp_virt_addr_tbl = vmalloc(size); - if (!pp_virt_addr_tbl) { - DP_NOTICE(cdev, - "Failed to allocate memory for the chain virtual addresses table\n"); + pp_virt_addr_tbl = vzalloc(size); + if (!pp_virt_addr_tbl) return -ENOMEM; - } - memset(pp_virt_addr_tbl, 0, size); /* The allocation of the PBL table is done with its full size, since it * is expected to be successive. @@ -2087,19 +2180,15 @@ static int qed_chain_alloc_pbl(struct qed_dev *cdev, struct qed_chain *p_chain) size, &p_pbl_phys, GFP_KERNEL); qed_chain_init_pbl_mem(p_chain, p_pbl_virt, p_pbl_phys, pp_virt_addr_tbl); - if (!p_pbl_virt) { - DP_NOTICE(cdev, "Failed to allocate chain pbl memory\n"); + if (!p_pbl_virt) return -ENOMEM; - } for (i = 0; i < page_cnt; i++) { p_virt = dma_alloc_coherent(&cdev->pdev->dev, QED_CHAIN_PAGE_SIZE, &p_phys, GFP_KERNEL); - if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate chain memory\n"); + if (!p_virt) return -ENOMEM; - } if (i == 0) { qed_chain_init_mem(p_chain, p_virt, p_phys); @@ -2134,7 +2223,8 @@ int qed_chain_alloc(struct qed_dev *cdev, rc = qed_chain_alloc_sanity_check(cdev, cnt_type, elem_size, page_cnt); if (rc) { DP_NOTICE(cdev, - "Cannot allocate a chain with the given arguments:\n" + "Cannot allocate a chain with the given arguments:\n"); + DP_NOTICE(cdev, "[use_mode %d, mode %d, cnt_type %d, num_elems %d, elem_size %zu]\n", intended_use, mode, cnt_type, num_elems, elem_size); return rc; @@ -2183,8 +2273,7 @@ int qed_fw_l2_queue(struct qed_hwfn *p_hwfn, u16 src_id, u16 *dst_id) return 0; } -int qed_fw_vport(struct qed_hwfn *p_hwfn, - u8 src_id, u8 *dst_id) +int qed_fw_vport(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) { if (src_id >= RESC_NUM(p_hwfn, QED_VPORT)) { u8 min, max; @@ -2203,8 +2292,7 @@ int qed_fw_vport(struct qed_hwfn *p_hwfn, return 0; } -int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, - u8 src_id, u8 *dst_id) +int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id) { if (src_id >= RESC_NUM(p_hwfn, QED_RSS_ENG)) { u8 min, max; @@ -2223,6 +2311,98 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, return 0; } +static void qed_llh_mac_to_filter(u32 *p_high, u32 *p_low, + u8 *p_filter) +{ + *p_high = p_filter[1] | (p_filter[0] << 8); + *p_low = p_filter[5] | (p_filter[4] << 8) | + (p_filter[3] << 16) | (p_filter[2] << 24); +} + +int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter) +{ + u32 high = 0, low = 0, en; + int i; + + if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn))) + return 0; + + qed_llh_mac_to_filter(&high, &low, p_filter); + + /* Find a free entry and utilize it */ + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + en = qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32)); + if (en) + continue; + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + 2 * i * sizeof(u32), low); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32), high); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_MODE + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE + + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 1); + break; + } + if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) { + DP_NOTICE(p_hwfn, + "Failed to find an empty LLH filter to utilize\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "mac: %pM is added at %d\n", + p_filter, i); + + return 0; +} + +void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter) +{ + u32 high = 0, low = 0; + int i; + + if (!(IS_MF_SI(p_hwfn) || IS_MF_DEFAULT(p_hwfn))) + return; + + qed_llh_mac_to_filter(&high, &low, p_filter); + + /* Find the entry and clean it */ + for (i = 0; i < NIG_REG_LLH_FUNC_FILTER_EN_SIZE; i++) { + if (qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + 2 * i * sizeof(u32)) != low) + continue; + if (qed_rd(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32)) != high) + continue; + + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_EN + i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + 2 * i * sizeof(u32), 0); + qed_wr(p_hwfn, p_ptt, + NIG_REG_LLH_FUNC_FILTER_VALUE + + (2 * i + 1) * sizeof(u32), 0); + + DP_VERBOSE(p_hwfn, NETIF_MSG_HW, + "mac: %pM is removed from %d\n", + p_filter, i); + break; + } + if (i >= NIG_REG_LLH_FUNC_FILTER_EN_SIZE) + DP_NOTICE(p_hwfn, "Tried to remove a non-configured filter\n"); +} + static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 hw_addr, void *p_eth_qzone, size_t eth_qzone_size, u8 timeset) @@ -2386,8 +2566,7 @@ static void qed_disable_wfq_for_all_vports(struct qed_hwfn *p_hwfn, * 3. total_req_min_rate [all vports min rate sum] shouldn't exceed min_pf_rate. */ static int qed_init_wfq_param(struct qed_hwfn *p_hwfn, - u16 vport_id, u32 req_rate, - u32 min_pf_rate) + u16 vport_id, u32 req_rate, u32 min_pf_rate) { u32 total_req_min_rate = 0, total_left_rate = 0, left_rate_per_vp = 0; int non_requested_count = 0, req_count = 0, i, num_vports; @@ -2471,7 +2650,7 @@ static int __qed_configure_vport_wfq(struct qed_hwfn *p_hwfn, rc = qed_init_wfq_param(p_hwfn, vp_id, rate, p_link->min_pf_rate); - if (rc == 0) + if (!rc) qed_configure_wfq_for_all_vports(p_hwfn, p_ptt, p_link->min_pf_rate); else diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index 343bb0344f62..b6711c106597 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -309,6 +309,26 @@ int qed_fw_rss_eng(struct qed_hwfn *p_hwfn, u8 src_id, u8 *dst_id); +/** + * @brief qed_llh_add_mac_filter - configures a MAC filter in llh + * + * @param p_hwfn + * @param p_ptt + * @param p_filter - MAC to add + */ +int qed_llh_add_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter); + +/** + * @brief qed_llh_remove_mac_filter - removes a MAC filter from llh + * + * @param p_hwfn + * @param p_ptt + * @param p_filter - MAC to remove + */ +void qed_llh_remove_mac_filter(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u8 *p_filter); + /** * *@brief Cleanup of previous driver remains prior to load * diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 6f9d3b831a2a..72eee29c677f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -536,6 +536,247 @@ struct core_conn_context { struct regpair ustorm_st_padding[2]; }; +enum core_error_handle { + LL2_DROP_PACKET, + LL2_DO_NOTHING, + LL2_ASSERT, + MAX_CORE_ERROR_HANDLE +}; + +enum core_event_opcode { + CORE_EVENT_TX_QUEUE_START, + CORE_EVENT_TX_QUEUE_STOP, + CORE_EVENT_RX_QUEUE_START, + CORE_EVENT_RX_QUEUE_STOP, + MAX_CORE_EVENT_OPCODE +}; + +enum core_l4_pseudo_checksum_mode { + CORE_L4_PSEUDO_CSUM_CORRECT_LENGTH, + CORE_L4_PSEUDO_CSUM_ZERO_LENGTH, + MAX_CORE_L4_PSEUDO_CHECKSUM_MODE +}; + +struct core_ll2_port_stats { + struct regpair gsi_invalid_hdr; + struct regpair gsi_invalid_pkt_length; + struct regpair gsi_unsupported_pkt_typ; + struct regpair gsi_crcchksm_error; +}; + +struct core_ll2_pstorm_per_queue_stat { + struct regpair sent_ucast_bytes; + struct regpair sent_mcast_bytes; + struct regpair sent_bcast_bytes; + struct regpair sent_ucast_pkts; + struct regpair sent_mcast_pkts; + struct regpair sent_bcast_pkts; +}; + +struct core_ll2_rx_prod { + __le16 bd_prod; + __le16 cqe_prod; + __le32 reserved; +}; + +struct core_ll2_tstorm_per_queue_stat { + struct regpair packet_too_big_discard; + struct regpair no_buff_discard; +}; + +struct core_ll2_ustorm_per_queue_stat { + struct regpair rcv_ucast_bytes; + struct regpair rcv_mcast_bytes; + struct regpair rcv_bcast_bytes; + struct regpair rcv_ucast_pkts; + struct regpair rcv_mcast_pkts; + struct regpair rcv_bcast_pkts; +}; + +enum core_ramrod_cmd_id { + CORE_RAMROD_UNUSED, + CORE_RAMROD_RX_QUEUE_START, + CORE_RAMROD_TX_QUEUE_START, + CORE_RAMROD_RX_QUEUE_STOP, + CORE_RAMROD_TX_QUEUE_STOP, + MAX_CORE_RAMROD_CMD_ID +}; + +enum core_roce_flavor_type { + CORE_ROCE, + CORE_RROCE, + MAX_CORE_ROCE_FLAVOR_TYPE +}; + +struct core_rx_action_on_error { + u8 error_type; +#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_MASK 0x3 +#define CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG_SHIFT 0 +#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_MASK 0x3 +#define CORE_RX_ACTION_ON_ERROR_NO_BUFF_SHIFT 2 +#define CORE_RX_ACTION_ON_ERROR_RESERVED_MASK 0xF +#define CORE_RX_ACTION_ON_ERROR_RESERVED_SHIFT 4 +}; + +struct core_rx_bd { + struct regpair addr; + __le16 reserved[4]; +}; + +struct core_rx_bd_with_buff_len { + struct regpair addr; + __le16 buff_length; + __le16 reserved[3]; +}; + +union core_rx_bd_union { + struct core_rx_bd rx_bd; + struct core_rx_bd_with_buff_len rx_bd_with_len; +}; + +struct core_rx_cqe_opaque_data { + __le32 data[2]; +}; + +enum core_rx_cqe_type { + CORE_RX_CQE_ILLIGAL_TYPE, + CORE_RX_CQE_TYPE_REGULAR, + CORE_RX_CQE_TYPE_GSI_OFFLOAD, + CORE_RX_CQE_TYPE_SLOW_PATH, + MAX_CORE_RX_CQE_TYPE +}; + +struct core_rx_fast_path_cqe { + u8 type; + u8 placement_offset; + struct parsing_and_err_flags parse_flags; + __le16 packet_length; + __le16 vlan; + struct core_rx_cqe_opaque_data opaque_data; + __le32 reserved[4]; +}; + +struct core_rx_gsi_offload_cqe { + u8 type; + u8 data_length_error; + struct parsing_and_err_flags parse_flags; + __le16 data_length; + __le16 vlan; + __le32 src_mac_addrhi; + __le16 src_mac_addrlo; + u8 reserved1[2]; + __le32 gid_dst[4]; +}; + +struct core_rx_slow_path_cqe { + u8 type; + u8 ramrod_cmd_id; + __le16 echo; + __le32 reserved1[7]; +}; + +union core_rx_cqe_union { + struct core_rx_fast_path_cqe rx_cqe_fp; + struct core_rx_gsi_offload_cqe rx_cqe_gsi; + struct core_rx_slow_path_cqe rx_cqe_sp; +}; + +struct core_rx_start_ramrod_data { + struct regpair bd_base; + struct regpair cqe_pbl_addr; + __le16 mtu; + __le16 sb_id; + u8 sb_index; + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 drop_ttl0_flg; + __le16 num_of_pbl_pages; + u8 inner_vlan_removal_en; + u8 queue_id; + u8 main_func_queue; + u8 mf_si_bcast_accept_all; + u8 mf_si_mcast_accept_all; + struct core_rx_action_on_error action_on_error; + u8 gsi_offload_flag; + u8 reserved[7]; +}; + +struct core_rx_stop_ramrod_data { + u8 complete_cqe_flg; + u8 complete_event_flg; + u8 queue_id; + u8 reserved1; + __le16 reserved2[2]; +}; + +struct core_tx_bd_flags { + u8 as_bitfield; +#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_MASK 0x1 +#define CORE_TX_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0 +#define CORE_TX_BD_FLAGS_VLAN_INSERTION_MASK 0x1 +#define CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT 1 +#define CORE_TX_BD_FLAGS_START_BD_MASK 0x1 +#define CORE_TX_BD_FLAGS_START_BD_SHIFT 2 +#define CORE_TX_BD_FLAGS_IP_CSUM_MASK 0x1 +#define CORE_TX_BD_FLAGS_IP_CSUM_SHIFT 3 +#define CORE_TX_BD_FLAGS_L4_CSUM_MASK 0x1 +#define CORE_TX_BD_FLAGS_L4_CSUM_SHIFT 4 +#define CORE_TX_BD_FLAGS_IPV6_EXT_MASK 0x1 +#define CORE_TX_BD_FLAGS_IPV6_EXT_SHIFT 5 +#define CORE_TX_BD_FLAGS_L4_PROTOCOL_MASK 0x1 +#define CORE_TX_BD_FLAGS_L4_PROTOCOL_SHIFT 6 +#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_MASK 0x1 +#define CORE_TX_BD_FLAGS_L4_PSEUDO_CSUM_MODE_SHIFT 7 +#define CORE_TX_BD_FLAGS_ROCE_FLAV_MASK 0x1 +#define CORE_TX_BD_FLAGS_ROCE_FLAV_SHIFT 12 + +}; + +struct core_tx_bd { + struct regpair addr; + __le16 nbytes; + __le16 nw_vlan_or_lb_echo; + u8 bitfield0; +#define CORE_TX_BD_NBDS_MASK 0xF +#define CORE_TX_BD_NBDS_SHIFT 0 +#define CORE_TX_BD_ROCE_FLAV_MASK 0x1 +#define CORE_TX_BD_ROCE_FLAV_SHIFT 4 +#define CORE_TX_BD_RESERVED0_MASK 0x7 +#define CORE_TX_BD_RESERVED0_SHIFT 5 + struct core_tx_bd_flags bd_flags; + __le16 bitfield1; +#define CORE_TX_BD_L4_HDR_OFFSET_W_MASK 0x3FFF +#define CORE_TX_BD_L4_HDR_OFFSET_W_SHIFT 0 +#define CORE_TX_BD_TX_DST_MASK 0x1 +#define CORE_TX_BD_TX_DST_SHIFT 14 +#define CORE_TX_BD_RESERVED1_MASK 0x1 +#define CORE_TX_BD_RESERVED1_SHIFT 15 +}; + +enum core_tx_dest { + CORE_TX_DEST_NW, + CORE_TX_DEST_LB, + MAX_CORE_TX_DEST +}; + +struct core_tx_start_ramrod_data { + struct regpair pbl_base_addr; + __le16 mtu; + __le16 sb_id; + u8 sb_index; + u8 stats_en; + u8 stats_id; + u8 conn_type; + __le16 pbl_size; + __le16 qm_pq_id; + u8 gsi_offload_flag; + u8 resrved[3]; +}; + +struct core_tx_stop_ramrod_data { + __le32 reserved0[2]; +}; + struct eth_mstorm_per_pf_stat { struct regpair gre_discard_pkts; struct regpair vxlan_discard_pkts; @@ -636,9 +877,33 @@ struct hsi_fp_ver_struct { }; /* Mstorm non-triggering VF zone */ +enum malicious_vf_error_id { + MALICIOUS_VF_NO_ERROR, + VF_PF_CHANNEL_NOT_READY, + VF_ZONE_MSG_NOT_VALID, + VF_ZONE_FUNC_NOT_ENABLED, + ETH_PACKET_TOO_SMALL, + ETH_ILLEGAL_VLAN_MODE, + ETH_MTU_VIOLATION, + ETH_ILLEGAL_INBAND_TAGS, + ETH_VLAN_INSERT_AND_INBAND_VLAN, + ETH_ILLEGAL_NBDS, + ETH_FIRST_BD_WO_SOP, + ETH_INSUFFICIENT_BDS, + ETH_ILLEGAL_LSO_HDR_NBDS, + ETH_ILLEGAL_LSO_MSS, + ETH_ZERO_SIZE_BD, + ETH_ILLEGAL_LSO_HDR_LEN, + ETH_INSUFFICIENT_PAYLOAD, + ETH_EDPM_OUT_OF_SYNC, + ETH_TUNN_IPV6_EXT_NBD_ERR, + ETH_CONTROL_PACKET_VIOLATION, + MAX_MALICIOUS_VF_ERROR_ID +}; + struct mstorm_non_trigger_vf_zone { struct eth_mstorm_per_queue_stat eth_queue_stat; - struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF]; + struct eth_rx_prod_data eth_rx_queue_producers[ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD]; }; /* Mstorm VF zone */ @@ -705,13 +970,17 @@ struct pf_start_ramrod_data { struct protocol_dcb_data { u8 dcb_enable_flag; + u8 reserved_a; u8 dcb_priority; u8 dcb_tc; - u8 reserved; + u8 reserved_b; + u8 reserved0; }; struct pf_update_tunnel_config { u8 update_rx_pf_clss; + u8 update_rx_def_ucast_clss; + u8 update_rx_def_non_ucast_clss; u8 update_tx_pf_clss; u8 set_vxlan_udp_port_flg; u8 set_geneve_udp_port_flg; @@ -727,7 +996,7 @@ struct pf_update_tunnel_config { u8 tunnel_clss_ipgre; __le16 vxlan_udp_port; __le16 geneve_udp_port; - __le16 reserved[3]; + __le16 reserved[2]; }; struct pf_update_ramrod_data { @@ -736,16 +1005,17 @@ struct pf_update_ramrod_data { u8 update_fcoe_dcb_data_flag; u8 update_iscsi_dcb_data_flag; u8 update_roce_dcb_data_flag; + u8 update_rroce_dcb_data_flag; u8 update_iwarp_dcb_data_flag; u8 update_mf_vlan_flag; - u8 reserved; struct protocol_dcb_data eth_dcb_data; struct protocol_dcb_data fcoe_dcb_data; struct protocol_dcb_data iscsi_dcb_data; struct protocol_dcb_data roce_dcb_data; + struct protocol_dcb_data rroce_dcb_data; struct protocol_dcb_data iwarp_dcb_data; __le16 mf_vlan; - __le16 reserved2; + __le16 reserved; struct pf_update_tunnel_config tunnel_config; }; @@ -766,10 +1036,14 @@ enum protocol_version_array_key { MAX_PROTOCOL_VERSION_ARRAY_KEY }; -/* Pstorm non-triggering VF zone */ +struct rdma_sent_stats { + struct regpair sent_bytes; + struct regpair sent_pkts; +}; + struct pstorm_non_trigger_vf_zone { struct eth_pstorm_per_queue_stat eth_queue_stat; - struct regpair reserved[2]; + struct rdma_sent_stats rdma_stats; }; /* Pstorm VF zone */ @@ -786,7 +1060,11 @@ struct ramrod_header { __le16 echo; }; -/* Slowpath Element (SPQE) */ +struct rdma_rcv_stats { + struct regpair rcv_bytes; + struct regpair rcv_pkts; +}; + struct slow_path_element { struct ramrod_header hdr; struct regpair data_ptr; @@ -794,7 +1072,7 @@ struct slow_path_element { /* Tstorm non-triggering VF zone */ struct tstorm_non_trigger_vf_zone { - struct regpair reserved[2]; + struct rdma_rcv_stats rdma_stats; }; struct tstorm_per_port_stat { @@ -802,9 +1080,14 @@ struct tstorm_per_port_stat { struct regpair mac_error_discard; struct regpair mftag_filter_discard; struct regpair eth_mac_filter_discard; - struct regpair reserved[5]; + struct regpair ll2_mac_filter_discard; + struct regpair ll2_conn_disabled_discard; + struct regpair iscsi_irregular_pkt; + struct regpair reserved; + struct regpair roce_irregular_pkt; struct regpair eth_irregular_pkt; - struct regpair reserved1[2]; + struct regpair reserved1; + struct regpair preroce_irregular_pkt; struct regpair eth_gre_tunn_filter_discard; struct regpair eth_vxlan_tunn_filter_discard; struct regpair eth_geneve_tunn_filter_discard; @@ -870,7 +1153,13 @@ struct vf_stop_ramrod_data { __le32 reserved2; }; -/* Attentions status block */ +enum vf_zone_size_mode { + VF_ZONE_SIZE_MODE_DEFAULT, + VF_ZONE_SIZE_MODE_DOUBLE, + VF_ZONE_SIZE_MODE_QUAD, + MAX_VF_ZONE_SIZE_MODE +}; + struct atten_status_block { __le32 atten_bits; __le32 atten_ack; @@ -1442,13 +1731,6 @@ enum bin_dbg_buffer_type { MAX_BIN_DBG_BUFFER_TYPE }; -/* Chip IDs */ -enum chip_ids { - CHIP_RESERVED, - CHIP_BB_B0, - CHIP_RESERVED2, - MAX_CHIP_IDS -}; /* Attention bit mapping */ struct dbg_attn_bit_mapping { @@ -1527,6 +1809,371 @@ enum dbg_attn_type { MAX_DBG_ATTN_TYPE }; +/* condition header for registers dump */ +struct dbg_dump_cond_hdr { + struct dbg_mode_hdr mode; /* Mode header */ + u8 block_id; /* block ID */ + u8 data_size; /* size in dwords of the data following this header */ +}; + +/* memory data for registers dump */ +struct dbg_dump_mem { + __le32 dword0; +#define DBG_DUMP_MEM_ADDRESS_MASK 0xFFFFFF +#define DBG_DUMP_MEM_ADDRESS_SHIFT 0 +#define DBG_DUMP_MEM_MEM_GROUP_ID_MASK 0xFF +#define DBG_DUMP_MEM_MEM_GROUP_ID_SHIFT 24 + __le32 dword1; +#define DBG_DUMP_MEM_LENGTH_MASK 0xFFFFFF +#define DBG_DUMP_MEM_LENGTH_SHIFT 0 +#define DBG_DUMP_MEM_RESERVED_MASK 0xFF +#define DBG_DUMP_MEM_RESERVED_SHIFT 24 +}; + +/* register data for registers dump */ +struct dbg_dump_reg { + __le32 data; +#define DBG_DUMP_REG_ADDRESS_MASK 0xFFFFFF /* register address (in dwords) */ +#define DBG_DUMP_REG_ADDRESS_SHIFT 0 +#define DBG_DUMP_REG_LENGTH_MASK 0xFF /* register size (in dwords) */ +#define DBG_DUMP_REG_LENGTH_SHIFT 24 +}; + +/* split header for registers dump */ +struct dbg_dump_split_hdr { + __le32 hdr; +#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_MASK 0xFFFFFF +#define DBG_DUMP_SPLIT_HDR_DATA_SIZE_SHIFT 0 +#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_MASK 0xFF +#define DBG_DUMP_SPLIT_HDR_SPLIT_TYPE_ID_SHIFT 24 +}; + +/* condition header for idle check */ +struct dbg_idle_chk_cond_hdr { + struct dbg_mode_hdr mode; /* Mode header */ + __le16 data_size; /* size in dwords of the data following this header */ +}; + +/* Idle Check condition register */ +struct dbg_idle_chk_cond_reg { + __le32 data; +#define DBG_IDLE_CHK_COND_REG_ADDRESS_MASK 0xFFFFFF +#define DBG_IDLE_CHK_COND_REG_ADDRESS_SHIFT 0 +#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_MASK 0xFF +#define DBG_IDLE_CHK_COND_REG_BLOCK_ID_SHIFT 24 + __le16 num_entries; /* number of registers entries to check */ + u8 entry_size; /* size of registers entry (in dwords) */ + u8 start_entry; /* index of the first entry to check */ +}; + +/* Idle Check info register */ +struct dbg_idle_chk_info_reg { + __le32 data; +#define DBG_IDLE_CHK_INFO_REG_ADDRESS_MASK 0xFFFFFF +#define DBG_IDLE_CHK_INFO_REG_ADDRESS_SHIFT 0 +#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_MASK 0xFF +#define DBG_IDLE_CHK_INFO_REG_BLOCK_ID_SHIFT 24 + __le16 size; /* register size in dwords */ + struct dbg_mode_hdr mode; /* Mode header */ +}; + +/* Idle Check register */ +union dbg_idle_chk_reg { + struct dbg_idle_chk_cond_reg cond_reg; /* condition register */ + struct dbg_idle_chk_info_reg info_reg; /* info register */ +}; + +/* Idle Check result header */ +struct dbg_idle_chk_result_hdr { + __le16 rule_id; /* Failing rule index */ + __le16 mem_entry_id; /* Failing memory entry index */ + u8 num_dumped_cond_regs; /* number of dumped condition registers */ + u8 num_dumped_info_regs; /* number of dumped condition registers */ + u8 severity; /* from dbg_idle_chk_severity_types enum */ + u8 reserved; +}; + +/* Idle Check result register header */ +struct dbg_idle_chk_result_reg_hdr { + u8 data; +#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_MASK 0x1 +#define DBG_IDLE_CHK_RESULT_REG_HDR_IS_MEM_SHIFT 0 +#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_MASK 0x7F +#define DBG_IDLE_CHK_RESULT_REG_HDR_REG_ID_SHIFT 1 + u8 start_entry; /* index of the first checked entry */ + __le16 size; /* register size in dwords */ +}; + +/* Idle Check rule */ +struct dbg_idle_chk_rule { + __le16 rule_id; /* Idle Check rule ID */ + u8 severity; /* value from dbg_idle_chk_severity_types enum */ + u8 cond_id; /* Condition ID */ + u8 num_cond_regs; /* number of condition registers */ + u8 num_info_regs; /* number of info registers */ + u8 num_imms; /* number of immediates in the condition */ + u8 reserved1; + __le16 reg_offset; /* offset of this rules registers in the idle check + * register array (in dbg_idle_chk_reg units). + */ + __le16 imm_offset; /* offset of this rules immediate values in the + * immediate values array (in dwords). + */ +}; + +/* Idle Check rule parsing data */ +struct dbg_idle_chk_rule_parsing_data { + __le32 data; +#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_MASK 0x1 +#define DBG_IDLE_CHK_RULE_PARSING_DATA_HAS_FW_MSG_SHIFT 0 +#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_MASK 0x7FFFFFFF +#define DBG_IDLE_CHK_RULE_PARSING_DATA_STR_OFFSET_SHIFT 1 +}; + +/* idle check severity types */ +enum dbg_idle_chk_severity_types { + /* idle check failure should cause an error */ + IDLE_CHK_SEVERITY_ERROR, + /* idle check failure should cause an error only if theres no traffic */ + IDLE_CHK_SEVERITY_ERROR_NO_TRAFFIC, + /* idle check failure should cause a warning */ + IDLE_CHK_SEVERITY_WARNING, + MAX_DBG_IDLE_CHK_SEVERITY_TYPES +}; + +/* Debug Bus block data */ +struct dbg_bus_block_data { + u8 enabled; /* Indicates if the block is enabled for recording (0/1) */ + u8 hw_id; /* HW ID associated with the block */ + u8 line_num; /* Debug line number to select */ + u8 right_shift; /* Number of units to right the debug data (0-3) */ + u8 cycle_en; /* 4-bit value: bit i set -> unit i is enabled. */ + u8 force_valid; /* 4-bit value: bit i set -> unit i is forced valid. */ + u8 force_frame; /* 4-bit value: bit i set -> unit i frame bit is forced. + */ + u8 reserved; +}; + +/* Debug Bus Clients */ +enum dbg_bus_clients { + DBG_BUS_CLIENT_RBCN, + DBG_BUS_CLIENT_RBCP, + DBG_BUS_CLIENT_RBCR, + DBG_BUS_CLIENT_RBCT, + DBG_BUS_CLIENT_RBCU, + DBG_BUS_CLIENT_RBCF, + DBG_BUS_CLIENT_RBCX, + DBG_BUS_CLIENT_RBCS, + DBG_BUS_CLIENT_RBCH, + DBG_BUS_CLIENT_RBCZ, + DBG_BUS_CLIENT_OTHER_ENGINE, + DBG_BUS_CLIENT_TIMESTAMP, + DBG_BUS_CLIENT_CPU, + DBG_BUS_CLIENT_RBCY, + DBG_BUS_CLIENT_RBCQ, + DBG_BUS_CLIENT_RBCM, + DBG_BUS_CLIENT_RBCB, + DBG_BUS_CLIENT_RBCW, + DBG_BUS_CLIENT_RBCV, + MAX_DBG_BUS_CLIENTS +}; + +/* Debug Bus memory address */ +struct dbg_bus_mem_addr { + __le32 lo; + __le32 hi; +}; + +/* Debug Bus PCI buffer data */ +struct dbg_bus_pci_buf_data { + struct dbg_bus_mem_addr phys_addr; /* PCI buffer physical address */ + struct dbg_bus_mem_addr virt_addr; /* PCI buffer virtual address */ + __le32 size; /* PCI buffer size in bytes */ +}; + +/* Debug Bus Storm EID range filter params */ +struct dbg_bus_storm_eid_range_params { + u8 min; /* Minimal event ID to filter on */ + u8 max; /* Maximal event ID to filter on */ +}; + +/* Debug Bus Storm EID mask filter params */ +struct dbg_bus_storm_eid_mask_params { + u8 val; /* Event ID value */ + u8 mask; /* Event ID mask. 1s in the mask = dont care bits. */ +}; + +/* Debug Bus Storm EID filter params */ +union dbg_bus_storm_eid_params { + struct dbg_bus_storm_eid_range_params range; + struct dbg_bus_storm_eid_mask_params mask; +}; + +/* Debug Bus Storm data */ +struct dbg_bus_storm_data { + u8 fast_enabled; + u8 fast_mode; + u8 slow_enabled; + u8 slow_mode; + u8 hw_id; + u8 eid_filter_en; + u8 eid_range_not_mask; + u8 cid_filter_en; + union dbg_bus_storm_eid_params eid_filter_params; + __le16 reserved; + __le32 cid; +}; + +/* Debug Bus data */ +struct dbg_bus_data { + __le32 app_version; /* The tools version number of the application */ + u8 state; /* The current debug bus state */ + u8 hw_dwords; /* HW dwords per cycle */ + u8 next_hw_id; /* Next HW ID to be associated with an input */ + u8 num_enabled_blocks; /* Number of blocks enabled for recording */ + u8 num_enabled_storms; /* Number of Storms enabled for recording */ + u8 target; /* Output target */ + u8 next_trigger_state; /* ID of next trigger state to be added */ + u8 next_constraint_id; /* ID of next filter/trigger constraint to be + * added. + */ + u8 one_shot_en; /* Indicates if one-shot mode is enabled (0/1) */ + u8 grc_input_en; /* Indicates if GRC recording is enabled (0/1) */ + u8 timestamp_input_en; /* Indicates if timestamp recording is enabled + * (0/1). + */ + u8 filter_en; /* Indicates if the recording filter is enabled (0/1) */ + u8 trigger_en; /* Indicates if the recording trigger is enabled (0/1) */ + u8 adding_filter; /* If true, the next added constraint belong to the + * filter. Otherwise, it belongs to the last added + * trigger state. Valid only if either filter or + * triggers are enabled. + */ + u8 filter_pre_trigger; /* Indicates if the recording filter should be + * applied before the trigger. Valid only if both + * filter and trigger are enabled (0/1). + */ + u8 filter_post_trigger; /* Indicates if the recording filter should be + * applied after the trigger. Valid only if both + * filter and trigger are enabled (0/1). + */ + u8 unify_inputs; /* If true, all inputs are associated with HW ID 0. + * Otherwise, each input is assigned a different HW ID + * (0/1). + */ + u8 rcv_from_other_engine; /* Indicates if the other engine sends it NW + * recording to this engine (0/1). + */ + struct dbg_bus_pci_buf_data pci_buf; /* Debug Bus PCI buffer data. Valid + * only when the target is + * DBG_BUS_TARGET_ID_PCI. + */ + __le16 reserved; + struct dbg_bus_block_data blocks[80];/* Debug Bus data for each block */ + struct dbg_bus_storm_data storms[6]; /* Debug Bus data for each block */ +}; + +/* Debug bus frame modes */ +enum dbg_bus_frame_modes { + DBG_BUS_FRAME_MODE_0HW_4ST = 0, /* 0 HW dwords, 4 Storm dwords */ + DBG_BUS_FRAME_MODE_4HW_0ST = 3, /* 4 HW dwords, 0 Storm dwords */ + DBG_BUS_FRAME_MODE_8HW_0ST = 4, /* 8 HW dwords, 0 Storm dwords */ + MAX_DBG_BUS_FRAME_MODES +}; + +/* Debug bus states */ +enum dbg_bus_states { + DBG_BUS_STATE_IDLE, /* debug bus idle state (not recording) */ + DBG_BUS_STATE_READY, /* debug bus is ready for configuration and + * recording. + */ + DBG_BUS_STATE_RECORDING, /* debug bus is currently recording */ + DBG_BUS_STATE_STOPPED, /* debug bus recording has stopped */ + MAX_DBG_BUS_STATES +}; + +/* Debug bus target IDs */ +enum dbg_bus_targets { + /* records debug bus to DBG block internal buffer */ + DBG_BUS_TARGET_ID_INT_BUF, + /* records debug bus to the NW */ + DBG_BUS_TARGET_ID_NIG, + /* records debug bus to a PCI buffer */ + DBG_BUS_TARGET_ID_PCI, + MAX_DBG_BUS_TARGETS +}; + +/* GRC Dump data */ +struct dbg_grc_data { + __le32 param_val[40]; /* Value of each GRC parameter. Array size must + * match the enum dbg_grc_params. + */ + u8 param_set_by_user[40]; /* Indicates for each GRC parameter if it was + * set by the user (0/1). Array size must + * match the enum dbg_grc_params. + */ +}; + +/* Debug GRC params */ +enum dbg_grc_params { + DBG_GRC_PARAM_DUMP_TSTORM, /* dump Tstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_MSTORM, /* dump Mstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_USTORM, /* dump Ustorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_XSTORM, /* dump Xstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_YSTORM, /* dump Ystorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_PSTORM, /* dump Pstorm memories (0/1) */ + DBG_GRC_PARAM_DUMP_REGS, /* dump non-memory registers (0/1) */ + DBG_GRC_PARAM_DUMP_RAM, /* dump Storm internal RAMs (0/1) */ + DBG_GRC_PARAM_DUMP_PBUF, /* dump Storm passive buffer (0/1) */ + DBG_GRC_PARAM_DUMP_IOR, /* dump Storm IORs (0/1) */ + DBG_GRC_PARAM_DUMP_VFC, /* dump VFC memories (0/1) */ + DBG_GRC_PARAM_DUMP_CM_CTX, /* dump CM contexts (0/1) */ + DBG_GRC_PARAM_DUMP_PXP, /* dump PXP memories (0/1) */ + DBG_GRC_PARAM_DUMP_RSS, /* dump RSS memories (0/1) */ + DBG_GRC_PARAM_DUMP_CAU, /* dump CAU memories (0/1) */ + DBG_GRC_PARAM_DUMP_QM, /* dump QM memories (0/1) */ + DBG_GRC_PARAM_DUMP_MCP, /* dump MCP memories (0/1) */ + DBG_GRC_PARAM_RESERVED, /* reserved */ + DBG_GRC_PARAM_DUMP_CFC, /* dump CFC memories (0/1) */ + DBG_GRC_PARAM_DUMP_IGU, /* dump IGU memories (0/1) */ + DBG_GRC_PARAM_DUMP_BRB, /* dump BRB memories (0/1) */ + DBG_GRC_PARAM_DUMP_BTB, /* dump BTB memories (0/1) */ + DBG_GRC_PARAM_DUMP_BMB, /* dump BMB memories (0/1) */ + DBG_GRC_PARAM_DUMP_NIG, /* dump NIG memories (0/1) */ + DBG_GRC_PARAM_DUMP_MULD, /* dump MULD memories (0/1) */ + DBG_GRC_PARAM_DUMP_PRS, /* dump PRS memories (0/1) */ + DBG_GRC_PARAM_DUMP_DMAE, /* dump PRS memories (0/1) */ + DBG_GRC_PARAM_DUMP_TM, /* dump TM (timers) memories (0/1) */ + DBG_GRC_PARAM_DUMP_SDM, /* dump SDM memories (0/1) */ + DBG_GRC_PARAM_DUMP_DIF, /* dump DIF memories (0/1) */ + DBG_GRC_PARAM_DUMP_STATIC, /* dump static debug data (0/1) */ + DBG_GRC_PARAM_UNSTALL, /* un-stall Storms after dump (0/1) */ + DBG_GRC_PARAM_NUM_LCIDS, /* number of LCIDs (0..320) */ + DBG_GRC_PARAM_NUM_LTIDS, /* number of LTIDs (0..320) */ + /* preset: exclude all memories from dump (1 only) */ + DBG_GRC_PARAM_EXCLUDE_ALL, + /* preset: include memories for crash dump (1 only) */ + DBG_GRC_PARAM_CRASH, + /* perform dump only if MFW is responding (0/1) */ + DBG_GRC_PARAM_PARITY_SAFE, + DBG_GRC_PARAM_DUMP_CM, /* dump CM memories (0/1) */ + DBG_GRC_PARAM_DUMP_PHY, /* dump PHY memories (0/1) */ + MAX_DBG_GRC_PARAMS +}; + +/* Debug reset registers */ +enum dbg_reset_regs { + DBG_RESET_REG_MISCS_PL_UA, + DBG_RESET_REG_MISCS_PL_HV, + DBG_RESET_REG_MISCS_PL_HV_2, + DBG_RESET_REG_MISC_PL_UA, + DBG_RESET_REG_MISC_PL_HV, + DBG_RESET_REG_MISC_PL_PDA_VMAIN_1, + DBG_RESET_REG_MISC_PL_PDA_VMAIN_2, + DBG_RESET_REG_MISC_PL_PDA_VAUX, + MAX_DBG_RESET_REGS +}; + /* Debug status codes */ enum dbg_status { DBG_STATUS_OK, @@ -1579,9 +2226,45 @@ enum dbg_status { DBG_STATUS_REG_FIFO_BAD_DATA, DBG_STATUS_PROTECTION_OVERRIDE_BAD_DATA, DBG_STATUS_DBG_ARRAY_NOT_SET, + DBG_STATUS_MULTI_BLOCKS_WITH_FILTER, MAX_DBG_STATUS }; +/* Debug Storms IDs */ +enum dbg_storms { + DBG_TSTORM_ID, + DBG_MSTORM_ID, + DBG_USTORM_ID, + DBG_XSTORM_ID, + DBG_YSTORM_ID, + DBG_PSTORM_ID, + MAX_DBG_STORMS +}; + +/* Idle Check data */ +struct idle_chk_data { + __le32 buf_size; /* Idle check buffer size in dwords */ + u8 buf_size_set; /* Indicates if the idle check buffer size was set + * (0/1). + */ + u8 reserved1; + __le16 reserved2; +}; + +/* Debug Tools data (per HW function) */ +struct dbg_tools_data { + struct dbg_grc_data grc; /* GRC Dump data */ + struct dbg_bus_data bus; /* Debug Bus data */ + struct idle_chk_data idle_chk; /* Idle Check data */ + u8 mode_enable[40]; /* Indicates if a mode is enabled (0/1) */ + u8 block_in_reset[80]; /* Indicates if a block is in reset state (0/1). + */ + u8 chip_id; /* Chip ID (from enum chip_ids) */ + u8 platform_id; /* Platform ID (from enum platform_ids) */ + u8 initialized; /* Indicates if the data was initialized */ + u8 reserved; +}; + /********************************/ /* HSI Init Functions constants */ /********************************/ @@ -1589,7 +2272,41 @@ enum dbg_status { /* Number of VLAN priorities */ #define NUM_OF_VLAN_PRIORITIES 8 -/* QM per-port init parameters */ +struct init_brb_ram_req { + __le32 guranteed_per_tc; + __le32 headroom_per_tc; + __le32 min_pkt_size; + __le32 max_ports_per_engine; + u8 num_active_tcs[MAX_NUM_PORTS]; +}; + +struct init_ets_tc_req { + u8 use_sp; + u8 use_wfq; + __le16 weight; +}; + +struct init_ets_req { + __le32 mtu; + struct init_ets_tc_req tc_req[NUM_OF_TCS]; +}; + +struct init_nig_lb_rl_req { + __le16 lb_mac_rate; + __le16 lb_rate; + __le32 mtu; + __le16 tc_rate[NUM_OF_PHYS_TCS]; +}; + +struct init_nig_pri_tc_map_entry { + u8 tc_id; + u8 valid; +}; + +struct init_nig_pri_tc_map_req { + struct init_nig_pri_tc_map_entry pri[NUM_OF_VLAN_PRIORITIES]; +}; + struct init_qm_port_params { u8 active; u8 active_phys_tcs; @@ -1619,7 +2336,7 @@ struct init_qm_vport_params { /* Width of GRC address in bits (addresses are specified in dwords) */ #define GRC_ADDR_BITS 23 -#define MAX_GRC_ADDR ((1 << GRC_ADDR_BITS) - 1) +#define MAX_GRC_ADDR (BIT(GRC_ADDR_BITS) - 1) /* indicates an init that should be applied to any phase ID */ #define ANY_PHASE_ID 0xffff @@ -1627,15 +2344,50 @@ struct init_qm_vport_params { /* Max size in dwords of a zipped array */ #define MAX_ZIPPED_SIZE 8192 +struct fw_asserts_ram_section { + __le16 section_ram_line_offset; + __le16 section_ram_line_size; + u8 list_dword_offset; + u8 list_element_dword_size; + u8 list_num_elements; + u8 list_next_index_dword_offset; +}; + +struct fw_ver_num { + u8 major; /* Firmware major version number */ + u8 minor; /* Firmware minor version number */ + u8 rev; /* Firmware revision version number */ + u8 eng; /* Firmware engineering version number (for bootleg versions) */ +}; + +struct fw_ver_info { + __le16 tools_ver; /* Tools version number */ + u8 image_id; /* FW image ID (e.g. main) */ + u8 reserved1; + struct fw_ver_num num; /* FW version number */ + __le32 timestamp; /* FW Timestamp in unix time (sec. since 1970) */ + __le32 reserved2; +}; + +struct fw_info { + struct fw_ver_info ver; + struct fw_asserts_ram_section fw_asserts_section; +}; + +struct fw_info_location { + __le32 grc_addr; + __le32 size; +}; + enum init_modes { MODE_RESERVED, MODE_BB_B0, - MODE_RESERVED2, + MODE_K2, MODE_ASIC, + MODE_RESERVED2, MODE_RESERVED3, MODE_RESERVED4, MODE_RESERVED5, - MODE_RESERVED6, MODE_SF, MODE_MF_SD, MODE_MF_SI, @@ -1644,7 +2396,7 @@ enum init_modes { MODE_PORTS_PER_ENG_4, MODE_100G, MODE_40G, - MODE_RESERVED7, + MODE_RESERVED6, MAX_INIT_MODES }; @@ -1674,11 +2426,11 @@ struct bin_buffer_hdr { /* binary init buffer types */ enum bin_init_buffer_type { - BIN_BUF_FW_VER_INFO, + BIN_BUF_INIT_FW_VER_INFO, BIN_BUF_INIT_CMD, BIN_BUF_INIT_VAL, BIN_BUF_INIT_MODE_TREE, - BIN_BUF_IRO, + BIN_BUF_INIT_IRO, MAX_BIN_INIT_BUFFER_TYPE }; @@ -1902,8 +2654,276 @@ struct iro { __le16 size; }; +/***************************** Public Functions *******************************/ /** - * @brief qed_dbg_print_attn - Prints attention registers values in the specified results struct. + * @brief qed_dbg_set_bin_ptr - Sets a pointer to the binary data with debug + * arrays. + * + * @param bin_ptr - a pointer to the binary data with debug arrays. + */ +enum dbg_status qed_dbg_set_bin_ptr(const u8 * const bin_ptr); +/** + * @brief qed_dbg_grc_get_dump_buf_size - Returns the required buffer size for + * GRC Dump. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the GRC Dump + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_grc_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_grc_dump - Dumps GRC data into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the collected GRC data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified dump buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_grc_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_idle_chk_get_dump_buf_size - Returns the required buffer size + * for idle check results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the idle check + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_idle_chk_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_idle_chk_dump - Performs idle check and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the idle check data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_idle_chk_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_mcp_trace_get_dump_buf_size - Returns the required buffer size + * for mcp trace results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for mcp trace data. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the trace data in MCP scratchpad contain an invalid signature + * - the bundle ID in NVRAM is invalid + * - the trace meta data cannot be found (in NVRAM or image file) + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_mcp_trace_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_mcp_trace_dump - Performs mcp trace and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the mcp trace data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - the trace data in MCP scratchpad contain an invalid signature + * - the bundle ID in NVRAM is invalid + * - the trace meta data cannot be found (in NVRAM or image file) + * - the trace meta data cannot be read (from NVRAM or image file) + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_mcp_trace_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_reg_fifo_get_dump_buf_size - Returns the required buffer size + * for grc trace fifo results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for reg fifo data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_reg_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_reg_fifo_dump - Reads the reg fifo and writes the results into + * the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the reg fifo data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_reg_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_igu_fifo_get_dump_buf_size - Returns the required buffer size + * for the IGU fifo results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for the IGU fifo + * data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_igu_fifo_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_igu_fifo_dump - Reads the IGU fifo and writes the results into + * the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the IGU fifo data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_igu_fifo_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_protection_override_get_dump_buf_size - Returns the required + * buffer size for protection override window results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for protection + * override data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status +qed_dbg_protection_override_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_protection_override_dump - Reads protection override window + * entries and writes the results into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the protection override data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * - DMAE transaction failed + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_protection_override_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_fw_asserts_get_dump_buf_size - Returns the required buffer + * size for FW Asserts results. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param buf_size - OUT: required buffer size (in dwords) for FW Asserts data. + * + * @return error if one of the following holds: + * - the version wasn't set + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_fw_asserts_get_dump_buf_size(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *buf_size); +/** + * @brief qed_dbg_fw_asserts_dump - Reads the FW Asserts and writes the results + * into the specified buffer. + * + * @param p_hwfn - HW device data + * @param p_ptt - Ptt window used for writing the registers. + * @param dump_buf - Pointer to write the FW Asserts data into. + * @param buf_size_in_dwords - Size of the specified buffer in dwords. + * @param num_dumped_dwords - OUT: number of dumped dwords. + * + * @return error if one of the following holds: + * - the version wasn't set + * - the specified buffer is too small + * Otherwise, returns ok. + */ +enum dbg_status qed_dbg_fw_asserts_dump(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 *dump_buf, + u32 buf_size_in_dwords, + u32 *num_dumped_dwords); +/** + * @brief qed_dbg_print_attn - Prints attention registers values in the + * specified results struct. * * @param p_hwfn * @param results - Pointer to the attention read results @@ -1915,47 +2935,241 @@ struct iro { enum dbg_status qed_dbg_print_attn(struct qed_hwfn *p_hwfn, struct dbg_attn_block_result *results); +/******************************** Constants **********************************/ + #define MAX_NAME_LEN 16 +/***************************** Public Functions *******************************/ +/** + * @brief qed_dbg_user_set_bin_ptr - Sets a pointer to the binary data with + * debug arrays. + * + * @param bin_ptr - a pointer to the binary data with debug arrays. + */ +enum dbg_status qed_dbg_user_set_bin_ptr(const u8 * const bin_ptr); +/** + * @brief qed_dbg_get_status_str - Returns a string for the specified status. + * + * @param status - a debug status code. + * + * @return a string for the specified status + */ +const char *qed_dbg_get_status_str(enum dbg_status status); +/** + * @brief qed_get_idle_chk_results_buf_size - Returns the required buffer size + * for idle check results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - idle check dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_idle_chk_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_idle_chk_results - Prints idle check results + * + * @param p_hwfn - HW device data + * @param dump_buf - idle check dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the idle check results. + * @param num_errors - OUT: number of errors found in idle check. + * @param num_warnings - OUT: number of warnings found in idle check. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_idle_chk_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf, + u32 *num_errors, + u32 *num_warnings); +/** + * @brief qed_get_mcp_trace_results_buf_size - Returns the required buffer size + * for MCP Trace results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - MCP Trace dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_mcp_trace_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_mcp_trace_results - Prints MCP Trace results + * + * @param p_hwfn - HW device data + * @param dump_buf - mcp trace dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the mcp trace results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_mcp_trace_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_reg_fifo_results_buf_size - Returns the required buffer size + * for reg_fifo results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - reg fifo dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_reg_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_reg_fifo_results - Prints reg fifo results + * + * @param p_hwfn - HW device data + * @param dump_buf - reg fifo dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the reg fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_reg_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_igu_fifo_results_buf_size - Returns the required buffer size + * for igu_fifo results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - IGU fifo dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_igu_fifo_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_igu_fifo_results - Prints IGU fifo results + * + * @param p_hwfn - HW device data + * @param dump_buf - IGU fifo dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the IGU fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_igu_fifo_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_protection_override_results_buf_size - Returns the required + * buffer size for protection override results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - protection override dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status +qed_get_protection_override_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_protection_override_results - Prints protection override + * results. + * + * @param p_hwfn - HW device data + * @param dump_buf - protection override dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the reg fifo results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_protection_override_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); +/** + * @brief qed_get_fw_asserts_results_buf_size - Returns the required buffer size + * for FW Asserts results (in bytes). + * + * @param p_hwfn - HW device data + * @param dump_buf - FW Asserts dump buffer. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf_size - OUT: required buffer size (in bytes) for the parsed + * results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_get_fw_asserts_results_buf_size(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + u32 *results_buf_size); +/** + * @brief qed_print_fw_asserts_results - Prints FW Asserts results + * + * @param p_hwfn - HW device data + * @param dump_buf - FW Asserts dump buffer, starting from the header. + * @param num_dumped_dwords - number of dwords that were dumped. + * @param results_buf - buffer for printing the FW Asserts results. + * + * @return error if the parsing fails, ok otherwise. + */ +enum dbg_status qed_print_fw_asserts_results(struct qed_hwfn *p_hwfn, + u32 *dump_buf, + u32 num_dumped_dwords, + char *results_buf); /* Win 2 */ -#define GTT_BAR0_MAP_REG_IGU_CMD \ - 0x00f000UL +#define GTT_BAR0_MAP_REG_IGU_CMD 0x00f000UL /* Win 3 */ -#define GTT_BAR0_MAP_REG_TSDM_RAM \ - 0x010000UL +#define GTT_BAR0_MAP_REG_TSDM_RAM 0x010000UL /* Win 4 */ -#define GTT_BAR0_MAP_REG_MSDM_RAM \ - 0x011000UL +#define GTT_BAR0_MAP_REG_MSDM_RAM 0x011000UL /* Win 5 */ -#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 \ - 0x012000UL +#define GTT_BAR0_MAP_REG_MSDM_RAM_1024 0x012000UL /* Win 6 */ -#define GTT_BAR0_MAP_REG_USDM_RAM \ - 0x013000UL +#define GTT_BAR0_MAP_REG_USDM_RAM 0x013000UL /* Win 7 */ -#define GTT_BAR0_MAP_REG_USDM_RAM_1024 \ - 0x014000UL +#define GTT_BAR0_MAP_REG_USDM_RAM_1024 0x014000UL /* Win 8 */ -#define GTT_BAR0_MAP_REG_USDM_RAM_2048 \ - 0x015000UL +#define GTT_BAR0_MAP_REG_USDM_RAM_2048 0x015000UL /* Win 9 */ -#define GTT_BAR0_MAP_REG_XSDM_RAM \ - 0x016000UL +#define GTT_BAR0_MAP_REG_XSDM_RAM 0x016000UL /* Win 10 */ -#define GTT_BAR0_MAP_REG_YSDM_RAM \ - 0x017000UL +#define GTT_BAR0_MAP_REG_YSDM_RAM 0x017000UL /* Win 11 */ -#define GTT_BAR0_MAP_REG_PSDM_RAM \ - 0x018000UL +#define GTT_BAR0_MAP_REG_PSDM_RAM 0x018000UL /** * @brief qed_qm_pf_mem_size - prepare QM ILT sizes @@ -2003,7 +3217,7 @@ struct qed_qm_pf_rt_init_params { u16 num_vf_pqs; u8 start_vport; u8 num_vports; - u8 pf_wfq; + u16 pf_wfq; u32 pf_rl; struct init_qm_pq_params *pq_params; struct init_qm_vport_params *vport_params; @@ -2138,6 +3352,9 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, #define TSTORM_PORT_STAT_OFFSET(port_id) \ (IRO[1].base + ((port_id) * IRO[1].m1)) #define TSTORM_PORT_STAT_SIZE (IRO[1].size) +#define TSTORM_LL2_PORT_STAT_OFFSET(port_id) \ + (IRO[2].base + ((port_id) * IRO[2].m1)) +#define TSTORM_LL2_PORT_STAT_SIZE (IRO[2].size) #define USTORM_VF_PF_CHANNEL_READY_OFFSET(vf_id) \ (IRO[3].base + ((vf_id) * IRO[3].m1)) #define USTORM_VF_PF_CHANNEL_READY_SIZE (IRO[3].size) @@ -2153,42 +3370,90 @@ void qed_set_geneve_enable(struct qed_hwfn *p_hwfn, #define USTORM_COMMON_QUEUE_CONS_OFFSET(queue_zone_id) \ (IRO[7].base + ((queue_zone_id) * IRO[7].m1)) #define USTORM_COMMON_QUEUE_CONS_SIZE (IRO[7].size) +#define TSTORM_LL2_RX_PRODS_OFFSET(core_rx_queue_id) \ + (IRO[14].base + ((core_rx_queue_id) * IRO[14].m1)) +#define TSTORM_LL2_RX_PRODS_SIZE (IRO[14].size) +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[15].base + ((core_rx_queue_id) * IRO[15].m1)) +#define CORE_LL2_TSTORM_PER_QUEUE_STAT_SIZE (IRO[15].size) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(core_rx_queue_id) \ + (IRO[16].base + ((core_rx_queue_id) * IRO[16].m1)) +#define CORE_LL2_USTORM_PER_QUEUE_STAT_SIZE (IRO[16].size) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(core_tx_stats_id) \ + (IRO[17].base + ((core_tx_stats_id) * IRO[17].m1)) +#define CORE_LL2_PSTORM_PER_QUEUE_STAT_SIZE (IRO[17]. size) #define MSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ (IRO[18].base + ((stat_counter_id) * IRO[18].m1)) #define MSTORM_QUEUE_STAT_SIZE (IRO[18].size) #define MSTORM_ETH_PF_PRODS_OFFSET(queue_id) \ (IRO[19].base + ((queue_id) * IRO[19].m1)) #define MSTORM_ETH_PF_PRODS_SIZE (IRO[19].size) -#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[20].base) -#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[20].size) +#define MSTORM_ETH_VF_PRODS_OFFSET(vf_id, vf_queue_id) \ + (IRO[20].base + ((vf_id) * IRO[20].m1) + ((vf_queue_id) * IRO[20].m2)) +#define MSTORM_ETH_VF_PRODS_SIZE (IRO[20].size) +#define MSTORM_TPA_TIMEOUT_US_OFFSET (IRO[21].base) +#define MSTORM_TPA_TIMEOUT_US_SIZE (IRO[21].size) #define MSTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[21].base + ((pf_id) * IRO[21].m1)) + (IRO[22].base + ((pf_id) * IRO[22].m1)) #define MSTORM_ETH_PF_STAT_SIZE (IRO[21].size) #define USTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ - (IRO[22].base + ((stat_counter_id) * IRO[22].m1)) -#define USTORM_QUEUE_STAT_SIZE (IRO[22].size) + (IRO[23].base + ((stat_counter_id) * IRO[23].m1)) +#define USTORM_QUEUE_STAT_SIZE (IRO[23].size) #define USTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[23].base + ((pf_id) * IRO[23].m1)) -#define USTORM_ETH_PF_STAT_SIZE (IRO[23].size) + (IRO[24].base + ((pf_id) * IRO[24].m1)) +#define USTORM_ETH_PF_STAT_SIZE (IRO[24].size) #define PSTORM_QUEUE_STAT_OFFSET(stat_counter_id) \ - (IRO[24].base + ((stat_counter_id) * IRO[24].m1)) -#define PSTORM_QUEUE_STAT_SIZE (IRO[24].size) + (IRO[25].base + ((stat_counter_id) * IRO[25].m1)) +#define PSTORM_QUEUE_STAT_SIZE (IRO[25].size) #define PSTORM_ETH_PF_STAT_OFFSET(pf_id) \ - (IRO[25].base + ((pf_id) * IRO[25].m1)) -#define PSTORM_ETH_PF_STAT_SIZE (IRO[25].size) + (IRO[26].base + ((pf_id) * IRO[26].m1)) +#define PSTORM_ETH_PF_STAT_SIZE (IRO[26].size) #define PSTORM_CTL_FRAME_ETHTYPE_OFFSET(ethtype) \ - (IRO[26].base + ((ethtype) * IRO[26].m1)) -#define PSTORM_CTL_FRAME_ETHTYPE_SIZE (IRO[26].size) -#define TSTORM_ETH_PRS_INPUT_OFFSET (IRO[27].base) -#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[27].size) + (IRO[27].base + ((ethtype) * IRO[27].m1)) +#define PSTORM_CTL_FRAME_ETHTYPE_SIZE (IRO[27].size) +#define TSTORM_ETH_PRS_INPUT_OFFSET (IRO[28].base) +#define TSTORM_ETH_PRS_INPUT_SIZE (IRO[28].size) #define ETH_RX_RATE_LIMIT_OFFSET(pf_id) \ - (IRO[28].base + ((pf_id) * IRO[28].m1)) -#define ETH_RX_RATE_LIMIT_SIZE (IRO[28].size) + (IRO[29].base + ((pf_id) * IRO[29].m1)) +#define ETH_RX_RATE_LIMIT_SIZE (IRO[29].size) #define XSTORM_ETH_QUEUE_ZONE_OFFSET(queue_id) \ - (IRO[29].base + ((queue_id) * IRO[29].m1)) -#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[29].size) + (IRO[30].base + ((queue_id) * IRO[30].m1)) +#define XSTORM_ETH_QUEUE_ZONE_SIZE (IRO[30].size) +#define TSTORM_SCSI_CMDQ_CONS_OFFSET(cmdq_queue_id) \ + (IRO[34].base + ((cmdq_queue_id) * IRO[34].m1)) +#define TSTORM_SCSI_CMDQ_CONS_SIZE (IRO[34].size) +#define TSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[35].base + ((func_id) * IRO[35].m1) + ((bdq_id) * IRO[35].m2)) +#define TSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[35].size) +#define MSTORM_SCSI_BDQ_EXT_PROD_OFFSET(func_id, bdq_id) \ + (IRO[36].base + ((func_id) * IRO[36].m1) + ((bdq_id) * IRO[36].m2)) +#define MSTORM_SCSI_BDQ_EXT_PROD_SIZE (IRO[36].size) +#define TSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[37].base + ((pf_id) * IRO[37].m1)) +#define TSTORM_ISCSI_RX_STATS_SIZE (IRO[37].size) +#define MSTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[38].base + ((pf_id) * IRO[38].m1)) +#define MSTORM_ISCSI_RX_STATS_SIZE (IRO[38].size) +#define USTORM_ISCSI_RX_STATS_OFFSET(pf_id) \ + (IRO[39].base + ((pf_id) * IRO[39].m1)) +#define USTORM_ISCSI_RX_STATS_SIZE (IRO[39].size) +#define XSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[40].base + ((pf_id) * IRO[40].m1)) +#define XSTORM_ISCSI_TX_STATS_SIZE (IRO[40].size) +#define YSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[41].base + ((pf_id) * IRO[41].m1)) +#define YSTORM_ISCSI_TX_STATS_SIZE (IRO[41].size) +#define PSTORM_ISCSI_TX_STATS_OFFSET(pf_id) \ + (IRO[42].base + ((pf_id) * IRO[42].m1)) +#define PSTORM_ISCSI_TX_STATS_SIZE (IRO[42].size) +#define PSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \ + (IRO[45].base + ((rdma_stat_counter_id) * IRO[45].m1)) +#define PSTORM_RDMA_QUEUE_STAT_SIZE (IRO[45].size) +#define TSTORM_RDMA_QUEUE_STAT_OFFSET(rdma_stat_counter_id) \ + (IRO[46].base + ((rdma_stat_counter_id) * IRO[46].m1)) +#define TSTORM_RDMA_QUEUE_STAT_SIZE (IRO[46].size) -static const struct iro iro_arr[46] = { +static const struct iro iro_arr[47] = { {0x0, 0x0, 0x0, 0x0, 0x8}, {0x4cb0, 0x78, 0x0, 0x0, 0x78}, {0x6318, 0x20, 0x0, 0x0, 0x20}, @@ -2201,20 +3466,21 @@ static const struct iro iro_arr[46] = { {0x3df0, 0x0, 0x0, 0x0, 0x78}, {0x29b0, 0x0, 0x0, 0x0, 0x78}, {0x4c38, 0x0, 0x0, 0x0, 0x78}, - {0x4a48, 0x0, 0x0, 0x0, 0x78}, + {0x4990, 0x0, 0x0, 0x0, 0x78}, {0x7e48, 0x0, 0x0, 0x0, 0x78}, {0xa28, 0x8, 0x0, 0x0, 0x8}, {0x60f8, 0x10, 0x0, 0x0, 0x10}, {0xb820, 0x30, 0x0, 0x0, 0x30}, {0x95b8, 0x30, 0x0, 0x0, 0x30}, - {0x4c18, 0x80, 0x0, 0x0, 0x40}, + {0x4b60, 0x80, 0x0, 0x0, 0x40}, {0x1f8, 0x4, 0x0, 0x0, 0x4}, - {0xc9a8, 0x0, 0x0, 0x0, 0x4}, - {0x4c58, 0x80, 0x0, 0x0, 0x20}, + {0x53a0, 0x80, 0x4, 0x0, 0x4}, + {0xc8f0, 0x0, 0x0, 0x0, 0x4}, + {0x4ba0, 0x80, 0x0, 0x0, 0x20}, {0x8050, 0x40, 0x0, 0x0, 0x30}, {0xe770, 0x60, 0x0, 0x0, 0x60}, {0x2b48, 0x80, 0x0, 0x0, 0x38}, - {0xdf88, 0x78, 0x0, 0x0, 0x78}, + {0xf188, 0x78, 0x0, 0x0, 0x78}, {0x1f8, 0x4, 0x0, 0x0, 0x4}, {0xacf0, 0x0, 0x0, 0x0, 0xf0}, {0xade0, 0x8, 0x0, 0x0, 0x8}, @@ -2226,455 +3492,457 @@ static const struct iro iro_arr[46] = { {0x200, 0x10, 0x8, 0x0, 0x8}, {0xb78, 0x10, 0x8, 0x0, 0x2}, {0xd888, 0x38, 0x0, 0x0, 0x24}, - {0x12120, 0x10, 0x0, 0x0, 0x8}, - {0x11b20, 0x38, 0x0, 0x0, 0x18}, + {0x12c38, 0x10, 0x0, 0x0, 0x8}, + {0x11aa0, 0x38, 0x0, 0x0, 0x18}, {0xa8c0, 0x30, 0x0, 0x0, 0x10}, {0x86f8, 0x28, 0x0, 0x0, 0x18}, - {0xeff8, 0x10, 0x0, 0x0, 0x10}, + {0x101f8, 0x10, 0x0, 0x0, 0x10}, {0xdd08, 0x48, 0x0, 0x0, 0x38}, - {0xf460, 0x20, 0x0, 0x0, 0x20}, + {0x10660, 0x20, 0x0, 0x0, 0x20}, {0x2b80, 0x80, 0x0, 0x0, 0x10}, {0x5000, 0x10, 0x0, 0x0, 0x10}, }; /* Runtime array offsets */ -#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0 -#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1 -#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2 -#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3 -#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4 -#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5 -#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6 -#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7 -#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8 -#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9 -#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10 -#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11 -#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12 -#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13 -#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 -#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 -#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16 -#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17 -#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18 -#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19 -#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20 -#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21 -#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22 -#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23 -#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 -#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 -#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 -#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497 -#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736 -#define CAU_REG_PI_MEMORY_RT_OFFSET 2233 -#define CAU_REG_PI_MEMORY_RT_SIZE 4416 -#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649 -#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650 -#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651 -#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652 -#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653 -#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654 -#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655 -#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656 -#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657 -#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658 -#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659 -#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660 -#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661 -#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662 -#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663 -#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664 -#define SRC_REG_FIRSTFREE_RT_OFFSET 6665 -#define SRC_REG_FIRSTFREE_RT_SIZE 2 -#define SRC_REG_LASTFREE_RT_OFFSET 6667 -#define SRC_REG_LASTFREE_RT_SIZE 2 -#define SRC_REG_COUNTFREE_RT_OFFSET 6669 -#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670 -#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671 -#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672 -#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673 -#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674 -#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675 -#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6676 -#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6677 -#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6678 -#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6679 -#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6680 -#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6681 -#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6682 -#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6683 -#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6684 -#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6685 -#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6686 -#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6687 -#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6688 -#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689 -#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690 -#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6691 -#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6692 -#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6693 -#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6694 -#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6695 -#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6696 -#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6697 -#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6698 -#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6699 -#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6700 -#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6701 -#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6702 -#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6703 -#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6704 -#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000 -#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28704 -#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28705 -#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28706 -#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28707 -#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28708 -#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28709 -#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28710 -#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28711 -#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28712 -#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28713 -#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28714 -#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 -#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29130 -#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 -#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29642 -#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29643 -#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29644 -#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29645 -#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29646 -#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29647 -#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29648 -#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29649 -#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29650 -#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29651 -#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29652 -#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29653 -#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29654 -#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29655 -#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29656 -#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29657 -#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29658 -#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29659 -#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29660 -#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29661 -#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29662 -#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29663 -#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29664 -#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29665 -#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29666 -#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29667 -#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29668 -#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29669 -#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29670 -#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29671 -#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29672 -#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29673 -#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29674 -#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29675 -#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29676 -#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29677 -#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29678 -#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29679 -#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29680 -#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29681 -#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29682 -#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29683 -#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29684 -#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29685 -#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29686 -#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29687 -#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29688 -#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29689 -#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29690 -#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29691 -#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29692 -#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29693 -#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29694 -#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29695 -#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29696 -#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29697 -#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29698 -#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29699 -#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29700 -#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29701 -#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29702 -#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29703 -#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29704 -#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29705 -#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29706 -#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29707 -#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29708 -#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29709 -#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 -#define QM_REG_VOQCRDLINE_RT_OFFSET 29837 -#define QM_REG_VOQCRDLINE_RT_SIZE 20 -#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29857 -#define QM_REG_VOQINITCRDLINE_RT_SIZE 20 -#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29877 -#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29878 -#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29879 -#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29880 -#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29881 -#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29882 -#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29883 -#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29884 -#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29885 -#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29886 -#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29887 -#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29888 -#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29889 -#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29890 -#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29891 -#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29892 -#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29893 -#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29894 -#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29895 -#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29896 -#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29897 -#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29898 -#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29899 -#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29900 -#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29901 -#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29902 -#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29903 -#define QM_REG_PQTX2PF_0_RT_OFFSET 29904 -#define QM_REG_PQTX2PF_1_RT_OFFSET 29905 -#define QM_REG_PQTX2PF_2_RT_OFFSET 29906 -#define QM_REG_PQTX2PF_3_RT_OFFSET 29907 -#define QM_REG_PQTX2PF_4_RT_OFFSET 29908 -#define QM_REG_PQTX2PF_5_RT_OFFSET 29909 -#define QM_REG_PQTX2PF_6_RT_OFFSET 29910 -#define QM_REG_PQTX2PF_7_RT_OFFSET 29911 -#define QM_REG_PQTX2PF_8_RT_OFFSET 29912 -#define QM_REG_PQTX2PF_9_RT_OFFSET 29913 -#define QM_REG_PQTX2PF_10_RT_OFFSET 29914 -#define QM_REG_PQTX2PF_11_RT_OFFSET 29915 -#define QM_REG_PQTX2PF_12_RT_OFFSET 29916 -#define QM_REG_PQTX2PF_13_RT_OFFSET 29917 -#define QM_REG_PQTX2PF_14_RT_OFFSET 29918 -#define QM_REG_PQTX2PF_15_RT_OFFSET 29919 -#define QM_REG_PQTX2PF_16_RT_OFFSET 29920 -#define QM_REG_PQTX2PF_17_RT_OFFSET 29921 -#define QM_REG_PQTX2PF_18_RT_OFFSET 29922 -#define QM_REG_PQTX2PF_19_RT_OFFSET 29923 -#define QM_REG_PQTX2PF_20_RT_OFFSET 29924 -#define QM_REG_PQTX2PF_21_RT_OFFSET 29925 -#define QM_REG_PQTX2PF_22_RT_OFFSET 29926 -#define QM_REG_PQTX2PF_23_RT_OFFSET 29927 -#define QM_REG_PQTX2PF_24_RT_OFFSET 29928 -#define QM_REG_PQTX2PF_25_RT_OFFSET 29929 -#define QM_REG_PQTX2PF_26_RT_OFFSET 29930 -#define QM_REG_PQTX2PF_27_RT_OFFSET 29931 -#define QM_REG_PQTX2PF_28_RT_OFFSET 29932 -#define QM_REG_PQTX2PF_29_RT_OFFSET 29933 -#define QM_REG_PQTX2PF_30_RT_OFFSET 29934 -#define QM_REG_PQTX2PF_31_RT_OFFSET 29935 -#define QM_REG_PQTX2PF_32_RT_OFFSET 29936 -#define QM_REG_PQTX2PF_33_RT_OFFSET 29937 -#define QM_REG_PQTX2PF_34_RT_OFFSET 29938 -#define QM_REG_PQTX2PF_35_RT_OFFSET 29939 -#define QM_REG_PQTX2PF_36_RT_OFFSET 29940 -#define QM_REG_PQTX2PF_37_RT_OFFSET 29941 -#define QM_REG_PQTX2PF_38_RT_OFFSET 29942 -#define QM_REG_PQTX2PF_39_RT_OFFSET 29943 -#define QM_REG_PQTX2PF_40_RT_OFFSET 29944 -#define QM_REG_PQTX2PF_41_RT_OFFSET 29945 -#define QM_REG_PQTX2PF_42_RT_OFFSET 29946 -#define QM_REG_PQTX2PF_43_RT_OFFSET 29947 -#define QM_REG_PQTX2PF_44_RT_OFFSET 29948 -#define QM_REG_PQTX2PF_45_RT_OFFSET 29949 -#define QM_REG_PQTX2PF_46_RT_OFFSET 29950 -#define QM_REG_PQTX2PF_47_RT_OFFSET 29951 -#define QM_REG_PQTX2PF_48_RT_OFFSET 29952 -#define QM_REG_PQTX2PF_49_RT_OFFSET 29953 -#define QM_REG_PQTX2PF_50_RT_OFFSET 29954 -#define QM_REG_PQTX2PF_51_RT_OFFSET 29955 -#define QM_REG_PQTX2PF_52_RT_OFFSET 29956 -#define QM_REG_PQTX2PF_53_RT_OFFSET 29957 -#define QM_REG_PQTX2PF_54_RT_OFFSET 29958 -#define QM_REG_PQTX2PF_55_RT_OFFSET 29959 -#define QM_REG_PQTX2PF_56_RT_OFFSET 29960 -#define QM_REG_PQTX2PF_57_RT_OFFSET 29961 -#define QM_REG_PQTX2PF_58_RT_OFFSET 29962 -#define QM_REG_PQTX2PF_59_RT_OFFSET 29963 -#define QM_REG_PQTX2PF_60_RT_OFFSET 29964 -#define QM_REG_PQTX2PF_61_RT_OFFSET 29965 -#define QM_REG_PQTX2PF_62_RT_OFFSET 29966 -#define QM_REG_PQTX2PF_63_RT_OFFSET 29967 -#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29968 -#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29969 -#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29970 -#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29971 -#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29972 -#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29973 -#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29974 -#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29975 -#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29976 -#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29977 -#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29978 -#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29979 -#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29980 -#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29981 -#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29982 -#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29983 -#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29984 -#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29985 -#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29986 -#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29987 -#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29988 -#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29989 -#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29990 -#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29991 -#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29992 -#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29993 -#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29994 -#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29995 -#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29996 -#define QM_REG_RLGLBLINCVAL_RT_SIZE 256 -#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30252 -#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 -#define QM_REG_RLGLBLCRD_RT_OFFSET 30508 -#define QM_REG_RLGLBLCRD_RT_SIZE 256 -#define QM_REG_RLGLBLENABLE_RT_OFFSET 30764 -#define QM_REG_RLPFPERIOD_RT_OFFSET 30765 -#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30766 -#define QM_REG_RLPFINCVAL_RT_OFFSET 30767 -#define QM_REG_RLPFINCVAL_RT_SIZE 16 -#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30783 -#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_RLPFCRD_RT_OFFSET 30799 -#define QM_REG_RLPFCRD_RT_SIZE 16 -#define QM_REG_RLPFENABLE_RT_OFFSET 30815 -#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30816 -#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30817 -#define QM_REG_WFQPFWEIGHT_RT_SIZE 16 -#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30833 -#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 -#define QM_REG_WFQPFCRD_RT_OFFSET 30849 -#define QM_REG_WFQPFCRD_RT_SIZE 160 -#define QM_REG_WFQPFENABLE_RT_OFFSET 31009 -#define QM_REG_WFQVPENABLE_RT_OFFSET 31010 -#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31011 -#define QM_REG_BASEADDRTXPQ_RT_SIZE 512 -#define QM_REG_TXPQMAP_RT_OFFSET 31523 -#define QM_REG_TXPQMAP_RT_SIZE 512 -#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32035 -#define QM_REG_WFQVPWEIGHT_RT_SIZE 512 -#define QM_REG_WFQVPCRD_RT_OFFSET 32547 -#define QM_REG_WFQVPCRD_RT_SIZE 512 -#define QM_REG_WFQVPMAP_RT_OFFSET 33059 -#define QM_REG_WFQVPMAP_RT_SIZE 512 -#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33571 -#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 -#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33731 -#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33732 -#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33733 -#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33734 -#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33735 -#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33736 -#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33737 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33738 -#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33742 -#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33746 -#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 -#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33750 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33751 -#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33783 -#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33799 -#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33815 -#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33831 -#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 -#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33847 -#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 33848 -#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33849 -#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33850 -#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33851 -#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33852 -#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33853 -#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33854 -#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33855 -#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33856 -#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33857 -#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33858 -#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33859 -#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33860 -#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33861 -#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33862 -#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33863 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33864 -#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33865 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33866 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33867 -#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33868 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33869 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33870 -#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33871 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33872 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33873 -#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33874 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33875 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33876 -#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33877 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33878 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33879 -#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33880 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33881 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33882 -#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33883 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33884 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33885 -#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33886 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33887 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33888 -#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33889 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33890 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33891 -#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33892 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33893 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33894 -#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33895 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33896 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33897 -#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33898 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33899 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33900 -#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33901 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33902 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33903 -#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33904 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33905 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33906 -#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33907 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33908 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33909 -#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33910 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33911 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33912 -#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33913 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33914 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33915 -#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33916 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33917 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33918 -#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33919 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33920 -#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33921 -#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33922 -#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33923 -#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33924 +#define DORQ_REG_PF_MAX_ICID_0_RT_OFFSET 0 +#define DORQ_REG_PF_MAX_ICID_1_RT_OFFSET 1 +#define DORQ_REG_PF_MAX_ICID_2_RT_OFFSET 2 +#define DORQ_REG_PF_MAX_ICID_3_RT_OFFSET 3 +#define DORQ_REG_PF_MAX_ICID_4_RT_OFFSET 4 +#define DORQ_REG_PF_MAX_ICID_5_RT_OFFSET 5 +#define DORQ_REG_PF_MAX_ICID_6_RT_OFFSET 6 +#define DORQ_REG_PF_MAX_ICID_7_RT_OFFSET 7 +#define DORQ_REG_VF_MAX_ICID_0_RT_OFFSET 8 +#define DORQ_REG_VF_MAX_ICID_1_RT_OFFSET 9 +#define DORQ_REG_VF_MAX_ICID_2_RT_OFFSET 10 +#define DORQ_REG_VF_MAX_ICID_3_RT_OFFSET 11 +#define DORQ_REG_VF_MAX_ICID_4_RT_OFFSET 12 +#define DORQ_REG_VF_MAX_ICID_5_RT_OFFSET 13 +#define DORQ_REG_VF_MAX_ICID_6_RT_OFFSET 14 +#define DORQ_REG_VF_MAX_ICID_7_RT_OFFSET 15 +#define DORQ_REG_PF_WAKE_ALL_RT_OFFSET 16 +#define DORQ_REG_TAG1_ETHERTYPE_RT_OFFSET 17 +#define IGU_REG_PF_CONFIGURATION_RT_OFFSET 18 +#define IGU_REG_VF_CONFIGURATION_RT_OFFSET 19 +#define IGU_REG_ATTN_MSG_ADDR_L_RT_OFFSET 20 +#define IGU_REG_ATTN_MSG_ADDR_H_RT_OFFSET 21 +#define IGU_REG_LEADING_EDGE_LATCH_RT_OFFSET 22 +#define IGU_REG_TRAILING_EDGE_LATCH_RT_OFFSET 23 +#define CAU_REG_CQE_AGG_UNIT_SIZE_RT_OFFSET 24 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 +#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 +#define CAU_REG_SB_VAR_MEMORY_RT_OFFSET 761 +#define CAU_REG_SB_VAR_MEMORY_RT_SIZE 736 +#define CAU_REG_SB_ADDR_MEMORY_RT_OFFSET 1497 +#define CAU_REG_SB_ADDR_MEMORY_RT_SIZE 736 +#define CAU_REG_PI_MEMORY_RT_OFFSET 2233 +#define CAU_REG_PI_MEMORY_RT_SIZE 4416 +#define PRS_REG_SEARCH_RESP_INITIATOR_TYPE_RT_OFFSET 6649 +#define PRS_REG_TASK_ID_MAX_INITIATOR_PF_RT_OFFSET 6650 +#define PRS_REG_TASK_ID_MAX_INITIATOR_VF_RT_OFFSET 6651 +#define PRS_REG_TASK_ID_MAX_TARGET_PF_RT_OFFSET 6652 +#define PRS_REG_TASK_ID_MAX_TARGET_VF_RT_OFFSET 6653 +#define PRS_REG_SEARCH_TCP_RT_OFFSET 6654 +#define PRS_REG_SEARCH_FCOE_RT_OFFSET 6655 +#define PRS_REG_SEARCH_ROCE_RT_OFFSET 6656 +#define PRS_REG_ROCE_DEST_QP_MAX_VF_RT_OFFSET 6657 +#define PRS_REG_ROCE_DEST_QP_MAX_PF_RT_OFFSET 6658 +#define PRS_REG_SEARCH_OPENFLOW_RT_OFFSET 6659 +#define PRS_REG_SEARCH_NON_IP_AS_OPENFLOW_RT_OFFSET 6660 +#define PRS_REG_OPENFLOW_SUPPORT_ONLY_KNOWN_OVER_IP_RT_OFFSET 6661 +#define PRS_REG_OPENFLOW_SEARCH_KEY_MASK_RT_OFFSET 6662 +#define PRS_REG_TAG_ETHERTYPE_0_RT_OFFSET 6663 +#define PRS_REG_LIGHT_L2_ETHERTYPE_EN_RT_OFFSET 6664 +#define SRC_REG_FIRSTFREE_RT_OFFSET 6665 +#define SRC_REG_FIRSTFREE_RT_SIZE 2 +#define SRC_REG_LASTFREE_RT_OFFSET 6667 +#define SRC_REG_LASTFREE_RT_SIZE 2 +#define SRC_REG_COUNTFREE_RT_OFFSET 6669 +#define SRC_REG_NUMBER_HASH_BITS_RT_OFFSET 6670 +#define PSWRQ2_REG_CDUT_P_SIZE_RT_OFFSET 6671 +#define PSWRQ2_REG_CDUC_P_SIZE_RT_OFFSET 6672 +#define PSWRQ2_REG_TM_P_SIZE_RT_OFFSET 6673 +#define PSWRQ2_REG_QM_P_SIZE_RT_OFFSET 6674 +#define PSWRQ2_REG_SRC_P_SIZE_RT_OFFSET 6675 +#define PSWRQ2_REG_TSDM_P_SIZE_RT_OFFSET 6676 +#define PSWRQ2_REG_TM_FIRST_ILT_RT_OFFSET 6677 +#define PSWRQ2_REG_TM_LAST_ILT_RT_OFFSET 6678 +#define PSWRQ2_REG_QM_FIRST_ILT_RT_OFFSET 6679 +#define PSWRQ2_REG_QM_LAST_ILT_RT_OFFSET 6680 +#define PSWRQ2_REG_SRC_FIRST_ILT_RT_OFFSET 6681 +#define PSWRQ2_REG_SRC_LAST_ILT_RT_OFFSET 6682 +#define PSWRQ2_REG_CDUC_FIRST_ILT_RT_OFFSET 6683 +#define PSWRQ2_REG_CDUC_LAST_ILT_RT_OFFSET 6684 +#define PSWRQ2_REG_CDUT_FIRST_ILT_RT_OFFSET 6685 +#define PSWRQ2_REG_CDUT_LAST_ILT_RT_OFFSET 6686 +#define PSWRQ2_REG_TSDM_FIRST_ILT_RT_OFFSET 6687 +#define PSWRQ2_REG_TSDM_LAST_ILT_RT_OFFSET 6688 +#define PSWRQ2_REG_TM_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6689 +#define PSWRQ2_REG_CDUT_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6690 +#define PSWRQ2_REG_CDUC_NUMBER_OF_PF_BLOCKS_RT_OFFSET 6691 +#define PSWRQ2_REG_TM_VF_BLOCKS_RT_OFFSET 6692 +#define PSWRQ2_REG_CDUT_VF_BLOCKS_RT_OFFSET 6693 +#define PSWRQ2_REG_CDUC_VF_BLOCKS_RT_OFFSET 6694 +#define PSWRQ2_REG_TM_BLOCKS_FACTOR_RT_OFFSET 6695 +#define PSWRQ2_REG_CDUT_BLOCKS_FACTOR_RT_OFFSET 6696 +#define PSWRQ2_REG_CDUC_BLOCKS_FACTOR_RT_OFFSET 6697 +#define PSWRQ2_REG_VF_BASE_RT_OFFSET 6698 +#define PSWRQ2_REG_VF_LAST_ILT_RT_OFFSET 6699 +#define PSWRQ2_REG_WR_MBS0_RT_OFFSET 6700 +#define PSWRQ2_REG_RD_MBS0_RT_OFFSET 6701 +#define PSWRQ2_REG_DRAM_ALIGN_WR_RT_OFFSET 6702 +#define PSWRQ2_REG_DRAM_ALIGN_RD_RT_OFFSET 6703 +#define PSWRQ2_REG_ILT_MEMORY_RT_OFFSET 6704 +#define PSWRQ2_REG_ILT_MEMORY_RT_SIZE 22000 +#define PGLUE_REG_B_VF_BASE_RT_OFFSET 28704 +#define PGLUE_REG_B_MSDM_OFFSET_MASK_B_RT_OFFSET 28705 +#define PGLUE_REG_B_MSDM_VF_SHIFT_B_RT_OFFSET 28706 +#define PGLUE_REG_B_CACHE_LINE_SIZE_RT_OFFSET 28707 +#define PGLUE_REG_B_PF_BAR0_SIZE_RT_OFFSET 28708 +#define PGLUE_REG_B_PF_BAR1_SIZE_RT_OFFSET 28709 +#define PGLUE_REG_B_VF_BAR1_SIZE_RT_OFFSET 28710 +#define TM_REG_VF_ENABLE_CONN_RT_OFFSET 28711 +#define TM_REG_PF_ENABLE_CONN_RT_OFFSET 28712 +#define TM_REG_PF_ENABLE_TASK_RT_OFFSET 28713 +#define TM_REG_GROUP_SIZE_RESOLUTION_CONN_RT_OFFSET 28714 +#define TM_REG_GROUP_SIZE_RESOLUTION_TASK_RT_OFFSET 28715 +#define TM_REG_CONFIG_CONN_MEM_RT_OFFSET 28716 +#define TM_REG_CONFIG_CONN_MEM_RT_SIZE 416 +#define TM_REG_CONFIG_TASK_MEM_RT_OFFSET 29132 +#define TM_REG_CONFIG_TASK_MEM_RT_SIZE 512 +#define QM_REG_MAXPQSIZE_0_RT_OFFSET 29644 +#define QM_REG_MAXPQSIZE_1_RT_OFFSET 29645 +#define QM_REG_MAXPQSIZE_2_RT_OFFSET 29646 +#define QM_REG_MAXPQSIZETXSEL_0_RT_OFFSET 29647 +#define QM_REG_MAXPQSIZETXSEL_1_RT_OFFSET 29648 +#define QM_REG_MAXPQSIZETXSEL_2_RT_OFFSET 29649 +#define QM_REG_MAXPQSIZETXSEL_3_RT_OFFSET 29650 +#define QM_REG_MAXPQSIZETXSEL_4_RT_OFFSET 29651 +#define QM_REG_MAXPQSIZETXSEL_5_RT_OFFSET 29652 +#define QM_REG_MAXPQSIZETXSEL_6_RT_OFFSET 29653 +#define QM_REG_MAXPQSIZETXSEL_7_RT_OFFSET 29654 +#define QM_REG_MAXPQSIZETXSEL_8_RT_OFFSET 29655 +#define QM_REG_MAXPQSIZETXSEL_9_RT_OFFSET 29656 +#define QM_REG_MAXPQSIZETXSEL_10_RT_OFFSET 29657 +#define QM_REG_MAXPQSIZETXSEL_11_RT_OFFSET 29658 +#define QM_REG_MAXPQSIZETXSEL_12_RT_OFFSET 29659 +#define QM_REG_MAXPQSIZETXSEL_13_RT_OFFSET 29660 +#define QM_REG_MAXPQSIZETXSEL_14_RT_OFFSET 29661 +#define QM_REG_MAXPQSIZETXSEL_15_RT_OFFSET 29662 +#define QM_REG_MAXPQSIZETXSEL_16_RT_OFFSET 29663 +#define QM_REG_MAXPQSIZETXSEL_17_RT_OFFSET 29664 +#define QM_REG_MAXPQSIZETXSEL_18_RT_OFFSET 29665 +#define QM_REG_MAXPQSIZETXSEL_19_RT_OFFSET 29666 +#define QM_REG_MAXPQSIZETXSEL_20_RT_OFFSET 29667 +#define QM_REG_MAXPQSIZETXSEL_21_RT_OFFSET 29668 +#define QM_REG_MAXPQSIZETXSEL_22_RT_OFFSET 29669 +#define QM_REG_MAXPQSIZETXSEL_23_RT_OFFSET 29670 +#define QM_REG_MAXPQSIZETXSEL_24_RT_OFFSET 29671 +#define QM_REG_MAXPQSIZETXSEL_25_RT_OFFSET 29672 +#define QM_REG_MAXPQSIZETXSEL_26_RT_OFFSET 29673 +#define QM_REG_MAXPQSIZETXSEL_27_RT_OFFSET 29674 +#define QM_REG_MAXPQSIZETXSEL_28_RT_OFFSET 29675 +#define QM_REG_MAXPQSIZETXSEL_29_RT_OFFSET 29676 +#define QM_REG_MAXPQSIZETXSEL_30_RT_OFFSET 29677 +#define QM_REG_MAXPQSIZETXSEL_31_RT_OFFSET 29678 +#define QM_REG_MAXPQSIZETXSEL_32_RT_OFFSET 29679 +#define QM_REG_MAXPQSIZETXSEL_33_RT_OFFSET 29680 +#define QM_REG_MAXPQSIZETXSEL_34_RT_OFFSET 29681 +#define QM_REG_MAXPQSIZETXSEL_35_RT_OFFSET 29682 +#define QM_REG_MAXPQSIZETXSEL_36_RT_OFFSET 29683 +#define QM_REG_MAXPQSIZETXSEL_37_RT_OFFSET 29684 +#define QM_REG_MAXPQSIZETXSEL_38_RT_OFFSET 29685 +#define QM_REG_MAXPQSIZETXSEL_39_RT_OFFSET 29686 +#define QM_REG_MAXPQSIZETXSEL_40_RT_OFFSET 29687 +#define QM_REG_MAXPQSIZETXSEL_41_RT_OFFSET 29688 +#define QM_REG_MAXPQSIZETXSEL_42_RT_OFFSET 29689 +#define QM_REG_MAXPQSIZETXSEL_43_RT_OFFSET 29690 +#define QM_REG_MAXPQSIZETXSEL_44_RT_OFFSET 29691 +#define QM_REG_MAXPQSIZETXSEL_45_RT_OFFSET 29692 +#define QM_REG_MAXPQSIZETXSEL_46_RT_OFFSET 29693 +#define QM_REG_MAXPQSIZETXSEL_47_RT_OFFSET 29694 +#define QM_REG_MAXPQSIZETXSEL_48_RT_OFFSET 29695 +#define QM_REG_MAXPQSIZETXSEL_49_RT_OFFSET 29696 +#define QM_REG_MAXPQSIZETXSEL_50_RT_OFFSET 29697 +#define QM_REG_MAXPQSIZETXSEL_51_RT_OFFSET 29698 +#define QM_REG_MAXPQSIZETXSEL_52_RT_OFFSET 29699 +#define QM_REG_MAXPQSIZETXSEL_53_RT_OFFSET 29700 +#define QM_REG_MAXPQSIZETXSEL_54_RT_OFFSET 29701 +#define QM_REG_MAXPQSIZETXSEL_55_RT_OFFSET 29702 +#define QM_REG_MAXPQSIZETXSEL_56_RT_OFFSET 29703 +#define QM_REG_MAXPQSIZETXSEL_57_RT_OFFSET 29704 +#define QM_REG_MAXPQSIZETXSEL_58_RT_OFFSET 29705 +#define QM_REG_MAXPQSIZETXSEL_59_RT_OFFSET 29706 +#define QM_REG_MAXPQSIZETXSEL_60_RT_OFFSET 29707 +#define QM_REG_MAXPQSIZETXSEL_61_RT_OFFSET 29708 +#define QM_REG_MAXPQSIZETXSEL_62_RT_OFFSET 29709 +#define QM_REG_MAXPQSIZETXSEL_63_RT_OFFSET 29710 +#define QM_REG_BASEADDROTHERPQ_RT_OFFSET 29711 +#define QM_REG_BASEADDROTHERPQ_RT_SIZE 128 +#define QM_REG_VOQCRDLINE_RT_OFFSET 29839 +#define QM_REG_VOQCRDLINE_RT_SIZE 20 +#define QM_REG_VOQINITCRDLINE_RT_OFFSET 29859 +#define QM_REG_VOQINITCRDLINE_RT_SIZE 20 +#define QM_REG_AFULLQMBYPTHRPFWFQ_RT_OFFSET 29879 +#define QM_REG_AFULLQMBYPTHRVPWFQ_RT_OFFSET 29880 +#define QM_REG_AFULLQMBYPTHRPFRL_RT_OFFSET 29881 +#define QM_REG_AFULLQMBYPTHRGLBLRL_RT_OFFSET 29882 +#define QM_REG_AFULLOPRTNSTCCRDMASK_RT_OFFSET 29883 +#define QM_REG_WRROTHERPQGRP_0_RT_OFFSET 29884 +#define QM_REG_WRROTHERPQGRP_1_RT_OFFSET 29885 +#define QM_REG_WRROTHERPQGRP_2_RT_OFFSET 29886 +#define QM_REG_WRROTHERPQGRP_3_RT_OFFSET 29887 +#define QM_REG_WRROTHERPQGRP_4_RT_OFFSET 29888 +#define QM_REG_WRROTHERPQGRP_5_RT_OFFSET 29889 +#define QM_REG_WRROTHERPQGRP_6_RT_OFFSET 29890 +#define QM_REG_WRROTHERPQGRP_7_RT_OFFSET 29891 +#define QM_REG_WRROTHERPQGRP_8_RT_OFFSET 29892 +#define QM_REG_WRROTHERPQGRP_9_RT_OFFSET 29893 +#define QM_REG_WRROTHERPQGRP_10_RT_OFFSET 29894 +#define QM_REG_WRROTHERPQGRP_11_RT_OFFSET 29895 +#define QM_REG_WRROTHERPQGRP_12_RT_OFFSET 29896 +#define QM_REG_WRROTHERPQGRP_13_RT_OFFSET 29897 +#define QM_REG_WRROTHERPQGRP_14_RT_OFFSET 29898 +#define QM_REG_WRROTHERPQGRP_15_RT_OFFSET 29899 +#define QM_REG_WRROTHERGRPWEIGHT_0_RT_OFFSET 29900 +#define QM_REG_WRROTHERGRPWEIGHT_1_RT_OFFSET 29901 +#define QM_REG_WRROTHERGRPWEIGHT_2_RT_OFFSET 29902 +#define QM_REG_WRROTHERGRPWEIGHT_3_RT_OFFSET 29903 +#define QM_REG_WRRTXGRPWEIGHT_0_RT_OFFSET 29904 +#define QM_REG_WRRTXGRPWEIGHT_1_RT_OFFSET 29905 +#define QM_REG_PQTX2PF_0_RT_OFFSET 29906 +#define QM_REG_PQTX2PF_1_RT_OFFSET 29907 +#define QM_REG_PQTX2PF_2_RT_OFFSET 29908 +#define QM_REG_PQTX2PF_3_RT_OFFSET 29909 +#define QM_REG_PQTX2PF_4_RT_OFFSET 29910 +#define QM_REG_PQTX2PF_5_RT_OFFSET 29911 +#define QM_REG_PQTX2PF_6_RT_OFFSET 29912 +#define QM_REG_PQTX2PF_7_RT_OFFSET 29913 +#define QM_REG_PQTX2PF_8_RT_OFFSET 29914 +#define QM_REG_PQTX2PF_9_RT_OFFSET 29915 +#define QM_REG_PQTX2PF_10_RT_OFFSET 29916 +#define QM_REG_PQTX2PF_11_RT_OFFSET 29917 +#define QM_REG_PQTX2PF_12_RT_OFFSET 29918 +#define QM_REG_PQTX2PF_13_RT_OFFSET 29919 +#define QM_REG_PQTX2PF_14_RT_OFFSET 29920 +#define QM_REG_PQTX2PF_15_RT_OFFSET 29921 +#define QM_REG_PQTX2PF_16_RT_OFFSET 29922 +#define QM_REG_PQTX2PF_17_RT_OFFSET 29923 +#define QM_REG_PQTX2PF_18_RT_OFFSET 29924 +#define QM_REG_PQTX2PF_19_RT_OFFSET 29925 +#define QM_REG_PQTX2PF_20_RT_OFFSET 29926 +#define QM_REG_PQTX2PF_21_RT_OFFSET 29927 +#define QM_REG_PQTX2PF_22_RT_OFFSET 29928 +#define QM_REG_PQTX2PF_23_RT_OFFSET 29929 +#define QM_REG_PQTX2PF_24_RT_OFFSET 29930 +#define QM_REG_PQTX2PF_25_RT_OFFSET 29931 +#define QM_REG_PQTX2PF_26_RT_OFFSET 29932 +#define QM_REG_PQTX2PF_27_RT_OFFSET 29933 +#define QM_REG_PQTX2PF_28_RT_OFFSET 29934 +#define QM_REG_PQTX2PF_29_RT_OFFSET 29935 +#define QM_REG_PQTX2PF_30_RT_OFFSET 29936 +#define QM_REG_PQTX2PF_31_RT_OFFSET 29937 +#define QM_REG_PQTX2PF_32_RT_OFFSET 29938 +#define QM_REG_PQTX2PF_33_RT_OFFSET 29939 +#define QM_REG_PQTX2PF_34_RT_OFFSET 29940 +#define QM_REG_PQTX2PF_35_RT_OFFSET 29941 +#define QM_REG_PQTX2PF_36_RT_OFFSET 29942 +#define QM_REG_PQTX2PF_37_RT_OFFSET 29943 +#define QM_REG_PQTX2PF_38_RT_OFFSET 29944 +#define QM_REG_PQTX2PF_39_RT_OFFSET 29945 +#define QM_REG_PQTX2PF_40_RT_OFFSET 29946 +#define QM_REG_PQTX2PF_41_RT_OFFSET 29947 +#define QM_REG_PQTX2PF_42_RT_OFFSET 29948 +#define QM_REG_PQTX2PF_43_RT_OFFSET 29949 +#define QM_REG_PQTX2PF_44_RT_OFFSET 29950 +#define QM_REG_PQTX2PF_45_RT_OFFSET 29951 +#define QM_REG_PQTX2PF_46_RT_OFFSET 29952 +#define QM_REG_PQTX2PF_47_RT_OFFSET 29953 +#define QM_REG_PQTX2PF_48_RT_OFFSET 29954 +#define QM_REG_PQTX2PF_49_RT_OFFSET 29955 +#define QM_REG_PQTX2PF_50_RT_OFFSET 29956 +#define QM_REG_PQTX2PF_51_RT_OFFSET 29957 +#define QM_REG_PQTX2PF_52_RT_OFFSET 29958 +#define QM_REG_PQTX2PF_53_RT_OFFSET 29959 +#define QM_REG_PQTX2PF_54_RT_OFFSET 29960 +#define QM_REG_PQTX2PF_55_RT_OFFSET 29961 +#define QM_REG_PQTX2PF_56_RT_OFFSET 29962 +#define QM_REG_PQTX2PF_57_RT_OFFSET 29963 +#define QM_REG_PQTX2PF_58_RT_OFFSET 29964 +#define QM_REG_PQTX2PF_59_RT_OFFSET 29965 +#define QM_REG_PQTX2PF_60_RT_OFFSET 29966 +#define QM_REG_PQTX2PF_61_RT_OFFSET 29967 +#define QM_REG_PQTX2PF_62_RT_OFFSET 29968 +#define QM_REG_PQTX2PF_63_RT_OFFSET 29969 +#define QM_REG_PQOTHER2PF_0_RT_OFFSET 29970 +#define QM_REG_PQOTHER2PF_1_RT_OFFSET 29971 +#define QM_REG_PQOTHER2PF_2_RT_OFFSET 29972 +#define QM_REG_PQOTHER2PF_3_RT_OFFSET 29973 +#define QM_REG_PQOTHER2PF_4_RT_OFFSET 29974 +#define QM_REG_PQOTHER2PF_5_RT_OFFSET 29975 +#define QM_REG_PQOTHER2PF_6_RT_OFFSET 29976 +#define QM_REG_PQOTHER2PF_7_RT_OFFSET 29977 +#define QM_REG_PQOTHER2PF_8_RT_OFFSET 29978 +#define QM_REG_PQOTHER2PF_9_RT_OFFSET 29979 +#define QM_REG_PQOTHER2PF_10_RT_OFFSET 29980 +#define QM_REG_PQOTHER2PF_11_RT_OFFSET 29981 +#define QM_REG_PQOTHER2PF_12_RT_OFFSET 29982 +#define QM_REG_PQOTHER2PF_13_RT_OFFSET 29983 +#define QM_REG_PQOTHER2PF_14_RT_OFFSET 29984 +#define QM_REG_PQOTHER2PF_15_RT_OFFSET 29985 +#define QM_REG_RLGLBLPERIOD_0_RT_OFFSET 29986 +#define QM_REG_RLGLBLPERIOD_1_RT_OFFSET 29987 +#define QM_REG_RLGLBLPERIODTIMER_0_RT_OFFSET 29988 +#define QM_REG_RLGLBLPERIODTIMER_1_RT_OFFSET 29989 +#define QM_REG_RLGLBLPERIODSEL_0_RT_OFFSET 29990 +#define QM_REG_RLGLBLPERIODSEL_1_RT_OFFSET 29991 +#define QM_REG_RLGLBLPERIODSEL_2_RT_OFFSET 29992 +#define QM_REG_RLGLBLPERIODSEL_3_RT_OFFSET 29993 +#define QM_REG_RLGLBLPERIODSEL_4_RT_OFFSET 29994 +#define QM_REG_RLGLBLPERIODSEL_5_RT_OFFSET 29995 +#define QM_REG_RLGLBLPERIODSEL_6_RT_OFFSET 29996 +#define QM_REG_RLGLBLPERIODSEL_7_RT_OFFSET 29997 +#define QM_REG_RLGLBLINCVAL_RT_OFFSET 29998 +#define QM_REG_RLGLBLINCVAL_RT_SIZE 256 +#define QM_REG_RLGLBLUPPERBOUND_RT_OFFSET 30254 +#define QM_REG_RLGLBLUPPERBOUND_RT_SIZE 256 +#define QM_REG_RLGLBLCRD_RT_OFFSET 30510 +#define QM_REG_RLGLBLCRD_RT_SIZE 256 +#define QM_REG_RLGLBLENABLE_RT_OFFSET 30766 +#define QM_REG_RLPFPERIOD_RT_OFFSET 30767 +#define QM_REG_RLPFPERIODTIMER_RT_OFFSET 30768 +#define QM_REG_RLPFINCVAL_RT_OFFSET 30769 +#define QM_REG_RLPFINCVAL_RT_SIZE 16 +#define QM_REG_RLPFUPPERBOUND_RT_OFFSET 30785 +#define QM_REG_RLPFUPPERBOUND_RT_SIZE 16 +#define QM_REG_RLPFCRD_RT_OFFSET 30801 +#define QM_REG_RLPFCRD_RT_SIZE 16 +#define QM_REG_RLPFENABLE_RT_OFFSET 30817 +#define QM_REG_RLPFVOQENABLE_RT_OFFSET 30818 +#define QM_REG_WFQPFWEIGHT_RT_OFFSET 30819 +#define QM_REG_WFQPFWEIGHT_RT_SIZE 16 +#define QM_REG_WFQPFUPPERBOUND_RT_OFFSET 30835 +#define QM_REG_WFQPFUPPERBOUND_RT_SIZE 16 +#define QM_REG_WFQPFCRD_RT_OFFSET 30851 +#define QM_REG_WFQPFCRD_RT_SIZE 160 +#define QM_REG_WFQPFENABLE_RT_OFFSET 31011 +#define QM_REG_WFQVPENABLE_RT_OFFSET 31012 +#define QM_REG_BASEADDRTXPQ_RT_OFFSET 31013 +#define QM_REG_BASEADDRTXPQ_RT_SIZE 512 +#define QM_REG_TXPQMAP_RT_OFFSET 31525 +#define QM_REG_TXPQMAP_RT_SIZE 512 +#define QM_REG_WFQVPWEIGHT_RT_OFFSET 32037 +#define QM_REG_WFQVPWEIGHT_RT_SIZE 512 +#define QM_REG_WFQVPCRD_RT_OFFSET 32549 +#define QM_REG_WFQVPCRD_RT_SIZE 512 +#define QM_REG_WFQVPMAP_RT_OFFSET 33061 +#define QM_REG_WFQVPMAP_RT_SIZE 512 +#define QM_REG_WFQPFCRD_MSB_RT_OFFSET 33573 +#define QM_REG_WFQPFCRD_MSB_RT_SIZE 160 +#define NIG_REG_TAG_ETHERTYPE_0_RT_OFFSET 33733 +#define NIG_REG_OUTER_TAG_VALUE_LIST0_RT_OFFSET 33734 +#define NIG_REG_OUTER_TAG_VALUE_LIST1_RT_OFFSET 33735 +#define NIG_REG_OUTER_TAG_VALUE_LIST2_RT_OFFSET 33736 +#define NIG_REG_OUTER_TAG_VALUE_LIST3_RT_OFFSET 33737 +#define NIG_REG_OUTER_TAG_VALUE_MASK_RT_OFFSET 33738 +#define NIG_REG_LLH_FUNC_TAGMAC_CLS_TYPE_RT_OFFSET 33739 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_OFFSET 33740 +#define NIG_REG_LLH_FUNC_TAG_EN_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_OFFSET 33744 +#define NIG_REG_LLH_FUNC_TAG_HDR_SEL_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_OFFSET 33748 +#define NIG_REG_LLH_FUNC_TAG_VALUE_RT_SIZE 4 +#define NIG_REG_LLH_FUNC_NO_TAG_RT_OFFSET 33752 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_OFFSET 33753 +#define NIG_REG_LLH_FUNC_FILTER_VALUE_RT_SIZE 32 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_OFFSET 33785 +#define NIG_REG_LLH_FUNC_FILTER_EN_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_OFFSET 33801 +#define NIG_REG_LLH_FUNC_FILTER_MODE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_OFFSET 33817 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_RT_SIZE 16 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_OFFSET 33833 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_RT_SIZE 16 +#define NIG_REG_TX_EDPM_CTRL_RT_OFFSET 33849 +#define NIG_REG_ROCE_DUPLICATE_TO_HOST_RT_OFFSET 33850 +#define CDU_REG_CID_ADDR_PARAMS_RT_OFFSET 33851 +#define CDU_REG_SEGMENT0_PARAMS_RT_OFFSET 33852 +#define CDU_REG_SEGMENT1_PARAMS_RT_OFFSET 33853 +#define CDU_REG_PF_SEG0_TYPE_OFFSET_RT_OFFSET 33854 +#define CDU_REG_PF_SEG1_TYPE_OFFSET_RT_OFFSET 33855 +#define CDU_REG_PF_SEG2_TYPE_OFFSET_RT_OFFSET 33856 +#define CDU_REG_PF_SEG3_TYPE_OFFSET_RT_OFFSET 33857 +#define CDU_REG_PF_FL_SEG0_TYPE_OFFSET_RT_OFFSET 33858 +#define CDU_REG_PF_FL_SEG1_TYPE_OFFSET_RT_OFFSET 33859 +#define CDU_REG_PF_FL_SEG2_TYPE_OFFSET_RT_OFFSET 33860 +#define CDU_REG_PF_FL_SEG3_TYPE_OFFSET_RT_OFFSET 33861 +#define CDU_REG_VF_SEG_TYPE_OFFSET_RT_OFFSET 33862 +#define CDU_REG_VF_FL_SEG_TYPE_OFFSET_RT_OFFSET 33863 +#define PBF_REG_TAG_ETHERTYPE_0_RT_OFFSET 33864 +#define PBF_REG_BTB_SHARED_AREA_SIZE_RT_OFFSET 33865 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ0_RT_OFFSET 33866 +#define PBF_REG_BTB_GUARANTEED_VOQ0_RT_OFFSET 33867 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ0_RT_OFFSET 33868 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ1_RT_OFFSET 33869 +#define PBF_REG_BTB_GUARANTEED_VOQ1_RT_OFFSET 33870 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ1_RT_OFFSET 33871 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ2_RT_OFFSET 33872 +#define PBF_REG_BTB_GUARANTEED_VOQ2_RT_OFFSET 33873 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ2_RT_OFFSET 33874 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ3_RT_OFFSET 33875 +#define PBF_REG_BTB_GUARANTEED_VOQ3_RT_OFFSET 33876 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ3_RT_OFFSET 33877 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ4_RT_OFFSET 33878 +#define PBF_REG_BTB_GUARANTEED_VOQ4_RT_OFFSET 33879 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ4_RT_OFFSET 33880 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ5_RT_OFFSET 33881 +#define PBF_REG_BTB_GUARANTEED_VOQ5_RT_OFFSET 33882 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ5_RT_OFFSET 33883 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ6_RT_OFFSET 33884 +#define PBF_REG_BTB_GUARANTEED_VOQ6_RT_OFFSET 33885 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ6_RT_OFFSET 33886 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ7_RT_OFFSET 33887 +#define PBF_REG_BTB_GUARANTEED_VOQ7_RT_OFFSET 33888 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ7_RT_OFFSET 33889 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ8_RT_OFFSET 33890 +#define PBF_REG_BTB_GUARANTEED_VOQ8_RT_OFFSET 33891 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ8_RT_OFFSET 33892 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ9_RT_OFFSET 33893 +#define PBF_REG_BTB_GUARANTEED_VOQ9_RT_OFFSET 33894 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ9_RT_OFFSET 33895 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ10_RT_OFFSET 33896 +#define PBF_REG_BTB_GUARANTEED_VOQ10_RT_OFFSET 33897 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ10_RT_OFFSET 33898 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ11_RT_OFFSET 33899 +#define PBF_REG_BTB_GUARANTEED_VOQ11_RT_OFFSET 33900 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ11_RT_OFFSET 33901 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ12_RT_OFFSET 33902 +#define PBF_REG_BTB_GUARANTEED_VOQ12_RT_OFFSET 33903 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ12_RT_OFFSET 33904 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ13_RT_OFFSET 33905 +#define PBF_REG_BTB_GUARANTEED_VOQ13_RT_OFFSET 33906 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ13_RT_OFFSET 33907 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ14_RT_OFFSET 33908 +#define PBF_REG_BTB_GUARANTEED_VOQ14_RT_OFFSET 33909 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ14_RT_OFFSET 33910 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ15_RT_OFFSET 33911 +#define PBF_REG_BTB_GUARANTEED_VOQ15_RT_OFFSET 33912 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ15_RT_OFFSET 33913 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ16_RT_OFFSET 33914 +#define PBF_REG_BTB_GUARANTEED_VOQ16_RT_OFFSET 33915 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ16_RT_OFFSET 33916 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ17_RT_OFFSET 33917 +#define PBF_REG_BTB_GUARANTEED_VOQ17_RT_OFFSET 33918 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ17_RT_OFFSET 33919 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ18_RT_OFFSET 33920 +#define PBF_REG_BTB_GUARANTEED_VOQ18_RT_OFFSET 33921 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ18_RT_OFFSET 33922 +#define PBF_REG_YCMD_QS_NUM_LINES_VOQ19_RT_OFFSET 33923 +#define PBF_REG_BTB_GUARANTEED_VOQ19_RT_OFFSET 33924 +#define PBF_REG_BTB_SHARED_AREA_SETUP_VOQ19_RT_OFFSET 33925 +#define XCM_REG_CON_PHY_Q3_RT_OFFSET 33926 -#define RUNTIME_ARRAY_SIZE 33925 +#define RUNTIME_ARRAY_SIZE 33927 /* The eth storm context for the Tstorm */ struct tstorm_eth_conn_st_ctx { @@ -3201,7 +4469,31 @@ struct eth_conn_context { struct mstorm_eth_conn_st_ctx mstorm_st_context; }; -/* opcodes for the event ring */ +enum eth_error_code { + ETH_OK = 0x00, + ETH_FILTERS_MAC_ADD_FAIL_FULL, + ETH_FILTERS_MAC_ADD_FAIL_FULL_MTT2, + ETH_FILTERS_MAC_ADD_FAIL_DUP_MTT2, + ETH_FILTERS_MAC_ADD_FAIL_DUP_STT2, + ETH_FILTERS_MAC_DEL_FAIL_NOF, + ETH_FILTERS_MAC_DEL_FAIL_NOF_MTT2, + ETH_FILTERS_MAC_DEL_FAIL_NOF_STT2, + ETH_FILTERS_MAC_ADD_FAIL_ZERO_MAC, + ETH_FILTERS_VLAN_ADD_FAIL_FULL, + ETH_FILTERS_VLAN_ADD_FAIL_DUP, + ETH_FILTERS_VLAN_DEL_FAIL_NOF, + ETH_FILTERS_VLAN_DEL_FAIL_NOF_TT1, + ETH_FILTERS_PAIR_ADD_FAIL_DUP, + ETH_FILTERS_PAIR_ADD_FAIL_FULL, + ETH_FILTERS_PAIR_ADD_FAIL_FULL_MAC, + ETH_FILTERS_PAIR_DEL_FAIL_NOF, + ETH_FILTERS_PAIR_DEL_FAIL_NOF_TT1, + ETH_FILTERS_PAIR_ADD_FAIL_ZERO_MAC, + ETH_FILTERS_VNI_ADD_FAIL_FULL, + ETH_FILTERS_VNI_ADD_FAIL_DUP, + MAX_ETH_ERROR_CODE +}; + enum eth_event_opcode { ETH_EVENT_UNUSED, ETH_EVENT_VPORT_START, @@ -3269,7 +4561,13 @@ enum eth_filter_type { MAX_ETH_FILTER_TYPE }; -/* Ethernet Ramrod Command IDs */ +enum eth_ipv4_frag_type { + ETH_IPV4_NOT_FRAG, + ETH_IPV4_FIRST_FRAG, + ETH_IPV4_NON_FIRST_FRAG, + MAX_ETH_IPV4_FRAG_TYPE +}; + enum eth_ramrod_cmd_id { ETH_RAMROD_UNUSED, ETH_RAMROD_VPORT_START, @@ -3451,8 +4749,8 @@ struct rx_queue_start_ramrod_data { u8 toggle_val; u8 vf_rx_prod_index; - - u8 reserved[6]; + u8 vf_rx_prod_use_zone_a; + u8 reserved[5]; __le16 reserved1; struct regpair cqe_pbl_addr; struct regpair bd_base; @@ -3526,10 +4824,11 @@ struct tx_queue_start_ramrod_data { __le16 pxp_st_index; __le16 comp_agg_size; __le16 queue_zone_id; - __le16 test_dup_count; + __le16 reserved2; __le16 pbl_size; __le16 tx_queue_id; - + __le16 same_as_last_id; + __le16 reserved[3]; struct regpair pbl_base_addr; struct regpair bd_cons_address; }; @@ -4926,8 +6225,8 @@ struct roce_create_qp_resp_ramrod_data { #define ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG_SHIFT 5 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_MASK 0x1 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN_SHIFT 6 -#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_MASK 0x1 -#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED0_SHIFT 7 +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_MASK 0x1 +#define ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN_SHIFT 7 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_MASK 0x7 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_PRI_SHIFT 8 #define ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_MASK 0x1F @@ -4988,6 +6287,10 @@ enum roce_event_opcode { MAX_ROCE_EVENT_OPCODE }; +struct roce_init_func_ramrod_data { + struct rdma_init_func_ramrod_data rdma; +}; + struct roce_modify_qp_req_ramrod_data { __le16 flags; #define ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG_MASK 0x1 @@ -6639,6 +7942,35 @@ struct ystorm_iscsi_conn_ag_ctx { __le32 reg2; __le32 reg3; }; + +#define MFW_TRACE_SIGNATURE 0x25071946 + +/* The trace in the buffer */ +#define MFW_TRACE_EVENTID_MASK 0x00ffff +#define MFW_TRACE_PRM_SIZE_MASK 0x0f0000 +#define MFW_TRACE_PRM_SIZE_SHIFT 16 +#define MFW_TRACE_ENTRY_SIZE 3 + +struct mcp_trace { + u32 signature; /* Help to identify that the trace is valid */ + u32 size; /* the size of the trace buffer in bytes */ + u32 curr_level; /* 2 - all will be written to the buffer + * 1 - debug trace will not be written + * 0 - just errors will be written to the buffer + */ + u32 modules_mask[2]; /* a bit per module, 1 means write it, 0 means + * mask it. + */ + + /* Warning: the following pointers are assumed to be 32bits as they are + * used only in the MFW. + */ + u32 trace_prod; /* The next trace will be written to this offset */ + u32 trace_oldest; /* The oldest valid trace starts at this offset + * (usually very close after the current producer). + */ +}; + #define VF_MAX_STATIC 192 #define MCP_GLOB_PATH_MAX 2 @@ -6646,6 +7978,7 @@ struct ystorm_iscsi_conn_ag_ctx { #define MCP_GLOB_PORT_MAX 4 #define MCP_GLOB_FUNC_MAX 16 +typedef u32 offsize_t; /* In DWORDS !!! */ /* Offset from the beginning of the MCP scratchpad */ #define OFFSIZE_OFFSET_SHIFT 0 #define OFFSIZE_OFFSET_MASK 0x0000ffff @@ -7236,8 +8569,19 @@ struct public_drv_mb { #define DRV_MSG_CODE_NIG_DRAIN 0x30000000 #define DRV_MSG_CODE_VF_DISABLED_DONE 0xc0000000 #define DRV_MSG_CODE_CFG_VF_MSIX 0xc0010000 +#define DRV_MSG_CODE_NVM_GET_FILE_ATT 0x00030000 +#define DRV_MSG_CODE_NVM_READ_NVRAM 0x00050000 #define DRV_MSG_CODE_MCP_RESET 0x00090000 #define DRV_MSG_CODE_SET_VERSION 0x000f0000 +#define DRV_MSG_CODE_MCP_HALT 0x00100000 + +#define DRV_MSG_CODE_GET_STATS 0x00130000 +#define DRV_MSG_CODE_STATS_TYPE_LAN 1 +#define DRV_MSG_CODE_STATS_TYPE_FCOE 2 +#define DRV_MSG_CODE_STATS_TYPE_ISCSI 3 +#define DRV_MSG_CODE_STATS_TYPE_RDMA 4 + +#define DRV_MSG_CODE_MASK_PARITIES 0x001a0000 #define DRV_MSG_CODE_BIST_TEST 0x001e0000 #define DRV_MSG_CODE_SET_LED_MODE 0x00200000 @@ -7248,6 +8592,9 @@ struct public_drv_mb { #define DRV_MB_PARAM_UNLOAD_WOL_MCP 0x00000001 #define DRV_MB_PARAM_DCBX_NOTIFY_MASK 0x000000FF #define DRV_MB_PARAM_DCBX_NOTIFY_SHIFT 3 + +#define DRV_MB_PARAM_NVM_LEN_SHIFT 24 + #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_SHIFT 0 #define DRV_MB_PARAM_CFG_VF_MSIX_VF_ID_MASK 0x000000FF #define DRV_MB_PARAM_CFG_VF_MSIX_SB_NUM_SHIFT 8 @@ -7285,6 +8632,8 @@ struct public_drv_mb { #define FW_MSG_CODE_DRV_UNLOAD_FUNCTION 0x20130000 #define FW_MSG_CODE_DRV_UNLOAD_DONE 0x21100000 #define FW_MSG_CODE_DRV_CFG_VF_MSIX_DONE 0xb0010000 + +#define FW_MSG_CODE_NVM_OK 0x00010000 #define FW_MSG_CODE_OK 0x00160000 #define FW_MSG_SEQ_NUMBER_MASK 0x0000ffff @@ -7315,10 +8664,10 @@ enum MFW_DRV_MSG_TYPE { MFW_DRV_MSG_RESERVED4, MFW_DRV_MSG_BW_UPDATE, MFW_DRV_MSG_BW_UPDATE5, - MFW_DRV_MSG_BW_UPDATE6, - MFW_DRV_MSG_BW_UPDATE7, - MFW_DRV_MSG_BW_UPDATE8, - MFW_DRV_MSG_BW_UPDATE9, + MFW_DRV_MSG_GET_LAN_STATS, + MFW_DRV_MSG_GET_FCOE_STATS, + MFW_DRV_MSG_GET_ISCSI_STATS, + MFW_DRV_MSG_GET_RDMA_STATS, MFW_DRV_MSG_BW_UPDATE10, MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE, MFW_DRV_MSG_BW_UPDATE11, @@ -7521,4 +8870,101 @@ struct nvm_cfg1 { struct nvm_cfg1_port port[MCP_GLOB_PORT_MAX]; struct nvm_cfg1_func func[MCP_GLOB_FUNC_MAX]; }; + +enum spad_sections { + SPAD_SECTION_TRACE, + SPAD_SECTION_NVM_CFG, + SPAD_SECTION_PUBLIC, + SPAD_SECTION_PRIVATE, + SPAD_SECTION_MAX +}; + +#define MCP_TRACE_SIZE 2048 /* 2kb */ + +/* This section is located at a fixed location in the beginning of the + * scratchpad, to ensure that the MCP trace is not run over during MFW upgrade. + * All the rest of data has a floating location which differs from version to + * version, and is pointed by the mcp_meta_data below. + * Moreover, the spad_layout section is part of the MFW firmware, and is loaded + * with it from nvram in order to clear this portion. + */ +struct static_init { + u32 num_sections; + offsize_t sections[SPAD_SECTION_MAX]; +#define SECTION(_sec_) (*((offsize_t *)(STRUCT_OFFSET(sections[_sec_])))) + + struct mcp_trace trace; +#define MCP_TRACE_P ((struct mcp_trace *)(STRUCT_OFFSET(trace))) + u8 trace_buffer[MCP_TRACE_SIZE]; +#define MCP_TRACE_BUF ((u8 *)(STRUCT_OFFSET(trace_buffer))) + /* running_mfw has the same definition as in nvm_map.h. + * This bit indicate both the running dir, and the running bundle. + * It is set once when the LIM is loaded. + */ + u32 running_mfw; +#define RUNNING_MFW (*((u32 *)(STRUCT_OFFSET(running_mfw)))) + u32 build_time; +#define MFW_BUILD_TIME (*((u32 *)(STRUCT_OFFSET(build_time)))) + u32 reset_type; +#define RESET_TYPE (*((u32 *)(STRUCT_OFFSET(reset_type)))) + u32 mfw_secure_mode; +#define MFW_SECURE_MODE (*((u32 *)(STRUCT_OFFSET(mfw_secure_mode)))) + u16 pme_status_pf_bitmap; +#define PME_STATUS_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_status_pf_bitmap)))) + u16 pme_enable_pf_bitmap; +#define PME_ENABLE_PF_BITMAP (*((u16 *)(STRUCT_OFFSET(pme_enable_pf_bitmap)))) + u32 mim_nvm_addr; + u32 mim_start_addr; + u32 ah_pcie_link_params; +#define AH_PCIE_LINK_PARAMS_LINK_SPEED_MASK (0x000000ff) +#define AH_PCIE_LINK_PARAMS_LINK_SPEED_SHIFT (0) +#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_MASK (0x0000ff00) +#define AH_PCIE_LINK_PARAMS_LINK_WIDTH_SHIFT (8) +#define AH_PCIE_LINK_PARAMS_ASPM_MODE_MASK (0x00ff0000) +#define AH_PCIE_LINK_PARAMS_ASPM_MODE_SHIFT (16) +#define AH_PCIE_LINK_PARAMS_ASPM_CAP_MASK (0xff000000) +#define AH_PCIE_LINK_PARAMS_ASPM_CAP_SHIFT (24) +#define AH_PCIE_LINK_PARAMS (*((u32 *)(STRUCT_OFFSET(ah_pcie_link_params)))) + + u32 rsrv_persist[5]; /* Persist reserved for MFW upgrades */ +}; + +enum nvm_image_type { + NVM_TYPE_TIM1 = 0x01, + NVM_TYPE_TIM2 = 0x02, + NVM_TYPE_MIM1 = 0x03, + NVM_TYPE_MIM2 = 0x04, + NVM_TYPE_MBA = 0x05, + NVM_TYPE_MODULES_PN = 0x06, + NVM_TYPE_VPD = 0x07, + NVM_TYPE_MFW_TRACE1 = 0x08, + NVM_TYPE_MFW_TRACE2 = 0x09, + NVM_TYPE_NVM_CFG1 = 0x0a, + NVM_TYPE_L2B = 0x0b, + NVM_TYPE_DIR1 = 0x0c, + NVM_TYPE_EAGLE_FW1 = 0x0d, + NVM_TYPE_FALCON_FW1 = 0x0e, + NVM_TYPE_PCIE_FW1 = 0x0f, + NVM_TYPE_HW_SET = 0x10, + NVM_TYPE_LIM = 0x11, + NVM_TYPE_AVS_FW1 = 0x12, + NVM_TYPE_DIR2 = 0x13, + NVM_TYPE_CCM = 0x14, + NVM_TYPE_EAGLE_FW2 = 0x15, + NVM_TYPE_FALCON_FW2 = 0x16, + NVM_TYPE_PCIE_FW2 = 0x17, + NVM_TYPE_AVS_FW2 = 0x18, + NVM_TYPE_INIT_HW = 0x19, + NVM_TYPE_DEFAULT_CFG = 0x1a, + NVM_TYPE_MDUMP = 0x1b, + NVM_TYPE_META = 0x1c, + NVM_TYPE_ISCSI_CFG = 0x1d, + NVM_TYPE_FCOE_CFG = 0x1f, + NVM_TYPE_ETH_PHY_FW1 = 0x20, + NVM_TYPE_ETH_PHY_FW2 = 0x21, + NVM_TYPE_MAX, +}; + +#define DIR_ID_1 (0) + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c index e17885321faf..6e4fae9b1430 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hw.c +++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c @@ -44,8 +44,7 @@ struct qed_ptt_pool { int qed_ptt_pool_alloc(struct qed_hwfn *p_hwfn) { - struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), - GFP_KERNEL); + struct qed_ptt_pool *p_pool = kmalloc(sizeof(*p_pool), GFP_KERNEL); int i; if (!p_pool) @@ -113,16 +112,14 @@ struct qed_ptt *qed_ptt_acquire(struct qed_hwfn *p_hwfn) return NULL; } -void qed_ptt_release(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_ptt_release(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { spin_lock_bh(&p_hwfn->p_ptt_pool->lock); list_add(&p_ptt->list_entry, &p_hwfn->p_ptt_pool->free_list); spin_unlock_bh(&p_hwfn->p_ptt_pool->lock); } -u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +u32 qed_ptt_get_hw_addr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { /* The HW is using DWORDS and we need to translate it to Bytes */ return le32_to_cpu(p_ptt->pxp.offset) << 2; @@ -141,8 +138,7 @@ u32 qed_ptt_get_bar_addr(struct qed_ptt *p_ptt) } void qed_ptt_set_win(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 new_hw_addr) + struct qed_ptt *p_ptt, u32 new_hw_addr) { u32 prev_hw_addr; @@ -166,8 +162,7 @@ void qed_ptt_set_win(struct qed_hwfn *p_hwfn, } static u32 qed_set_ptt(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 hw_addr) + struct qed_ptt *p_ptt, u32 hw_addr) { u32 win_hw_addr = qed_ptt_get_hw_addr(p_hwfn, p_ptt); u32 offset; @@ -224,10 +219,7 @@ u32 qed_rd(struct qed_hwfn *p_hwfn, static void qed_memcpy_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - void *addr, - u32 hw_addr, - size_t n, - bool to_device) + void *addr, u32 hw_addr, size_t n, bool to_device) { u32 dw_count, *host_addr, hw_offset; size_t quota, done = 0; @@ -259,8 +251,7 @@ static void qed_memcpy_hw(struct qed_hwfn *p_hwfn, } void qed_memcpy_from(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - void *dest, u32 hw_addr, size_t n) + struct qed_ptt *p_ptt, void *dest, u32 hw_addr, size_t n) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "hw_addr 0x%x, dest %p hw_addr 0x%x, size %lu\n", @@ -270,8 +261,7 @@ void qed_memcpy_from(struct qed_hwfn *p_hwfn, } void qed_memcpy_to(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 hw_addr, void *src, size_t n) + struct qed_ptt *p_ptt, u32 hw_addr, void *src, size_t n) { DP_VERBOSE(p_hwfn, NETIF_MSG_HW, "hw_addr 0x%x, hw_addr 0x%x, src %p size %lu\n", @@ -280,9 +270,7 @@ void qed_memcpy_to(struct qed_hwfn *p_hwfn, qed_memcpy_hw(p_hwfn, p_ptt, src, hw_addr, n, true); } -void qed_fid_pretend(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 fid) +void qed_fid_pretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u16 fid) { u16 control = 0; @@ -309,8 +297,7 @@ void qed_fid_pretend(struct qed_hwfn *p_hwfn, } void qed_port_pretend(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u8 port_id) + struct qed_ptt *p_ptt, u8 port_id) { u16 control = 0; @@ -326,8 +313,7 @@ void qed_port_pretend(struct qed_hwfn *p_hwfn, *(u32 *)&p_ptt->pxp.pretend); } -void qed_port_unpretend(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_port_unpretend(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u16 control = 0; @@ -429,28 +415,27 @@ u32 qed_dmae_idx_to_go_cmd(u8 idx) return DMAE_REG_GO_C0 + (idx << 2); } -static int -qed_dmae_post_command(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_dmae_post_command(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt) { - struct dmae_cmd *command = p_hwfn->dmae_info.p_dmae_cmd; + struct dmae_cmd *p_command = p_hwfn->dmae_info.p_dmae_cmd; u8 idx_cmd = p_hwfn->dmae_info.channel, i; int qed_status = 0; /* verify address is not NULL */ - if ((((command->dst_addr_lo == 0) && (command->dst_addr_hi == 0)) || - ((command->src_addr_lo == 0) && (command->src_addr_hi == 0)))) { + if ((((!p_command->dst_addr_lo) && (!p_command->dst_addr_hi)) || + ((!p_command->src_addr_lo) && (!p_command->src_addr_hi)))) { DP_NOTICE(p_hwfn, "source or destination address 0 idx_cmd=%d\n" "opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n", - idx_cmd, - le32_to_cpu(command->opcode), - le16_to_cpu(command->opcode_b), - le16_to_cpu(command->length_dw), - le32_to_cpu(command->src_addr_hi), - le32_to_cpu(command->src_addr_lo), - le32_to_cpu(command->dst_addr_hi), - le32_to_cpu(command->dst_addr_lo)); + idx_cmd, + le32_to_cpu(p_command->opcode), + le16_to_cpu(p_command->opcode_b), + le16_to_cpu(p_command->length_dw), + le32_to_cpu(p_command->src_addr_hi), + le32_to_cpu(p_command->src_addr_lo), + le32_to_cpu(p_command->dst_addr_hi), + le32_to_cpu(p_command->dst_addr_lo)); return -EINVAL; } @@ -459,13 +444,13 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn, NETIF_MSG_HW, "Posting DMAE command [idx %d]: opcode = [0x%08x,0x%04x] len=0x%x src=0x%x:%x dst=0x%x:%x\n", idx_cmd, - le32_to_cpu(command->opcode), - le16_to_cpu(command->opcode_b), - le16_to_cpu(command->length_dw), - le32_to_cpu(command->src_addr_hi), - le32_to_cpu(command->src_addr_lo), - le32_to_cpu(command->dst_addr_hi), - le32_to_cpu(command->dst_addr_lo)); + le32_to_cpu(p_command->opcode), + le16_to_cpu(p_command->opcode_b), + le16_to_cpu(p_command->length_dw), + le32_to_cpu(p_command->src_addr_hi), + le32_to_cpu(p_command->src_addr_lo), + le32_to_cpu(p_command->dst_addr_hi), + le32_to_cpu(p_command->dst_addr_lo)); /* Copy the command to DMAE - need to do it before every call * for source/dest address no reset. @@ -475,7 +460,7 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn, */ for (i = 0; i < DMAE_CMD_SIZE; i++) { u32 data = (i < DMAE_CMD_SIZE_TO_FILL) ? - *(((u32 *)command) + i) : 0; + *(((u32 *)p_command) + i) : 0; qed_wr(p_hwfn, p_ptt, DMAE_REG_CMD_MEM + @@ -483,9 +468,7 @@ qed_dmae_post_command(struct qed_hwfn *p_hwfn, (i * sizeof(u32)), data); } - qed_wr(p_hwfn, p_ptt, - qed_dmae_idx_to_go_cmd(idx_cmd), - DMAE_GO_VALUE); + qed_wr(p_hwfn, p_ptt, qed_dmae_idx_to_go_cmd(idx_cmd), DMAE_GO_VALUE); return qed_status; } @@ -498,31 +481,23 @@ int qed_dmae_info_alloc(struct qed_hwfn *p_hwfn) u32 **p_comp = &p_hwfn->dmae_info.p_completion_word; *p_comp = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - sizeof(u32), - p_addr, - GFP_KERNEL); - if (!*p_comp) { - DP_NOTICE(p_hwfn, "Failed to allocate `p_completion_word'\n"); + sizeof(u32), p_addr, GFP_KERNEL); + if (!*p_comp) goto err; - } p_addr = &p_hwfn->dmae_info.dmae_cmd_phys_addr; *p_cmd = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(struct dmae_cmd), p_addr, GFP_KERNEL); - if (!*p_cmd) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct dmae_cmd'\n"); + if (!*p_cmd) goto err; - } p_addr = &p_hwfn->dmae_info.intermediate_buffer_phys_addr; *p_buff = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(u32) * DMAE_MAX_RW_SIZE, p_addr, GFP_KERNEL); - if (!*p_buff) { - DP_NOTICE(p_hwfn, "Failed to allocate `intermediate_buffer'\n"); + if (!*p_buff) goto err; - } p_hwfn->dmae_info.channel = p_hwfn->rel_pf_id; @@ -543,8 +518,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn) p_phys = p_hwfn->dmae_info.completion_word_phys_addr; dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(u32), - p_hwfn->dmae_info.p_completion_word, - p_phys); + p_hwfn->dmae_info.p_completion_word, p_phys); p_hwfn->dmae_info.p_completion_word = NULL; } @@ -552,8 +526,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn) p_phys = p_hwfn->dmae_info.dmae_cmd_phys_addr; dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(struct dmae_cmd), - p_hwfn->dmae_info.p_dmae_cmd, - p_phys); + p_hwfn->dmae_info.p_dmae_cmd, p_phys); p_hwfn->dmae_info.p_dmae_cmd = NULL; } @@ -571,9 +544,7 @@ void qed_dmae_info_free(struct qed_hwfn *p_hwfn) static int qed_dmae_operation_wait(struct qed_hwfn *p_hwfn) { - u32 wait_cnt = 0; - u32 wait_cnt_limit = 10000; - + u32 wait_cnt_limit = 10000, wait_cnt = 0; int qed_status = 0; barrier(); @@ -606,7 +577,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, u64 dst_addr, u8 src_type, u8 dst_type, - u32 length) + u32 length_dw) { dma_addr_t phys = p_hwfn->dmae_info.intermediate_buffer_phys_addr; struct dmae_cmd *cmd = p_hwfn->dmae_info.p_dmae_cmd; @@ -624,7 +595,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, cmd->src_addr_lo = cpu_to_le32(lower_32_bits(phys)); memcpy(&p_hwfn->dmae_info.p_intermediate_buffer[0], (void *)(uintptr_t)src_addr, - length * sizeof(u32)); + length_dw * sizeof(u32)); break; default: return -EINVAL; @@ -645,7 +616,7 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, return -EINVAL; } - cmd->length_dw = cpu_to_le16((u16)length); + cmd->length_dw = cpu_to_le16((u16)length_dw); qed_dmae_post_command(p_hwfn, p_ptt); @@ -654,16 +625,14 @@ static int qed_dmae_execute_sub_operation(struct qed_hwfn *p_hwfn, if (qed_status) { DP_NOTICE(p_hwfn, "qed_dmae_host2grc: Wait Failed. source_addr 0x%llx, grc_addr 0x%llx, size_in_dwords 0x%x\n", - src_addr, - dst_addr, - length); + src_addr, dst_addr, length_dw); return qed_status; } if (dst_type == QED_DMAE_ADDRESS_HOST_VIRT) memcpy((void *)(uintptr_t)(dst_addr), &p_hwfn->dmae_info.p_intermediate_buffer[0], - length * sizeof(u32)); + length_dw * sizeof(u32)); return 0; } @@ -730,10 +699,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, if (qed_status) { DP_NOTICE(p_hwfn, "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n", - qed_status, - src_addr, - dst_addr, - length_cur); + qed_status, src_addr, dst_addr, length_cur); break; } } @@ -743,10 +709,7 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn, int qed_dmae_host2grc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u64 source_addr, - u32 grc_addr, - u32 size_in_dwords, - u32 flags) + u64 source_addr, u32 grc_addr, u32 size_in_dwords, u32 flags) { u32 grc_addr_in_dw = grc_addr / sizeof(u32); struct qed_dmae_params params; @@ -768,9 +731,10 @@ int qed_dmae_host2grc(struct qed_hwfn *p_hwfn, return rc; } -int -qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr, - dma_addr_t dest_addr, u32 size_in_dwords, u32 flags) +int qed_dmae_grc2host(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 grc_addr, + dma_addr_t dest_addr, u32 size_in_dwords, u32 flags) { u32 grc_addr_in_dw = grc_addr / sizeof(u32); struct qed_dmae_params params; @@ -791,12 +755,11 @@ qed_dmae_grc2host(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u32 grc_addr, return rc; } -int -qed_dmae_host2host(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - dma_addr_t source_addr, - dma_addr_t dest_addr, - u32 size_in_dwords, struct qed_dmae_params *p_params) +int qed_dmae_host2host(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + dma_addr_t source_addr, + dma_addr_t dest_addr, + u32 size_in_dwords, struct qed_dmae_params *p_params) { int rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c index 9866a20d2128..d567ba94c8d1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_init_ops.c +++ b/drivers/net/ethernet/qlogic/qed/qed_init_ops.c @@ -59,17 +59,14 @@ void qed_init_clear_rt_data(struct qed_hwfn *p_hwfn) p_hwfn->rt_data.b_valid[i] = false; } -void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, - u32 rt_offset, - u32 val) +void qed_init_store_rt_reg(struct qed_hwfn *p_hwfn, u32 rt_offset, u32 val) { p_hwfn->rt_data.init_val[rt_offset] = val; p_hwfn->rt_data.b_valid[rt_offset] = true; } void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn, - u32 rt_offset, u32 *p_val, - size_t size) + u32 rt_offset, u32 *p_val, size_t size) { size_t i; @@ -81,10 +78,7 @@ void qed_init_store_rt_agg(struct qed_hwfn *p_hwfn, static int qed_init_rt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u32 addr, - u16 rt_offset, - u16 size, - bool b_must_dmae) + u32 addr, u16 rt_offset, u16 size, bool b_must_dmae) { u32 *p_init_val = &p_hwfn->rt_data.init_val[rt_offset]; bool *p_valid = &p_hwfn->rt_data.b_valid[rt_offset]; @@ -102,8 +96,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn, * simply write the data instead of using dmae. */ if (!b_must_dmae) { - qed_wr(p_hwfn, p_ptt, addr + (i << 2), - p_init_val[i]); + qed_wr(p_hwfn, p_ptt, addr + (i << 2), p_init_val[i]); continue; } @@ -115,7 +108,7 @@ static int qed_init_rt(struct qed_hwfn *p_hwfn, rc = qed_dmae_host2grc(p_hwfn, p_ptt, (uintptr_t)(p_init_val + i), addr + (i << 2), segment, 0); - if (rc != 0) + if (rc) return rc; /* Jump over the entire segment, including invalid entry */ @@ -182,9 +175,7 @@ static int qed_init_array_dmae(struct qed_hwfn *p_hwfn, static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u32 addr, - u32 fill, - u32 fill_count) + u32 addr, u32 fill, u32 fill_count) { static u32 zero_buffer[DMAE_MAX_RW_SIZE]; @@ -199,15 +190,12 @@ static int qed_init_fill_dmae(struct qed_hwfn *p_hwfn, return qed_dmae_host2grc(p_hwfn, p_ptt, (uintptr_t)(&zero_buffer[0]), - addr, fill_count, - QED_DMAE_FLAG_RW_REPL_SRC); + addr, fill_count, QED_DMAE_FLAG_RW_REPL_SRC); } static void qed_init_fill(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u32 addr, - u32 fill, - u32 fill_count) + u32 addr, u32 fill, u32 fill_count) { u32 i; @@ -218,12 +206,12 @@ static void qed_init_fill(struct qed_hwfn *p_hwfn, static int qed_init_cmd_array(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct init_write_op *cmd, - bool b_must_dmae, - bool b_can_dmae) + bool b_must_dmae, bool b_can_dmae) { + u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset); u32 data = le32_to_cpu(cmd->data); u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; - u32 dmae_array_offset = le32_to_cpu(cmd->args.array_offset); + u32 offset, output_len, input_len, max_size; struct qed_dev *cdev = p_hwfn->cdev; union init_array_hdr *hdr; @@ -233,8 +221,7 @@ static int qed_init_cmd_array(struct qed_hwfn *p_hwfn, array_data = cdev->fw_data->arr_data; - hdr = (union init_array_hdr *)(array_data + - dmae_array_offset); + hdr = (union init_array_hdr *)(array_data + dmae_array_offset); data = le32_to_cpu(hdr->raw.data); switch (GET_FIELD(data, INIT_ARRAY_RAW_HDR_TYPE)) { case INIT_ARR_ZIPPED: @@ -290,13 +277,12 @@ static int qed_init_cmd_array(struct qed_hwfn *p_hwfn, /* init_ops write command */ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct init_write_op *cmd, - bool b_can_dmae) + struct init_write_op *p_cmd, bool b_can_dmae) { - u32 data = le32_to_cpu(cmd->data); - u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; + u32 data = le32_to_cpu(p_cmd->data); bool b_must_dmae = GET_FIELD(data, INIT_WRITE_OP_WIDE_BUS); - union init_write_args *arg = &cmd->args; + u32 addr = GET_FIELD(data, INIT_WRITE_OP_ADDRESS) << 2; + union init_write_args *arg = &p_cmd->args; int rc = 0; /* Sanitize */ @@ -309,20 +295,18 @@ static int qed_init_cmd_wr(struct qed_hwfn *p_hwfn, switch (GET_FIELD(data, INIT_WRITE_OP_SOURCE)) { case INIT_SRC_INLINE: - qed_wr(p_hwfn, p_ptt, addr, - le32_to_cpu(arg->inline_val)); + data = le32_to_cpu(p_cmd->args.inline_val); + qed_wr(p_hwfn, p_ptt, addr, data); break; case INIT_SRC_ZEROS: - if (b_must_dmae || - (b_can_dmae && (le32_to_cpu(arg->zeros_count) >= 64))) - rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, - le32_to_cpu(arg->zeros_count)); + data = le32_to_cpu(p_cmd->args.zeros_count); + if (b_must_dmae || (b_can_dmae && (data >= 64))) + rc = qed_init_fill_dmae(p_hwfn, p_ptt, addr, 0, data); else - qed_init_fill(p_hwfn, p_ptt, addr, 0, - le32_to_cpu(arg->zeros_count)); + qed_init_fill(p_hwfn, p_ptt, addr, 0, data); break; case INIT_SRC_ARRAY: - rc = qed_init_cmd_array(p_hwfn, p_ptt, cmd, + rc = qed_init_cmd_array(p_hwfn, p_ptt, p_cmd, b_must_dmae, b_can_dmae); break; case INIT_SRC_RUNTIME: @@ -353,8 +337,7 @@ static inline bool comp_or(u32 val, u32 expected_val) /* init_ops read/poll commands */ static void qed_init_cmd_rd(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct init_read_op *cmd) + struct qed_ptt *p_ptt, struct init_read_op *cmd) { bool (*comp_check)(u32 val, u32 expected_val); u32 delay = QED_INIT_POLL_PERIOD_US, val; @@ -412,35 +395,33 @@ static void qed_init_cmd_cb(struct qed_hwfn *p_hwfn, } static u8 qed_init_cmd_mode_match(struct qed_hwfn *p_hwfn, - u16 *offset, - int modes) + u16 *p_offset, int modes) { struct qed_dev *cdev = p_hwfn->cdev; const u8 *modes_tree_buf; u8 arg1, arg2, tree_val; modes_tree_buf = cdev->fw_data->modes_tree_buf; - tree_val = modes_tree_buf[(*offset)++]; + tree_val = modes_tree_buf[(*p_offset)++]; switch (tree_val) { case INIT_MODE_OP_NOT: - return qed_init_cmd_mode_match(p_hwfn, offset, modes) ^ 1; + return qed_init_cmd_mode_match(p_hwfn, p_offset, modes) ^ 1; case INIT_MODE_OP_OR: - arg1 = qed_init_cmd_mode_match(p_hwfn, offset, modes); - arg2 = qed_init_cmd_mode_match(p_hwfn, offset, modes); + arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); + arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); return arg1 | arg2; case INIT_MODE_OP_AND: - arg1 = qed_init_cmd_mode_match(p_hwfn, offset, modes); - arg2 = qed_init_cmd_mode_match(p_hwfn, offset, modes); + arg1 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); + arg2 = qed_init_cmd_mode_match(p_hwfn, p_offset, modes); return arg1 & arg2; default: tree_val -= MAX_INIT_MODE_OPS; - return (modes & (1 << tree_val)) ? 1 : 0; + return (modes & BIT(tree_val)) ? 1 : 0; } } static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn, - struct init_if_mode_op *p_cmd, - int modes) + struct init_if_mode_op *p_cmd, int modes) { u16 offset = le16_to_cpu(p_cmd->modes_buf_offset); @@ -453,8 +434,7 @@ static u32 qed_init_cmd_mode(struct qed_hwfn *p_hwfn, static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn, struct init_if_phase_op *p_cmd, - u32 phase, - u32 phase_id) + u32 phase, u32 phase_id) { u32 data = le32_to_cpu(p_cmd->phase_data); u32 op_data = le32_to_cpu(p_cmd->op_data); @@ -468,10 +448,7 @@ static u32 qed_init_cmd_phase(struct qed_hwfn *p_hwfn, } int qed_init_run(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - int phase, - int phase_id, - int modes) + struct qed_ptt *p_ptt, int phase, int phase_id, int modes) { struct qed_dev *cdev = p_hwfn->cdev; u32 cmd_num, num_init_ops; @@ -483,10 +460,8 @@ int qed_init_run(struct qed_hwfn *p_hwfn, init_ops = cdev->fw_data->init_ops; p_hwfn->unzip_buf = kzalloc(MAX_ZIPPED_SIZE * 4, GFP_ATOMIC); - if (!p_hwfn->unzip_buf) { - DP_NOTICE(p_hwfn, "Failed to allocate unzip buffer\n"); + if (!p_hwfn->unzip_buf) return -ENOMEM; - } for (cmd_num = 0; cmd_num < num_init_ops; cmd_num++) { union init_op *cmd = &init_ops[cmd_num]; @@ -557,7 +532,7 @@ int qed_init_fw_data(struct qed_dev *cdev, const u8 *data) /* First Dword contains metadata and should be skipped */ buf_hdr = (struct bin_buffer_hdr *)(data + sizeof(u32)); - offset = buf_hdr[BIN_BUF_FW_VER_INFO].offset; + offset = buf_hdr[BIN_BUF_INIT_FW_VER_INFO].offset; fw->fw_ver_info = (struct fw_ver_info *)(data + offset); offset = buf_hdr[BIN_BUF_INIT_CMD].offset; diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c index 8fa50fa23c8d..2adedc6fb6cf 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_int.c +++ b/drivers/net/ethernet/qlogic/qed/qed_int.c @@ -1775,10 +1775,9 @@ struct qed_sb_attn_info { }; static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn, - struct qed_sb_attn_info *p_sb_desc) + struct qed_sb_attn_info *p_sb_desc) { - u16 rc = 0; - u16 index; + u16 rc = 0, index; /* Make certain HW write took affect */ mmiowb(); @@ -1802,15 +1801,13 @@ static inline u16 qed_attn_update_idx(struct qed_hwfn *p_hwfn, * @param asserted_bits newly asserted bits * @return int */ -static int qed_int_assertion(struct qed_hwfn *p_hwfn, - u16 asserted_bits) +static int qed_int_assertion(struct qed_hwfn *p_hwfn, u16 asserted_bits) { struct qed_sb_attn_info *sb_attn_sw = p_hwfn->p_sb_attn; u32 igu_mask; /* Mask the source of the attention in the IGU */ - igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - IGU_REG_ATTENTION_ENABLE); + igu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE); DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "IGU mask: 0x%08x --> 0x%08x\n", igu_mask, igu_mask & ~(asserted_bits & ATTN_BITS_MASKABLE)); igu_mask &= ~(asserted_bits & ATTN_BITS_MASKABLE); @@ -2041,7 +2038,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, struct aeu_invert_reg_bit *p_bit = &p_aeu->bits[j]; if ((p_bit->flags & ATTENTION_PARITY) && - !!(parities & (1 << bit_idx))) + !!(parities & BIT(bit_idx))) qed_int_deassertion_parity(p_hwfn, p_bit, bit_idx); @@ -2114,8 +2111,7 @@ static int qed_int_deassertion(struct qed_hwfn *p_hwfn, ~((u32)deasserted_bits)); /* Unmask deasserted attentions in IGU */ - aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, - IGU_REG_ATTENTION_ENABLE); + aeu_mask = qed_rd(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE); aeu_mask |= (deasserted_bits & ATTN_BITS_MASKABLE); qed_wr(p_hwfn, p_hwfn->p_dpc_ptt, IGU_REG_ATTENTION_ENABLE, aeu_mask); @@ -2160,8 +2156,7 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) index, attn_bits, attn_acks, asserted_bits, deasserted_bits, p_sb_attn_sw->known_attn); } else if (asserted_bits == 0x100) { - DP_INFO(p_hwfn, - "MFW indication via attention\n"); + DP_INFO(p_hwfn, "MFW indication via attention\n"); } else { DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "MFW indication [deassertion]\n"); @@ -2173,18 +2168,14 @@ static int qed_int_attentions(struct qed_hwfn *p_hwfn) return rc; } - if (deasserted_bits) { + if (deasserted_bits) rc = qed_int_deassertion(p_hwfn, deasserted_bits); - if (rc) - return rc; - } return rc; } static void qed_sb_ack_attn(struct qed_hwfn *p_hwfn, - void __iomem *igu_addr, - u32 ack_cons) + void __iomem *igu_addr, u32 ack_cons) { struct igu_prod_cons_update igu_ack = { 0 }; @@ -2242,9 +2233,8 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) /* Gather Interrupts/Attentions information */ if (!sb_info->sb_virt) { - DP_ERR( - p_hwfn->cdev, - "Interrupt Status block is NULL - cannot check for new interrupts!\n"); + DP_ERR(p_hwfn->cdev, + "Interrupt Status block is NULL - cannot check for new interrupts!\n"); } else { u32 tmp_index = sb_info->sb_ack; @@ -2255,9 +2245,8 @@ void qed_int_sp_dpc(unsigned long hwfn_cookie) } if (!sb_attn || !sb_attn->sb_attn) { - DP_ERR( - p_hwfn->cdev, - "Attentions Status block is NULL - cannot check for new attentions!\n"); + DP_ERR(p_hwfn->cdev, + "Attentions Status block is NULL - cannot check for new attentions!\n"); } else { u16 tmp_index = sb_attn->index; @@ -2313,8 +2302,7 @@ static void qed_int_sb_attn_free(struct qed_hwfn *p_hwfn) if (p_sb->sb_attn) dma_free_coherent(&p_hwfn->cdev->pdev->dev, SB_ATTN_ALIGNED_SIZE(p_hwfn), - p_sb->sb_attn, - p_sb->sb_phys); + p_sb->sb_attn, p_sb->sb_phys); kfree(p_sb); } @@ -2337,8 +2325,7 @@ static void qed_int_sb_attn_setup(struct qed_hwfn *p_hwfn, static void qed_int_sb_attn_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - void *sb_virt_addr, - dma_addr_t sb_phy_addr) + void *sb_virt_addr, dma_addr_t sb_phy_addr) { struct qed_sb_attn_info *sb_info = p_hwfn->p_sb_attn; int i, j, k; @@ -2378,15 +2365,13 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, { struct qed_dev *cdev = p_hwfn->cdev; struct qed_sb_attn_info *p_sb; - void *p_virt; dma_addr_t p_phys = 0; + void *p_virt; /* SB struct */ p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL); - if (!p_sb) { - DP_NOTICE(cdev, "Failed to allocate `struct qed_sb_attn_info'\n"); + if (!p_sb) return -ENOMEM; - } /* SB ring */ p_virt = dma_alloc_coherent(&cdev->pdev->dev, @@ -2394,7 +2379,6 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, &p_phys, GFP_KERNEL); if (!p_virt) { - DP_NOTICE(cdev, "Failed to allocate status block (attentions)\n"); kfree(p_sb); return -ENOMEM; } @@ -2412,9 +2396,7 @@ static int qed_int_sb_attn_alloc(struct qed_hwfn *p_hwfn, void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, struct cau_sb_entry *p_sb_entry, - u8 pf_id, - u16 vf_number, - u8 vf_valid) + u8 pf_id, u16 vf_number, u8 vf_valid) { struct qed_dev *cdev = p_hwfn->cdev; u32 cau_state; @@ -2428,12 +2410,6 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET0, 0x7F); SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_SB_TIMESET1, 0x7F); - /* setting the time resultion to a fixed value ( = 1) */ - SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES0, - QED_CAU_DEF_RX_TIMER_RES); - SET_FIELD(p_sb_entry->params, CAU_SB_ENTRY_TIMER_RES1, - QED_CAU_DEF_TX_TIMER_RES); - cau_state = CAU_HC_DISABLE_STATE; if (cdev->int_coalescing_mode == QED_COAL_MODE_ENABLE) { @@ -2468,9 +2444,7 @@ void qed_init_cau_sb_entry(struct qed_hwfn *p_hwfn, void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, dma_addr_t sb_phys, - u16 igu_sb_id, - u16 vf_number, - u8 vf_valid) + u16 igu_sb_id, u16 vf_number, u8 vf_valid) { struct cau_sb_entry sb_entry; @@ -2514,8 +2488,7 @@ void qed_int_cau_conf_sb(struct qed_hwfn *p_hwfn, timer_res = 2; timeset = (u8)(p_hwfn->cdev->rx_coalesce_usecs >> timer_res); qed_int_cau_conf_pi(p_hwfn, p_ptt, igu_sb_id, RX_PI, - QED_COAL_RX_STATE_MACHINE, - timeset); + QED_COAL_RX_STATE_MACHINE, timeset); if (p_hwfn->cdev->tx_coalesce_usecs <= 0x7F) timer_res = 0; @@ -2541,8 +2514,7 @@ void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn, u8 timeset) { struct cau_pi_entry pi_entry; - u32 sb_offset; - u32 pi_offset; + u32 sb_offset, pi_offset; if (IS_VF(p_hwfn->cdev)) return; @@ -2569,8 +2541,7 @@ void qed_int_cau_conf_pi(struct qed_hwfn *p_hwfn, } void qed_int_sb_setup(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - struct qed_sb_info *sb_info) + struct qed_ptt *p_ptt, struct qed_sb_info *sb_info) { /* zero status block and ack counter */ sb_info->sb_ack = 0; @@ -2590,8 +2561,7 @@ void qed_int_sb_setup(struct qed_hwfn *p_hwfn, * * @return u16 */ -static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, - u16 sb_id) +static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id) { u16 igu_sb_id; @@ -2603,8 +2573,12 @@ static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, else igu_sb_id = qed_vf_get_igu_sb_id(p_hwfn, sb_id); - DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, "SB [%s] index is 0x%04x\n", - (sb_id == QED_SP_SB_ID) ? "DSB" : "non-DSB", igu_sb_id); + if (sb_id == QED_SP_SB_ID) + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "Slowpath SB index in IGU is 0x%04x\n", igu_sb_id); + else + DP_VERBOSE(p_hwfn, NETIF_MSG_INTR, + "SB [%04x] <--> IGU SB [%04x]\n", sb_id, igu_sb_id); return igu_sb_id; } @@ -2612,9 +2586,7 @@ static u16 qed_get_igu_sb_id(struct qed_hwfn *p_hwfn, int qed_int_sb_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, struct qed_sb_info *sb_info, - void *sb_virt_addr, - dma_addr_t sb_phy_addr, - u16 sb_id) + void *sb_virt_addr, dma_addr_t sb_phy_addr, u16 sb_id) { sb_info->sb_virt = sb_virt_addr; sb_info->sb_phys = sb_phy_addr; @@ -2650,8 +2622,7 @@ int qed_int_sb_init(struct qed_hwfn *p_hwfn, } int qed_int_sb_release(struct qed_hwfn *p_hwfn, - struct qed_sb_info *sb_info, - u16 sb_id) + struct qed_sb_info *sb_info, u16 sb_id) { if (sb_id == QED_SP_SB_ID) { DP_ERR(p_hwfn, "Do Not free sp sb using this function"); @@ -2685,8 +2656,7 @@ static void qed_int_sp_sb_free(struct qed_hwfn *p_hwfn) kfree(p_sb); } -static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_sb_sp_info *p_sb; dma_addr_t p_phys = 0; @@ -2694,17 +2664,14 @@ static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, /* SB struct */ p_sb = kmalloc(sizeof(*p_sb), GFP_KERNEL); - if (!p_sb) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sb_info'\n"); + if (!p_sb) return -ENOMEM; - } /* SB ring */ p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, SB_ALIGNED_SIZE(p_hwfn), &p_phys, GFP_KERNEL); if (!p_virt) { - DP_NOTICE(p_hwfn, "Failed to allocate status block\n"); kfree(p_sb); return -ENOMEM; } @@ -2721,9 +2688,7 @@ static int qed_int_sp_sb_alloc(struct qed_hwfn *p_hwfn, int qed_int_register_cb(struct qed_hwfn *p_hwfn, qed_int_comp_cb_t comp_cb, - void *cookie, - u8 *sb_idx, - __le16 **p_fw_cons) + void *cookie, u8 *sb_idx, __le16 **p_fw_cons) { struct qed_sb_sp_info *p_sp_sb = p_hwfn->p_sp_sb; int rc = -ENOMEM; @@ -2764,8 +2729,7 @@ u16 qed_int_get_sp_sb_id(struct qed_hwfn *p_hwfn) } void qed_int_igu_enable_int(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - enum qed_int_mode int_mode) + struct qed_ptt *p_ptt, enum qed_int_mode int_mode) { u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN | IGU_PF_CONF_ATTN_BIT_EN; @@ -2809,7 +2773,7 @@ int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, qed_wr(p_hwfn, p_ptt, MISC_REG_AEU_MASK_ATTN_IGU, 0xff); if ((int_mode != QED_INT_MODE_INTA) || IS_LEAD_HWFN(p_hwfn)) { rc = qed_slowpath_irq_req(p_hwfn); - if (rc != 0) { + if (rc) { DP_NOTICE(p_hwfn, "Slowpath IRQ request failed\n"); return -EINVAL; } @@ -2822,8 +2786,7 @@ int qed_int_igu_enable(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, return rc; } -void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_int_igu_disable_int(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { p_hwfn->b_int_enabled = 0; @@ -2950,13 +2913,11 @@ void qed_int_igu_init_pure_rt(struct qed_hwfn *p_hwfn, p_hwfn->hw_info.opaque_fid, b_set); } -static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u16 sb_id) +static u32 qed_int_igu_read_cam_block(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u16 sb_id) { u32 val = qed_rd(p_hwfn, p_ptt, - IGU_REG_MAPPING_MEMORY + - sizeof(u32) * sb_id); + IGU_REG_MAPPING_MEMORY + sizeof(u32) * sb_id); struct qed_igu_block *p_block; p_block = &p_hwfn->hw_info.p_igu_info->igu_map.igu_blocks[sb_id]; @@ -2983,8 +2944,7 @@ out: return val; } -int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_igu_info *p_igu_info; u32 val, min_vf = 0, max_vf = 0; @@ -2993,7 +2953,6 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, u16 prev_sb_id = 0xFF; p_hwfn->hw_info.p_igu_info = kzalloc(sizeof(*p_igu_info), GFP_KERNEL); - if (!p_hwfn->hw_info.p_igu_info) return -ENOMEM; @@ -3104,22 +3063,19 @@ int qed_int_igu_read_cam(struct qed_hwfn *p_hwfn, */ void qed_int_igu_init_rt(struct qed_hwfn *p_hwfn) { - u32 igu_pf_conf = 0; - - igu_pf_conf |= IGU_PF_CONF_FUNC_EN; + u32 igu_pf_conf = IGU_PF_CONF_FUNC_EN; STORE_RT_REG(p_hwfn, IGU_REG_PF_CONFIGURATION_RT_OFFSET, igu_pf_conf); } u64 qed_int_igu_read_sisr_reg(struct qed_hwfn *p_hwfn) { - u64 intr_status = 0; - u32 intr_status_lo = 0; - u32 intr_status_hi = 0; u32 lsb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_LSB_UPPER - IGU_CMD_INT_ACK_BASE; u32 msb_igu_cmd_addr = IGU_REG_SISR_MDPC_WMASK_MSB_UPPER - IGU_CMD_INT_ACK_BASE; + u32 intr_status_hi = 0, intr_status_lo = 0; + u64 intr_status = 0; intr_status_lo = REG_RD(p_hwfn, GTT_BAR0_MAP_REG_IGU_CMD + @@ -3153,26 +3109,20 @@ static void qed_int_sp_dpc_free(struct qed_hwfn *p_hwfn) kfree(p_hwfn->sp_dpc); } -int qed_int_alloc(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_int_alloc(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { int rc = 0; rc = qed_int_sp_dpc_alloc(p_hwfn); - if (rc) { - DP_ERR(p_hwfn->cdev, "Failed to allocate sp dpc mem\n"); + if (rc) return rc; - } + rc = qed_int_sp_sb_alloc(p_hwfn, p_ptt); - if (rc) { - DP_ERR(p_hwfn->cdev, "Failed to allocate sp sb mem\n"); + if (rc) return rc; - } + rc = qed_int_sb_attn_alloc(p_hwfn, p_ptt); - if (rc) { - DP_ERR(p_hwfn->cdev, "Failed to allocate sb attn mem\n"); - return rc; - } + return rc; } @@ -3183,8 +3133,7 @@ void qed_int_free(struct qed_hwfn *p_hwfn) qed_int_sp_dpc_free(p_hwfn); } -void qed_int_setup(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_int_setup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { qed_int_sb_setup(p_hwfn, p_ptt, &p_hwfn->p_sp_sb->sb_info); qed_int_sb_attn_setup(p_hwfn, p_ptt); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 401e738543b5..ddd410a91e13 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -52,7 +52,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, u16 rx_mode = 0; rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); - if (rc != 0) + if (rc) return rc; memset(&init_data, 0, sizeof(init_data)); @@ -80,8 +80,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, p_ramrod->rx_mode.state = cpu_to_le16(rx_mode); /* TPA related fields */ - memset(&p_ramrod->tpa_param, 0, - sizeof(struct eth_vport_tpa_param)); + memset(&p_ramrod->tpa_param, 0, sizeof(struct eth_vport_tpa_param)); p_ramrod->tpa_param.max_buff_num = p_params->max_buffers_per_cqe; @@ -102,6 +101,9 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, p_ramrod->tx_switching_en = p_params->tx_switching; + p_ramrod->ctl_frame_mac_check_en = !!p_params->check_mac; + p_ramrod->ctl_frame_ethtype_check_en = !!p_params->check_ethtype; + /* Software Function ID in hwfn (PFs are 0 - 15, VFs are 16 - 135) */ p_ramrod->sw_fid = qed_concrete_to_sw_fid(p_hwfn->cdev, p_params->concrete_fid); @@ -109,8 +111,8 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } -int qed_sp_vport_start(struct qed_hwfn *p_hwfn, - struct qed_sp_vport_start_params *p_params) +static int qed_sp_vport_start(struct qed_hwfn *p_hwfn, + struct qed_sp_vport_start_params *p_params) { if (IS_VF(p_hwfn->cdev)) { return qed_vf_pf_vport_start(p_hwfn, p_params->vport_id, @@ -306,14 +308,14 @@ qed_sp_update_mcast_bin(struct qed_hwfn *p_hwfn, memset(&p_ramrod->approx_mcast.bins, 0, sizeof(p_ramrod->approx_mcast.bins)); - if (p_params->update_approx_mcast_flg) { - p_ramrod->common.update_approx_mcast_flg = 1; - for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { - u32 *p_bins = (u32 *)p_params->bins; - __le32 val = cpu_to_le32(p_bins[i]); + if (!p_params->update_approx_mcast_flg) + return; - p_ramrod->approx_mcast.bins[i] = val; - } + p_ramrod->common.update_approx_mcast_flg = 1; + for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { + u32 *p_bins = (u32 *)p_params->bins; + + p_ramrod->approx_mcast.bins[i] = cpu_to_le32(p_bins[i]); } } @@ -336,7 +338,7 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, } rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); - if (rc != 0) + if (rc) return rc; memset(&init_data, 0, sizeof(init_data)); @@ -361,8 +363,8 @@ int qed_sp_vport_update(struct qed_hwfn *p_hwfn, p_cmn->tx_active_flg = p_params->vport_active_tx_flg; p_cmn->update_tx_active_flg = p_params->update_vport_active_tx_flg; p_cmn->accept_any_vlan = p_params->accept_any_vlan; - p_cmn->update_accept_any_vlan_flg = - p_params->update_accept_any_vlan_flg; + val = p_params->update_accept_any_vlan_flg; + p_cmn->update_accept_any_vlan_flg = val; p_cmn->inner_vlan_removal_en = p_params->inner_vlan_removal_flg; val = p_params->update_inner_vlan_removal_flg; @@ -411,7 +413,7 @@ int qed_sp_vport_stop(struct qed_hwfn *p_hwfn, u16 opaque_fid, u8 vport_id) return qed_vf_pf_vport_stop(p_hwfn); rc = qed_fw_vport(p_hwfn, vport_id, &abs_vport_id); - if (rc != 0) + if (rc) return rc; memset(&init_data, 0, sizeof(init_data)); @@ -476,7 +478,7 @@ static int qed_filter_accept_cmd(struct qed_dev *cdev, rc = qed_sp_vport_update(p_hwfn, &vport_update_params, comp_mode, p_comp_data); - if (rc != 0) { + if (rc) { DP_ERR(cdev, "Update rx_mode failed %d\n", rc); return rc; } @@ -511,11 +513,12 @@ static int qed_sp_release_queue_cid( int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, u16 opaque_fid, u32 cid, - struct qed_queue_start_common_params *params, + struct qed_queue_start_common_params *p_params, u8 stats_id, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, - dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size) + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, bool b_use_zone_a_prod) { struct rx_queue_start_ramrod_data *p_ramrod = NULL; struct qed_spq_entry *p_ent = NULL; @@ -526,23 +529,23 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, int rc = -EINVAL; /* Store information for the stop */ - p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id]; - p_rx_cid->cid = cid; - p_rx_cid->opaque_fid = opaque_fid; - p_rx_cid->vport_id = params->vport_id; + p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id]; + p_rx_cid->cid = cid; + p_rx_cid->opaque_fid = opaque_fid; + p_rx_cid->vport_id = p_params->vport_id; - rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_vport_id); - if (rc != 0) + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_vport_id); + if (rc) return rc; - rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_rx_q_id); - if (rc != 0) + rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_rx_q_id); + if (rc) return rc; DP_VERBOSE(p_hwfn, QED_MSG_SP, "opaque_fid=0x%x, cid=0x%x, rx_qid=0x%x, vport_id=0x%x, sb_id=0x%x\n", - opaque_fid, cid, params->queue_id, params->vport_id, - params->sb); + opaque_fid, + cid, p_params->queue_id, p_params->vport_id, p_params->sb); /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -558,24 +561,28 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, p_ramrod = &p_ent->ramrod.rx_queue_start; - p_ramrod->sb_id = cpu_to_le16(params->sb); - p_ramrod->sb_index = params->sb_idx; - p_ramrod->vport_id = abs_vport_id; - p_ramrod->stats_counter_id = stats_id; - p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id); - p_ramrod->complete_cqe_flg = 0; - p_ramrod->complete_event_flg = 1; + p_ramrod->sb_id = cpu_to_le16(p_params->sb); + p_ramrod->sb_index = p_params->sb_idx; + p_ramrod->vport_id = abs_vport_id; + p_ramrod->stats_counter_id = stats_id; + p_ramrod->rx_queue_id = cpu_to_le16(abs_rx_q_id); + p_ramrod->complete_cqe_flg = 0; + p_ramrod->complete_event_flg = 1; - p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes); + p_ramrod->bd_max_bytes = cpu_to_le16(bd_max_bytes); DMA_REGPAIR_LE(p_ramrod->bd_base, bd_chain_phys_addr); - p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); + p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, cqe_pbl_addr); - p_ramrod->vf_rx_prod_index = params->vf_qid; - if (params->vf_qid) + if (p_params->vf_qid || b_use_zone_a_prod) { + p_ramrod->vf_rx_prod_index = p_params->vf_qid; DP_VERBOSE(p_hwfn, QED_MSG_SP, - "Queue is meant for VF rxq[%04x]\n", params->vf_qid); + "Queue%s is meant for VF rxq[%02x]\n", + b_use_zone_a_prod ? " [legacy]" : "", + p_params->vf_qid); + p_ramrod->vf_rx_prod_use_zone_a = b_use_zone_a_prod; + } return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -583,7 +590,7 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, static int qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, u16 opaque_fid, - struct qed_queue_start_common_params *params, + struct qed_queue_start_common_params *p_params, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, dma_addr_t cqe_pbl_addr, @@ -597,20 +604,20 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, if (IS_VF(p_hwfn->cdev)) { return qed_vf_pf_rxq_start(p_hwfn, - params->queue_id, - params->sb, - params->sb_idx, + p_params->queue_id, + p_params->sb, + (u8)p_params->sb_idx, bd_max_bytes, bd_chain_phys_addr, cqe_pbl_addr, cqe_pbl_size, pp_prod); } - rc = qed_fw_l2_queue(p_hwfn, params->queue_id, &abs_l2_queue); - if (rc != 0) + rc = qed_fw_l2_queue(p_hwfn, p_params->queue_id, &abs_l2_queue); + if (rc) return rc; - rc = qed_fw_vport(p_hwfn, params->vport_id, &abs_stats_id); - if (rc != 0) + rc = qed_fw_vport(p_hwfn, p_params->vport_id, &abs_stats_id); + if (rc) return rc; *pp_prod = (u8 __iomem *)p_hwfn->regview + @@ -622,9 +629,8 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, (u32 *)(&init_prod_val)); /* Allocate a CID for the queue */ - p_rx_cid = &p_hwfn->p_rx_cids[params->queue_id]; - rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, - &p_rx_cid->cid); + p_rx_cid = &p_hwfn->p_rx_cids[p_params->queue_id]; + rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_rx_cid->cid); if (rc) { DP_NOTICE(p_hwfn, "Failed to acquire cid\n"); return rc; @@ -634,14 +640,13 @@ qed_sp_eth_rx_queue_start(struct qed_hwfn *p_hwfn, rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, opaque_fid, p_rx_cid->cid, - params, + p_params, abs_stats_id, bd_max_bytes, bd_chain_phys_addr, - cqe_pbl_addr, - cqe_pbl_size); + cqe_pbl_addr, cqe_pbl_size, false); - if (rc != 0) + if (rc) qed_sp_release_queue_cid(p_hwfn, p_rx_cid); return rc; @@ -788,21 +793,20 @@ int qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, if (rc) return rc; - p_ramrod = &p_ent->ramrod.tx_queue_start; - p_ramrod->vport_id = abs_vport_id; + p_ramrod = &p_ent->ramrod.tx_queue_start; + p_ramrod->vport_id = abs_vport_id; - p_ramrod->sb_id = cpu_to_le16(p_params->sb); - p_ramrod->sb_index = p_params->sb_idx; - p_ramrod->stats_counter_id = stats_id; + p_ramrod->sb_id = cpu_to_le16(p_params->sb); + p_ramrod->sb_index = p_params->sb_idx; + p_ramrod->stats_counter_id = stats_id; - p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id); - p_ramrod->pbl_size = cpu_to_le16(pbl_size); + p_ramrod->queue_zone_id = cpu_to_le16(abs_tx_q_id); + + p_ramrod->pbl_size = cpu_to_le16(pbl_size); DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, pbl_addr); - pq_id = qed_get_qm_pq(p_hwfn, - PROTOCOLID_ETH, - p_pq_params); - p_ramrod->qm_pq_id = cpu_to_le16(pq_id); + pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_ETH, p_pq_params); + p_ramrod->qm_pq_id = cpu_to_le16(pq_id); return qed_spq_post(p_hwfn, p_ent, NULL); } @@ -836,8 +840,7 @@ qed_sp_eth_tx_queue_start(struct qed_hwfn *p_hwfn, memset(&pq_params, 0, sizeof(pq_params)); /* Allocate a CID for the queue */ - rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, - &p_tx_cid->cid); + rc = qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_ETH, &p_tx_cid->cid); if (rc) { DP_NOTICE(p_hwfn, "Failed to acquire cid\n"); return rc; @@ -896,8 +899,7 @@ int qed_sp_eth_tx_queue_stop(struct qed_hwfn *p_hwfn, u16 tx_queue_id) return qed_sp_release_queue_cid(p_hwfn, p_tx_cid); } -static enum eth_filter_action -qed_filter_action(enum qed_filter_opcode opcode) +static enum eth_filter_action qed_filter_action(enum qed_filter_opcode opcode) { enum eth_filter_action action = MAX_ETH_FILTER_ACTION; @@ -1033,19 +1035,19 @@ qed_filter_ucast_common(struct qed_hwfn *p_hwfn, p_first_filter->vni = cpu_to_le32(p_filter_cmd->vni); if (p_filter_cmd->opcode == QED_FILTER_MOVE) { - p_second_filter->type = p_first_filter->type; - p_second_filter->mac_msb = p_first_filter->mac_msb; - p_second_filter->mac_mid = p_first_filter->mac_mid; - p_second_filter->mac_lsb = p_first_filter->mac_lsb; - p_second_filter->vlan_id = p_first_filter->vlan_id; - p_second_filter->vni = p_first_filter->vni; + p_second_filter->type = p_first_filter->type; + p_second_filter->mac_msb = p_first_filter->mac_msb; + p_second_filter->mac_mid = p_first_filter->mac_mid; + p_second_filter->mac_lsb = p_first_filter->mac_lsb; + p_second_filter->vlan_id = p_first_filter->vlan_id; + p_second_filter->vni = p_first_filter->vni; p_first_filter->action = ETH_FILTER_ACTION_REMOVE; p_first_filter->vport_id = vport_to_remove_from; - p_second_filter->action = ETH_FILTER_ACTION_ADD; - p_second_filter->vport_id = vport_to_add_to; + p_second_filter->action = ETH_FILTER_ACTION_ADD; + p_second_filter->vport_id = vport_to_add_to; } else if (p_filter_cmd->opcode == QED_FILTER_REPLACE) { p_first_filter->vport_id = vport_to_add_to; memcpy(p_second_filter, p_first_filter, @@ -1086,7 +1088,7 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, rc = qed_filter_ucast_common(p_hwfn, opaque_fid, p_filter_cmd, &p_ramrod, &p_ent, comp_mode, p_comp_data); - if (rc != 0) { + if (rc) { DP_ERR(p_hwfn, "Uni. filter command failed %d\n", rc); return rc; } @@ -1094,10 +1096,8 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, p_header->assert_on_error = p_filter_cmd->assert_on_error; rc = qed_spq_post(p_hwfn, p_ent, NULL); - if (rc != 0) { - DP_ERR(p_hwfn, - "Unicast filter ADD command failed %d\n", - rc); + if (rc) { + DP_ERR(p_hwfn, "Unicast filter ADD command failed %d\n", rc); return rc; } @@ -1136,15 +1136,10 @@ int qed_sp_eth_filter_ucast(struct qed_hwfn *p_hwfn, * Return: ******************************************************************************/ static u32 qed_calc_crc32c(u8 *crc32_packet, - u32 crc32_length, - u32 crc32_seed, - u8 complement) + u32 crc32_length, u32 crc32_seed, u8 complement) { - u32 byte = 0; - u32 bit = 0; - u8 msb = 0; - u8 current_byte = 0; - u32 crc32_result = crc32_seed; + u32 byte = 0, bit = 0, crc32_result = crc32_seed; + u8 msb = 0, current_byte = 0; if ((!crc32_packet) || (crc32_length == 0) || @@ -1164,9 +1159,7 @@ static u32 qed_calc_crc32c(u8 *crc32_packet, return crc32_result; } -static inline u32 qed_crc32c_le(u32 seed, - u8 *mac, - u32 len) +static u32 qed_crc32c_le(u32 seed, u8 *mac, u32 len) { u32 packet_buf[2] = { 0 }; @@ -1196,17 +1189,14 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, u8 abs_vport_id = 0; int rc, i; - if (p_filter_cmd->opcode == QED_FILTER_ADD) { + if (p_filter_cmd->opcode == QED_FILTER_ADD) rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_add_to, &abs_vport_id); - if (rc) - return rc; - } else { + else rc = qed_fw_vport(p_hwfn, p_filter_cmd->vport_to_remove_from, &abs_vport_id); - if (rc) - return rc; - } + if (rc) + return rc; /* Get SPQ entry */ memset(&init_data, 0, sizeof(init_data)); @@ -1244,11 +1234,11 @@ qed_sp_eth_filter_mcast(struct qed_hwfn *p_hwfn, /* Convert to correct endianity */ for (i = 0; i < ETH_MULTICAST_MAC_BINS_IN_REGS; i++) { + struct vport_update_ramrod_mcast *p_ramrod_bins; u32 *p_bins = (u32 *)bins; - struct vport_update_ramrod_mcast *approx_mcast; - approx_mcast = &p_ramrod->approx_mcast; - approx_mcast->bins[i] = cpu_to_le32(p_bins[i]); + p_ramrod_bins = &p_ramrod->approx_mcast; + p_ramrod_bins->bins[i] = cpu_to_le32(p_bins[i]); } } @@ -1286,8 +1276,7 @@ static int qed_filter_mcast_cmd(struct qed_dev *cdev, rc = qed_sp_eth_filter_mcast(p_hwfn, opaque_fid, p_filter_cmd, - comp_mode, - p_comp_data); + comp_mode, p_comp_data); } return rc; } @@ -1314,9 +1303,8 @@ static int qed_filter_ucast_cmd(struct qed_dev *cdev, rc = qed_sp_eth_filter_ucast(p_hwfn, opaque_fid, p_filter_cmd, - comp_mode, - p_comp_data); - if (rc != 0) + comp_mode, p_comp_data); + if (rc) break; } @@ -1590,8 +1578,7 @@ out: } } -void qed_get_vport_stats(struct qed_dev *cdev, - struct qed_eth_stats *stats) +void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats) { u32 i; @@ -1698,6 +1685,8 @@ static int qed_fill_eth_dev_info(struct qed_dev *cdev, qed_vf_get_num_vlan_filters(&cdev->hwfns[0], &info->num_vlan_filters); qed_vf_get_port_mac(&cdev->hwfns[0], info->port_mac); + + info->is_legacy = !!cdev->hwfns[0].vf_iov_info->b_pre_fp_hsi; } qed_fill_dev_info(cdev, &info->common); @@ -1766,8 +1755,7 @@ static int qed_start_vport(struct qed_dev *cdev, return 0; } -static int qed_stop_vport(struct qed_dev *cdev, - u8 vport_id) +static int qed_stop_vport(struct qed_dev *cdev, u8 vport_id) { int rc, i; @@ -1775,8 +1763,7 @@ static int qed_stop_vport(struct qed_dev *cdev, struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; rc = qed_sp_vport_stop(p_hwfn, - p_hwfn->hw_info.opaque_fid, - vport_id); + p_hwfn->hw_info.opaque_fid, vport_id); if (rc) { DP_ERR(cdev, "Failed to stop VPORT\n"); @@ -1801,10 +1788,8 @@ static int qed_update_vport(struct qed_dev *cdev, /* Translate protocol params into sp params */ sp_params.vport_id = params->vport_id; - sp_params.update_vport_active_rx_flg = - params->update_vport_active_flg; - sp_params.update_vport_active_tx_flg = - params->update_vport_active_flg; + sp_params.update_vport_active_rx_flg = params->update_vport_active_flg; + sp_params.update_vport_active_tx_flg = params->update_vport_active_flg; sp_params.vport_active_rx_flg = params->vport_active_flg; sp_params.vport_active_tx_flg = params->vport_active_flg; sp_params.update_tx_switching_flg = params->update_tx_switching_flg; @@ -1817,8 +1802,7 @@ static int qed_update_vport(struct qed_dev *cdev, * We need to re-fix the rss values per engine for CMT. */ if (cdev->num_hwfns > 1 && params->update_rss_flg) { - struct qed_update_vport_rss_params *rss = - ¶ms->rss_params; + struct qed_update_vport_rss_params *rss = ¶ms->rss_params; int k, max = 0; /* Find largest entry, since it's possible RSS needs to @@ -1861,8 +1845,8 @@ static int qed_update_vport(struct qed_dev *cdev, QED_RSS_IND_TABLE_SIZE * sizeof(u16)); memcpy(sp_rss_params.rss_key, params->rss_params.rss_key, QED_RSS_KEY_SIZE * sizeof(u32)); + sp_params.rss_params = &sp_rss_params; } - sp_params.rss_params = &sp_rss_params; for_each_hwfn(cdev, i) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -1893,8 +1877,8 @@ static int qed_start_rxq(struct qed_dev *cdev, u16 cqe_pbl_size, void __iomem **pp_prod) { - int rc, hwfn_index; struct qed_hwfn *p_hwfn; + int rc, hwfn_index; hwfn_index = params->rss_id % cdev->num_hwfns; p_hwfn = &cdev->hwfns[hwfn_index]; @@ -1935,8 +1919,7 @@ static int qed_stop_rxq(struct qed_dev *cdev, rc = qed_sp_eth_rx_queue_stop(p_hwfn, params->rx_queue_id / cdev->num_hwfns, - params->eq_completion_only, - false); + params->eq_completion_only, false); if (rc) { DP_ERR(cdev, "Failed to stop RXQ#%d\n", params->rx_queue_id); return rc; @@ -2047,11 +2030,11 @@ static int qed_configure_filter_rx_mode(struct qed_dev *cdev, memset(&accept_flags, 0, sizeof(accept_flags)); - accept_flags.update_rx_mode_config = 1; - accept_flags.update_tx_mode_config = 1; - accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED | - QED_ACCEPT_MCAST_MATCHED | - QED_ACCEPT_BCAST; + accept_flags.update_rx_mode_config = 1; + accept_flags.update_tx_mode_config = 1; + accept_flags.rx_accept_filter = QED_ACCEPT_UCAST_MATCHED | + QED_ACCEPT_MCAST_MATCHED | + QED_ACCEPT_BCAST; accept_flags.tx_accept_filter = QED_ACCEPT_UCAST_MATCHED | QED_ACCEPT_MCAST_MATCHED | QED_ACCEPT_BCAST; @@ -2072,9 +2055,8 @@ static int qed_configure_filter_ucast(struct qed_dev *cdev, struct qed_filter_ucast ucast; if (!params->vlan_valid && !params->mac_valid) { - DP_NOTICE( - cdev, - "Tried configuring a unicast filter, but both MAC and VLAN are not set\n"); + DP_NOTICE(cdev, + "Tried configuring a unicast filter, but both MAC and VLAN are not set\n"); return -EINVAL; } @@ -2135,8 +2117,7 @@ static int qed_configure_filter_mcast(struct qed_dev *cdev, for (i = 0; i < mcast.num_mc_addrs; i++) ether_addr_copy(mcast.mac[i], params->mac[i]); - return qed_filter_mcast_cmd(cdev, &mcast, - QED_SPQ_MODE_CB, NULL); + return qed_filter_mcast_cmd(cdev, &mcast, QED_SPQ_MODE_CB, NULL); } static int qed_configure_filter(struct qed_dev *cdev, @@ -2153,15 +2134,13 @@ static int qed_configure_filter(struct qed_dev *cdev, accept_flags = params->filter.accept_flags; return qed_configure_filter_rx_mode(cdev, accept_flags); default: - DP_NOTICE(cdev, "Unknown filter type %d\n", - (int)params->type); + DP_NOTICE(cdev, "Unknown filter type %d\n", (int)params->type); return -EINVAL; } } static int qed_fp_cqe_completion(struct qed_dev *dev, - u8 rss_id, - struct eth_slow_path_rx_cqe *cqe) + u8 rss_id, struct eth_slow_path_rx_cqe *cqe) { return qed_eth_cqe_completion(&dev->hwfns[rss_id % dev->num_hwfns], cqe); diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.h b/drivers/net/ethernet/qlogic/qed/qed_l2.h index 002114543451..e495d62fcc03 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.h +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.h @@ -102,6 +102,8 @@ struct qed_sp_vport_start_params { u16 opaque_fid; u8 vport_id; u16 mtu; + bool check_mac; + bool check_ethtype; }; int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, @@ -213,6 +215,8 @@ qed_sp_eth_rx_queues_update(struct qed_hwfn *p_hwfn, enum spq_mode comp_mode, struct qed_spq_comp_cb *p_comp_data); +void qed_get_vport_stats(struct qed_dev *cdev, struct qed_eth_stats *stats); + int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, struct qed_sp_vport_start_params *p_params); @@ -223,7 +227,8 @@ int qed_sp_eth_rxq_start_ramrod(struct qed_hwfn *p_hwfn, u8 stats_id, u16 bd_max_bytes, dma_addr_t bd_chain_phys_addr, - dma_addr_t cqe_pbl_addr, u16 cqe_pbl_size); + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, bool b_use_zone_a_prod); int qed_sp_eth_txq_start_ramrod(struct qed_hwfn *p_hwfn, u16 opaque_fid, diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c new file mode 100644 index 000000000000..a6db10717d5c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -0,0 +1,1792 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_int.h" +#include "qed_ll2.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" + +#define QED_LL2_RX_REGISTERED(ll2) ((ll2)->rx_queue.b_cb_registred) +#define QED_LL2_TX_REGISTERED(ll2) ((ll2)->tx_queue.b_cb_registred) + +#define QED_LL2_TX_SIZE (256) +#define QED_LL2_RX_SIZE (4096) + +struct qed_cb_ll2_info { + int rx_cnt; + u32 rx_size; + u8 handle; + bool frags_mapped; + + /* Lock protecting LL2 buffer lists in sleepless context */ + spinlock_t lock; + struct list_head list; + + const struct qed_ll2_cb_ops *cbs; + void *cb_cookie; +}; + +struct qed_ll2_buffer { + struct list_head list; + void *data; + dma_addr_t phys_addr; +}; + +static void qed_ll2b_complete_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, + bool b_last_packet) +{ + struct qed_dev *cdev = p_hwfn->cdev; + struct sk_buff *skb = cookie; + + /* All we need to do is release the mapping */ + dma_unmap_single(&p_hwfn->cdev->pdev->dev, first_frag_addr, + skb_headlen(skb), DMA_TO_DEVICE); + + if (cdev->ll2->cbs && cdev->ll2->cbs->tx_cb) + cdev->ll2->cbs->tx_cb(cdev->ll2->cb_cookie, skb, + b_last_fragment); + + if (cdev->ll2->frags_mapped) + /* Case where mapped frags were received, need to + * free skb with nr_frags marked as 0 + */ + skb_shinfo(skb)->nr_frags = 0; + + dev_kfree_skb_any(skb); +} + +static int qed_ll2_alloc_buffer(struct qed_dev *cdev, + u8 **data, dma_addr_t *phys_addr) +{ + *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC); + if (!(*data)) { + DP_INFO(cdev, "Failed to allocate LL2 buffer data\n"); + return -ENOMEM; + } + + *phys_addr = dma_map_single(&cdev->pdev->dev, + ((*data) + NET_SKB_PAD), + cdev->ll2->rx_size, DMA_FROM_DEVICE); + if (dma_mapping_error(&cdev->pdev->dev, *phys_addr)) { + DP_INFO(cdev, "Failed to map LL2 buffer data\n"); + kfree((*data)); + return -ENOMEM; + } + + return 0; +} + +static int qed_ll2_dealloc_buffer(struct qed_dev *cdev, + struct qed_ll2_buffer *buffer) +{ + spin_lock_bh(&cdev->ll2->lock); + + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); + kfree(buffer->data); + list_del(&buffer->list); + + cdev->ll2->rx_cnt--; + if (!cdev->ll2->rx_cnt) + DP_INFO(cdev, "All LL2 entries were removed\n"); + + spin_unlock_bh(&cdev->ll2->lock); + + return 0; +} + +static void qed_ll2_kill_buffers(struct qed_dev *cdev) +{ + struct qed_ll2_buffer *buffer, *tmp_buffer; + + list_for_each_entry_safe(buffer, tmp_buffer, &cdev->ll2->list, list) + qed_ll2_dealloc_buffer(cdev, buffer); +} + +void qed_ll2b_complete_rx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + struct qed_ll2_rx_packet *p_pkt, + struct core_rx_fast_path_cqe *p_cqe, + bool b_last_packet) +{ + u16 packet_length = le16_to_cpu(p_cqe->packet_length); + struct qed_ll2_buffer *buffer = p_pkt->cookie; + struct qed_dev *cdev = p_hwfn->cdev; + u16 vlan = le16_to_cpu(p_cqe->vlan); + u32 opaque_data_0, opaque_data_1; + u8 pad = p_cqe->placement_offset; + dma_addr_t new_phys_addr; + struct sk_buff *skb; + bool reuse = false; + int rc = -EINVAL; + u8 *new_data; + + opaque_data_0 = le32_to_cpu(p_cqe->opaque_data.data[0]); + opaque_data_1 = le32_to_cpu(p_cqe->opaque_data.data[1]); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_RX_STATUS | QED_MSG_STORAGE | NETIF_MSG_PKTDATA), + "Got an LL2 Rx completion: [Buffer at phys 0x%llx, offset 0x%02x] Length 0x%04x Parse_flags 0x%04x vlan 0x%04x Opaque data [0x%08x:0x%08x]\n", + (u64)p_pkt->rx_buf_addr, pad, packet_length, + le16_to_cpu(p_cqe->parse_flags.flags), vlan, + opaque_data_0, opaque_data_1); + + if ((cdev->dp_module & NETIF_MSG_PKTDATA) && buffer->data) { + print_hex_dump(KERN_INFO, "", + DUMP_PREFIX_OFFSET, 16, 1, + buffer->data, packet_length, false); + } + + /* Determine if data is valid */ + if (packet_length < ETH_HLEN) + reuse = true; + + /* Allocate a replacement for buffer; Reuse upon failure */ + if (!reuse) + rc = qed_ll2_alloc_buffer(p_hwfn->cdev, &new_data, + &new_phys_addr); + + /* If need to reuse or there's no replacement buffer, repost this */ + if (rc) + goto out_post; + + skb = build_skb(buffer->data, 0); + if (!skb) { + rc = -ENOMEM; + goto out_post; + } + + pad += NET_SKB_PAD; + skb_reserve(skb, pad); + skb_put(skb, packet_length); + skb_checksum_none_assert(skb); + + /* Get parital ethernet information instead of eth_type_trans(), + * Since we don't have an associated net_device. + */ + skb_reset_mac_header(skb); + skb->protocol = eth_hdr(skb)->h_proto; + + /* Pass SKB onward */ + if (cdev->ll2->cbs && cdev->ll2->cbs->rx_cb) { + if (vlan) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); + cdev->ll2->cbs->rx_cb(cdev->ll2->cb_cookie, skb, + opaque_data_0, opaque_data_1); + } + + /* Update Buffer information and update FW producer */ + buffer->data = new_data; + buffer->phys_addr = new_phys_addr; + +out_post: + rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); + + if (rc) + qed_ll2_dealloc_buffer(cdev, buffer); +} + +static struct qed_ll2_info *__qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn, + u8 connection_handle, + bool b_lock, + bool b_only_active) +{ + struct qed_ll2_info *p_ll2_conn, *p_ret = NULL; + + if (connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) + return NULL; + + if (!p_hwfn->p_ll2_info) + return NULL; + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + if (b_only_active) { + if (b_lock) + mutex_lock(&p_ll2_conn->mutex); + if (p_ll2_conn->b_active) + p_ret = p_ll2_conn; + if (b_lock) + mutex_unlock(&p_ll2_conn->mutex); + } else { + p_ret = p_ll2_conn; + } + + return p_ret; +} + +static struct qed_ll2_info *qed_ll2_handle_sanity(struct qed_hwfn *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, true); +} + +static struct qed_ll2_info *qed_ll2_handle_sanity_lock(struct qed_hwfn *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, true, true); +} + +static struct qed_ll2_info *qed_ll2_handle_sanity_inactive(struct qed_hwfn + *p_hwfn, + u8 connection_handle) +{ + return __qed_ll2_handle_sanity(p_hwfn, connection_handle, false, false); +} + +static void qed_ll2_txq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + bool b_last_packet = false, b_last_frag = false; + struct qed_ll2_tx_packet *p_pkt = NULL; + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_tx_queue *p_tx; + dma_addr_t tx_frag; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + p_tx = &p_ll2_conn->tx_queue; + + while (!list_empty(&p_tx->active_descq)) { + p_pkt = list_first_entry(&p_tx->active_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + b_last_packet = list_empty(&p_tx->active_descq); + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + p_tx->cur_completing_packet = *p_pkt; + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + tx_frag = p_pkt->bds_set[0].tx_frag; + if (p_ll2_conn->gsi_enable) + qed_ll2b_release_tx_gsi_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); + else + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, + b_last_packet); + + } +} + +static int qed_ll2_txq_completion(struct qed_hwfn *p_hwfn, void *p_cookie) +{ + struct qed_ll2_info *p_ll2_conn = p_cookie; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + u16 new_idx = 0, num_bds = 0, num_bds_in_packet = 0; + struct qed_ll2_tx_packet *p_pkt; + bool b_last_frag = false; + unsigned long flags; + dma_addr_t tx_frag; + int rc = -EINVAL; + + spin_lock_irqsave(&p_tx->lock, flags); + if (p_tx->b_completing_packet) { + rc = -EBUSY; + goto out; + } + + new_idx = le16_to_cpu(*p_tx->p_fw_cons); + num_bds = ((s16)new_idx - (s16)p_tx->bds_idx); + while (num_bds) { + if (list_empty(&p_tx->active_descq)) + goto out; + + p_pkt = list_first_entry(&p_tx->active_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + goto out; + + p_tx->b_completing_packet = true; + p_tx->cur_completing_packet = *p_pkt; + num_bds_in_packet = p_pkt->bd_used; + list_del(&p_pkt->list_entry); + + if (num_bds < num_bds_in_packet) { + DP_NOTICE(p_hwfn, + "Rest of BDs does not cover whole packet\n"); + goto out; + } + + num_bds -= num_bds_in_packet; + p_tx->bds_idx += num_bds_in_packet; + while (num_bds_in_packet--) + qed_chain_consume(&p_tx->txq_chain); + + p_tx->cur_completing_bd_idx = 1; + b_last_frag = p_tx->cur_completing_bd_idx == p_pkt->bd_used; + list_add_tail(&p_pkt->list_entry, &p_tx->free_descq); + + spin_unlock_irqrestore(&p_tx->lock, flags); + tx_frag = p_pkt->bds_set[0].tx_frag; + if (p_ll2_conn->gsi_enable) + qed_ll2b_complete_tx_gsi_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, !num_bds); + else + qed_ll2b_complete_tx_packet(p_hwfn, + p_ll2_conn->my_id, + p_pkt->cookie, + tx_frag, + b_last_frag, !num_bds); + spin_lock_irqsave(&p_tx->lock, flags); + } + + p_tx->b_completing_packet = false; + rc = 0; +out: + spin_unlock_irqrestore(&p_tx->lock, flags); + return rc; +} + +static int +qed_ll2_rxq_completion_gsi(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, + union core_rx_cqe_union *p_cqe, + unsigned long lock_flags, bool b_last_cqe) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_info->rx_queue; + struct qed_ll2_rx_packet *p_pkt = NULL; + u16 packet_length, parse_flags, vlan; + u32 src_mac_addrhi; + u16 src_mac_addrlo; + + if (!list_empty(&p_rx->active_descq)) + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, + "GSI Rx completion but active_descq is empty\n"); + return -EIO; + } + + list_del(&p_pkt->list_entry); + parse_flags = le16_to_cpu(p_cqe->rx_cqe_gsi.parse_flags.flags); + packet_length = le16_to_cpu(p_cqe->rx_cqe_gsi.data_length); + vlan = le16_to_cpu(p_cqe->rx_cqe_gsi.vlan); + src_mac_addrhi = le32_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrhi); + src_mac_addrlo = le16_to_cpu(p_cqe->rx_cqe_gsi.src_mac_addrlo); + if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) + DP_NOTICE(p_hwfn, + "Mismatch between active_descq and the LL2 Rx chain\n"); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + spin_unlock_irqrestore(&p_rx->lock, lock_flags); + qed_ll2b_complete_rx_gsi_packet(p_hwfn, + p_ll2_info->my_id, + p_pkt->cookie, + p_pkt->rx_buf_addr, + packet_length, + p_cqe->rx_cqe_gsi.data_length_error, + parse_flags, + vlan, + src_mac_addrhi, + src_mac_addrlo, b_last_cqe); + spin_lock_irqsave(&p_rx->lock, lock_flags); + + return 0; +} + +static int qed_ll2_rxq_completion_reg(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + union core_rx_cqe_union *p_cqe, + unsigned long lock_flags, + bool b_last_cqe) +{ + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct qed_ll2_rx_packet *p_pkt = NULL; + + if (!list_empty(&p_rx->active_descq)) + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) { + DP_NOTICE(p_hwfn, + "LL2 Rx completion but active_descq is empty\n"); + return -EIO; + } + list_del(&p_pkt->list_entry); + + if (qed_chain_consume(&p_rx->rxq_chain) != p_pkt->rxq_bd) + DP_NOTICE(p_hwfn, + "Mismatch between active_descq and the LL2 Rx chain\n"); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + spin_unlock_irqrestore(&p_rx->lock, lock_flags); + qed_ll2b_complete_rx_packet(p_hwfn, p_ll2_conn->my_id, + p_pkt, &p_cqe->rx_cqe_fp, b_last_cqe); + spin_lock_irqsave(&p_rx->lock, lock_flags); + + return 0; +} + +static int qed_ll2_rxq_completion(struct qed_hwfn *p_hwfn, void *cookie) +{ + struct qed_ll2_info *p_ll2_conn = cookie; + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + union core_rx_cqe_union *cqe = NULL; + u16 cq_new_idx = 0, cq_old_idx = 0; + unsigned long flags = 0; + int rc = 0; + + spin_lock_irqsave(&p_rx->lock, flags); + cq_new_idx = le16_to_cpu(*p_rx->p_fw_cons); + cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); + + while (cq_new_idx != cq_old_idx) { + bool b_last_cqe = (cq_new_idx == cq_old_idx); + + cqe = qed_chain_consume(&p_rx->rcq_chain); + cq_old_idx = qed_chain_get_cons_idx(&p_rx->rcq_chain); + + DP_VERBOSE(p_hwfn, + QED_MSG_LL2, + "LL2 [sw. cons %04x, fw. at %04x] - Got Packet of type %02x\n", + cq_old_idx, cq_new_idx, cqe->rx_cqe_sp.type); + + switch (cqe->rx_cqe_sp.type) { + case CORE_RX_CQE_TYPE_SLOW_PATH: + DP_NOTICE(p_hwfn, "LL2 - unexpected Rx CQE slowpath\n"); + rc = -EINVAL; + break; + case CORE_RX_CQE_TYPE_GSI_OFFLOAD: + rc = qed_ll2_rxq_completion_gsi(p_hwfn, p_ll2_conn, + cqe, flags, b_last_cqe); + break; + case CORE_RX_CQE_TYPE_REGULAR: + rc = qed_ll2_rxq_completion_reg(p_hwfn, p_ll2_conn, + cqe, flags, b_last_cqe); + break; + default: + rc = -EIO; + } + } + + spin_unlock_irqrestore(&p_rx->lock, flags); + return rc; +} + +void qed_ll2_rxq_flush(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ll2_rx_packet *p_pkt = NULL; + struct qed_ll2_rx_queue *p_rx; + + p_ll2_conn = qed_ll2_handle_sanity_inactive(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + p_rx = &p_ll2_conn->rx_queue; + + while (!list_empty(&p_rx->active_descq)) { + dma_addr_t rx_buf_addr; + void *cookie; + bool b_last; + + p_pkt = list_first_entry(&p_rx->active_descq, + struct qed_ll2_rx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + list_add_tail(&p_pkt->list_entry, &p_rx->free_descq); + + rx_buf_addr = p_pkt->rx_buf_addr; + cookie = p_pkt->cookie; + + b_last = list_empty(&p_rx->active_descq); + } +} + +static int qed_sp_ll2_rx_queue_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn, + u8 action_on_error) +{ + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + struct qed_ll2_rx_queue *p_rx = &p_ll2_conn->rx_queue; + struct core_rx_start_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + u16 cqe_pbl_size; + int rc = 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_start; + + p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_rx->rx_sb_index; + p_ramrod->complete_event_flg = 1; + + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + DMA_REGPAIR_LE(p_ramrod->bd_base, + p_rx->rxq_chain.p_phys_addr); + cqe_pbl_size = (u16)qed_chain_get_page_cnt(&p_rx->rcq_chain); + p_ramrod->num_of_pbl_pages = cpu_to_le16(cqe_pbl_size); + DMA_REGPAIR_LE(p_ramrod->cqe_pbl_addr, + qed_chain_get_pbl_phys(&p_rx->rcq_chain)); + + p_ramrod->drop_ttl0_flg = p_ll2_conn->rx_drop_ttl0_flg; + p_ramrod->inner_vlan_removal_en = p_ll2_conn->rx_vlan_removal_en; + p_ramrod->queue_id = p_ll2_conn->queue_id; + p_ramrod->main_func_queue = 1; + + if ((IS_MF_DEFAULT(p_hwfn) || IS_MF_SI(p_hwfn)) && + p_ramrod->main_func_queue && (conn_type != QED_LL2_TYPE_ROCE)) { + p_ramrod->mf_si_bcast_accept_all = 1; + p_ramrod->mf_si_mcast_accept_all = 1; + } else { + p_ramrod->mf_si_bcast_accept_all = 0; + p_ramrod->mf_si_mcast_accept_all = 0; + } + + p_ramrod->action_on_error.error_type = action_on_error; + p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_tx_queue_start(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + enum qed_ll2_conn_type conn_type = p_ll2_conn->conn_type; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct core_tx_start_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + union qed_qm_pq_params pq_params; + u16 pq_id = 0, pbl_size; + int rc = -EINVAL; + + if (!QED_LL2_TX_REGISTERED(p_ll2_conn)) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_START, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_tx_queue_start; + + p_ramrod->sb_id = cpu_to_le16(qed_int_get_sp_sb_id(p_hwfn)); + p_ramrod->sb_index = p_tx->tx_sb_index; + p_ramrod->mtu = cpu_to_le16(p_ll2_conn->mtu); + p_ll2_conn->tx_stats_en = 1; + p_ramrod->stats_en = p_ll2_conn->tx_stats_en; + p_ramrod->stats_id = p_ll2_conn->tx_stats_id; + + DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, + qed_chain_get_pbl_phys(&p_tx->txq_chain)); + pbl_size = qed_chain_get_page_cnt(&p_tx->txq_chain); + p_ramrod->pbl_size = cpu_to_le16(pbl_size); + + memset(&pq_params, 0, sizeof(pq_params)); + pq_params.core.tc = p_ll2_conn->tx_tc; + pq_id = qed_get_qm_pq(p_hwfn, PROTOCOLID_CORE, &pq_params); + p_ramrod->qm_pq_id = cpu_to_le16(pq_id); + + switch (conn_type) { + case QED_LL2_TYPE_ISCSI: + case QED_LL2_TYPE_ISCSI_OOO: + p_ramrod->conn_type = PROTOCOLID_ISCSI; + break; + case QED_LL2_TYPE_ROCE: + p_ramrod->conn_type = PROTOCOLID_ROCE; + break; + default: + p_ramrod->conn_type = PROTOCOLID_ETH; + DP_NOTICE(p_hwfn, "Unknown connection type: %d\n", conn_type); + } + + p_ramrod->gsi_offload_flag = p_ll2_conn->gsi_enable; + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_rx_queue_stop(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + struct core_rx_stop_ramrod_data *p_ramrod = NULL; + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_RX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.core_rx_queue_stop; + + p_ramrod->complete_event_flg = 1; + p_ramrod->queue_id = p_ll2_conn->queue_id; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_sp_ll2_tx_queue_stop(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + struct qed_spq_entry *p_ent = NULL; + struct qed_sp_init_data init_data; + int rc = -EINVAL; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = p_ll2_conn->cid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + CORE_RAMROD_TX_QUEUE_STOP, + PROTOCOLID_CORE, &init_data); + if (rc) + return rc; + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int +qed_ll2_acquire_connection_rx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, u16 rx_num_desc) +{ + struct qed_ll2_rx_packet *p_descq; + u32 capacity; + int rc = 0; + + if (!rx_num_desc) + goto out; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_NEXT_PTR, + QED_CHAIN_CNT_TYPE_U16, + rx_num_desc, + sizeof(struct core_rx_bd), + &p_ll2_info->rx_queue.rxq_chain); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate ll2 rxq chain\n"); + goto out; + } + + capacity = qed_chain_get_capacity(&p_ll2_info->rx_queue.rxq_chain); + p_descq = kcalloc(capacity, sizeof(struct qed_ll2_rx_packet), + GFP_KERNEL); + if (!p_descq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, "Failed to allocate ll2 Rx desc\n"); + goto out; + } + p_ll2_info->rx_queue.descq_array = p_descq; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U16, + rx_num_desc, + sizeof(struct core_rx_fast_path_cqe), + &p_ll2_info->rx_queue.rcq_chain); + if (rc) { + DP_NOTICE(p_hwfn, "Failed to allocate ll2 rcq chain\n"); + goto out; + } + + DP_VERBOSE(p_hwfn, QED_MSG_LL2, + "Allocated LL2 Rxq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->conn_type, rx_num_desc); + +out: + return rc; +} + +static int qed_ll2_acquire_connection_tx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_info, + u16 tx_num_desc) +{ + struct qed_ll2_tx_packet *p_descq; + u32 capacity; + int rc = 0; + + if (!tx_num_desc) + goto out; + + rc = qed_chain_alloc(p_hwfn->cdev, + QED_CHAIN_USE_TO_CONSUME_PRODUCE, + QED_CHAIN_MODE_PBL, + QED_CHAIN_CNT_TYPE_U16, + tx_num_desc, + sizeof(struct core_tx_bd), + &p_ll2_info->tx_queue.txq_chain); + if (rc) + goto out; + + capacity = qed_chain_get_capacity(&p_ll2_info->tx_queue.txq_chain); + p_descq = kcalloc(capacity, sizeof(struct qed_ll2_tx_packet), + GFP_KERNEL); + if (!p_descq) { + rc = -ENOMEM; + goto out; + } + p_ll2_info->tx_queue.descq_array = p_descq; + + DP_VERBOSE(p_hwfn, QED_MSG_LL2, + "Allocated LL2 Txq [Type %08x] with 0x%08x buffers\n", + p_ll2_info->conn_type, tx_num_desc); + +out: + if (rc) + DP_NOTICE(p_hwfn, + "Can't allocate memory for Tx LL2 with 0x%08x buffers\n", + tx_num_desc); + return rc; +} + +int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_params, + u16 rx_num_desc, + u16 tx_num_desc, + u8 *p_connection_handle) +{ + qed_int_comp_cb_t comp_rx_cb, comp_tx_cb; + struct qed_ll2_info *p_ll2_info = NULL; + int rc; + u8 i; + + if (!p_connection_handle || !p_hwfn->p_ll2_info) + return -EINVAL; + + /* Find a free connection to be used */ + for (i = 0; (i < QED_MAX_NUM_OF_LL2_CONNECTIONS); i++) { + mutex_lock(&p_hwfn->p_ll2_info[i].mutex); + if (p_hwfn->p_ll2_info[i].b_active) { + mutex_unlock(&p_hwfn->p_ll2_info[i].mutex); + continue; + } + + p_hwfn->p_ll2_info[i].b_active = true; + p_ll2_info = &p_hwfn->p_ll2_info[i]; + mutex_unlock(&p_hwfn->p_ll2_info[i].mutex); + break; + } + if (!p_ll2_info) + return -EBUSY; + + p_ll2_info->conn_type = p_params->conn_type; + p_ll2_info->mtu = p_params->mtu; + p_ll2_info->rx_drop_ttl0_flg = p_params->rx_drop_ttl0_flg; + p_ll2_info->rx_vlan_removal_en = p_params->rx_vlan_removal_en; + p_ll2_info->tx_tc = p_params->tx_tc; + p_ll2_info->tx_dest = p_params->tx_dest; + p_ll2_info->ai_err_packet_too_big = p_params->ai_err_packet_too_big; + p_ll2_info->ai_err_no_buf = p_params->ai_err_no_buf; + p_ll2_info->gsi_enable = p_params->gsi_enable; + + rc = qed_ll2_acquire_connection_rx(p_hwfn, p_ll2_info, rx_num_desc); + if (rc) + goto q_allocate_fail; + + rc = qed_ll2_acquire_connection_tx(p_hwfn, p_ll2_info, tx_num_desc); + if (rc) + goto q_allocate_fail; + + /* Register callbacks for the Rx/Tx queues */ + comp_rx_cb = qed_ll2_rxq_completion; + comp_tx_cb = qed_ll2_txq_completion; + + if (rx_num_desc) { + qed_int_register_cb(p_hwfn, comp_rx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->rx_queue.rx_sb_index, + &p_ll2_info->rx_queue.p_fw_cons); + p_ll2_info->rx_queue.b_cb_registred = true; + } + + if (tx_num_desc) { + qed_int_register_cb(p_hwfn, + comp_tx_cb, + &p_hwfn->p_ll2_info[i], + &p_ll2_info->tx_queue.tx_sb_index, + &p_ll2_info->tx_queue.p_fw_cons); + p_ll2_info->tx_queue.b_cb_registred = true; + } + + *p_connection_handle = i; + return rc; + +q_allocate_fail: + qed_ll2_release_connection(p_hwfn, i); + return -ENOMEM; +} + +static int qed_ll2_establish_connection_rx(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + u8 action_on_error = 0; + + if (!QED_LL2_RX_REGISTERED(p_ll2_conn)) + return 0; + + DIRECT_REG_WR(p_ll2_conn->rx_queue.set_prod_addr, 0x0); + + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_PACKET_TOO_BIG, + p_ll2_conn->ai_err_packet_too_big); + SET_FIELD(action_on_error, + CORE_RX_ACTION_ON_ERROR_NO_BUFF, p_ll2_conn->ai_err_no_buf); + + return qed_sp_ll2_rx_queue_start(p_hwfn, p_ll2_conn, action_on_error); +} + +int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_rx_queue *p_rx; + struct qed_ll2_tx_queue *p_tx; + int rc = -EINVAL; + u32 i, capacity; + u8 qid; + + p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_rx = &p_ll2_conn->rx_queue; + p_tx = &p_ll2_conn->tx_queue; + + qed_chain_reset(&p_rx->rxq_chain); + qed_chain_reset(&p_rx->rcq_chain); + INIT_LIST_HEAD(&p_rx->active_descq); + INIT_LIST_HEAD(&p_rx->free_descq); + INIT_LIST_HEAD(&p_rx->posting_descq); + spin_lock_init(&p_rx->lock); + capacity = qed_chain_get_capacity(&p_rx->rxq_chain); + for (i = 0; i < capacity; i++) + list_add_tail(&p_rx->descq_array[i].list_entry, + &p_rx->free_descq); + *p_rx->p_fw_cons = 0; + + qed_chain_reset(&p_tx->txq_chain); + INIT_LIST_HEAD(&p_tx->active_descq); + INIT_LIST_HEAD(&p_tx->free_descq); + INIT_LIST_HEAD(&p_tx->sending_descq); + spin_lock_init(&p_tx->lock); + capacity = qed_chain_get_capacity(&p_tx->txq_chain); + for (i = 0; i < capacity; i++) + list_add_tail(&p_tx->descq_array[i].list_entry, + &p_tx->free_descq); + p_tx->cur_completing_bd_idx = 0; + p_tx->bds_idx = 0; + p_tx->b_completing_packet = false; + p_tx->cur_send_packet = NULL; + p_tx->cur_send_frag_num = 0; + p_tx->cur_completing_frag_num = 0; + *p_tx->p_fw_cons = 0; + + qed_cxt_acquire_cid(p_hwfn, PROTOCOLID_CORE, &p_ll2_conn->cid); + + qid = p_hwfn->hw_info.resc_start[QED_LL2_QUEUE] + connection_handle; + p_ll2_conn->queue_id = qid; + p_ll2_conn->tx_stats_id = qid; + p_rx->set_prod_addr = (u8 __iomem *)p_hwfn->regview + + GTT_BAR0_MAP_REG_TSDM_RAM + + TSTORM_LL2_RX_PRODS_OFFSET(qid); + p_tx->doorbell_addr = (u8 __iomem *)p_hwfn->doorbells + + qed_db_addr(p_ll2_conn->cid, + DQ_DEMS_LEGACY); + + rc = qed_ll2_establish_connection_rx(p_hwfn, p_ll2_conn); + if (rc) + return rc; + + rc = qed_sp_ll2_tx_queue_start(p_hwfn, p_ll2_conn); + if (rc) + return rc; + + if (p_hwfn->hw_info.personality != QED_PCI_ETH_ROCE) + qed_wr(p_hwfn, p_hwfn->p_main_ptt, PRS_REG_USE_LIGHT_L2, 1); + + return rc; +} + +static void qed_ll2_post_rx_buffer_notify_fw(struct qed_hwfn *p_hwfn, + struct qed_ll2_rx_queue *p_rx, + struct qed_ll2_rx_packet *p_curp) +{ + struct qed_ll2_rx_packet *p_posting_packet = NULL; + struct core_ll2_rx_prod rx_prod = { 0, 0, 0 }; + bool b_notify_fw = false; + u16 bd_prod, cq_prod; + + /* This handles the flushing of already posted buffers */ + while (!list_empty(&p_rx->posting_descq)) { + p_posting_packet = list_first_entry(&p_rx->posting_descq, + struct qed_ll2_rx_packet, + list_entry); + list_del(&p_posting_packet->list_entry); + list_add_tail(&p_posting_packet->list_entry, + &p_rx->active_descq); + b_notify_fw = true; + } + + /* This handles the supplied packet [if there is one] */ + if (p_curp) { + list_add_tail(&p_curp->list_entry, &p_rx->active_descq); + b_notify_fw = true; + } + + if (!b_notify_fw) + return; + + bd_prod = qed_chain_get_prod_idx(&p_rx->rxq_chain); + cq_prod = qed_chain_get_prod_idx(&p_rx->rcq_chain); + rx_prod.bd_prod = cpu_to_le16(bd_prod); + rx_prod.cqe_prod = cpu_to_le16(cq_prod); + DIRECT_REG_WR(p_rx->set_prod_addr, *((u32 *)&rx_prod)); +} + +int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, void *cookie, u8 notify_fw) +{ + struct core_rx_bd_with_buff_len *p_curb = NULL; + struct qed_ll2_rx_packet *p_curp = NULL; + struct qed_ll2_info *p_ll2_conn; + struct qed_ll2_rx_queue *p_rx; + unsigned long flags; + void *p_data; + int rc = 0; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_rx = &p_ll2_conn->rx_queue; + + spin_lock_irqsave(&p_rx->lock, flags); + if (!list_empty(&p_rx->free_descq)) + p_curp = list_first_entry(&p_rx->free_descq, + struct qed_ll2_rx_packet, list_entry); + if (p_curp) { + if (qed_chain_get_elem_left(&p_rx->rxq_chain) && + qed_chain_get_elem_left(&p_rx->rcq_chain)) { + p_data = qed_chain_produce(&p_rx->rxq_chain); + p_curb = (struct core_rx_bd_with_buff_len *)p_data; + qed_chain_produce(&p_rx->rcq_chain); + } + } + + /* If we're lacking entires, let's try to flush buffers to FW */ + if (!p_curp || !p_curb) { + rc = -EBUSY; + p_curp = NULL; + goto out_notify; + } + + /* We have an Rx packet we can fill */ + DMA_REGPAIR_LE(p_curb->addr, addr); + p_curb->buff_length = cpu_to_le16(buf_len); + p_curp->rx_buf_addr = addr; + p_curp->cookie = cookie; + p_curp->rxq_bd = p_curb; + p_curp->buf_length = buf_len; + list_del(&p_curp->list_entry); + + /* Check if we only want to enqueue this packet without informing FW */ + if (!notify_fw) { + list_add_tail(&p_curp->list_entry, &p_rx->posting_descq); + goto out; + } + +out_notify: + qed_ll2_post_rx_buffer_notify_fw(p_hwfn, p_rx, p_curp); +out: + spin_unlock_irqrestore(&p_rx->lock, flags); + return rc; +} + +static void qed_ll2_prepare_tx_packet_set(struct qed_hwfn *p_hwfn, + struct qed_ll2_tx_queue *p_tx, + struct qed_ll2_tx_packet *p_curp, + u8 num_of_bds, + dma_addr_t first_frag, + u16 first_frag_len, void *p_cookie, + u8 notify_fw) +{ + list_del(&p_curp->list_entry); + p_curp->cookie = p_cookie; + p_curp->bd_used = num_of_bds; + p_curp->notify_fw = notify_fw; + p_tx->cur_send_packet = p_curp; + p_tx->cur_send_frag_num = 0; + + p_curp->bds_set[p_tx->cur_send_frag_num].tx_frag = first_frag; + p_curp->bds_set[p_tx->cur_send_frag_num].frag_len = first_frag_len; + p_tx->cur_send_frag_num++; +} + +static void qed_ll2_prepare_tx_packet_set_bd(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2, + struct qed_ll2_tx_packet *p_curp, + u8 num_of_bds, + enum core_tx_dest tx_dest, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + enum core_roce_flavor_type type, + dma_addr_t first_frag, + u16 first_frag_len) +{ + struct qed_chain *p_tx_chain = &p_ll2->tx_queue.txq_chain; + u16 prod_idx = qed_chain_get_prod_idx(p_tx_chain); + struct core_tx_bd *start_bd = NULL; + u16 frag_idx; + + start_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); + start_bd->nw_vlan_or_lb_echo = cpu_to_le16(vlan); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_L4_HDR_OFFSET_W, + cpu_to_le16(l4_hdr_offset_w)); + SET_FIELD(start_bd->bitfield1, CORE_TX_BD_TX_DST, tx_dest); + start_bd->bd_flags.as_bitfield = bd_flags; + start_bd->bd_flags.as_bitfield |= CORE_TX_BD_FLAGS_START_BD_MASK << + CORE_TX_BD_FLAGS_START_BD_SHIFT; + SET_FIELD(start_bd->bitfield0, CORE_TX_BD_NBDS, num_of_bds); + DMA_REGPAIR_LE(start_bd->addr, first_frag); + start_bd->nbytes = cpu_to_le16(first_frag_len); + + SET_FIELD(start_bd->bd_flags.as_bitfield, CORE_TX_BD_FLAGS_ROCE_FLAV, + type); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Tx Producer at [0x%04x] - set with a %04x bytes %02x BDs buffer at %08x:%08x\n", + p_ll2->queue_id, + p_ll2->cid, + p_ll2->conn_type, + prod_idx, + first_frag_len, + num_of_bds, + le32_to_cpu(start_bd->addr.hi), + le32_to_cpu(start_bd->addr.lo)); + + if (p_ll2->tx_queue.cur_send_frag_num == num_of_bds) + return; + + /* Need to provide the packet with additional BDs for frags */ + for (frag_idx = p_ll2->tx_queue.cur_send_frag_num; + frag_idx < num_of_bds; frag_idx++) { + struct core_tx_bd **p_bd = &p_curp->bds_set[frag_idx].txq_bd; + + *p_bd = (struct core_tx_bd *)qed_chain_produce(p_tx_chain); + (*p_bd)->bd_flags.as_bitfield = 0; + (*p_bd)->bitfield1 = 0; + (*p_bd)->bitfield0 = 0; + p_curp->bds_set[frag_idx].tx_frag = 0; + p_curp->bds_set[frag_idx].frag_len = 0; + } +} + +/* This should be called while the Txq spinlock is being held */ +static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_conn) +{ + bool b_notify = p_ll2_conn->tx_queue.cur_send_packet->notify_fw; + struct qed_ll2_tx_queue *p_tx = &p_ll2_conn->tx_queue; + struct qed_ll2_tx_packet *p_pkt = NULL; + struct core_db_data db_msg = { 0, 0, 0 }; + u16 bd_prod; + + /* If there are missing BDs, don't do anything now */ + if (p_ll2_conn->tx_queue.cur_send_frag_num != + p_ll2_conn->tx_queue.cur_send_packet->bd_used) + return; + + /* Push the current packet to the list and clean after it */ + list_add_tail(&p_ll2_conn->tx_queue.cur_send_packet->list_entry, + &p_ll2_conn->tx_queue.sending_descq); + p_ll2_conn->tx_queue.cur_send_packet = NULL; + p_ll2_conn->tx_queue.cur_send_frag_num = 0; + + /* Notify FW of packet only if requested to */ + if (!b_notify) + return; + + bd_prod = qed_chain_get_prod_idx(&p_ll2_conn->tx_queue.txq_chain); + + while (!list_empty(&p_tx->sending_descq)) { + p_pkt = list_first_entry(&p_tx->sending_descq, + struct qed_ll2_tx_packet, list_entry); + if (!p_pkt) + break; + + list_del(&p_pkt->list_entry); + list_add_tail(&p_pkt->list_entry, &p_tx->active_descq); + } + + SET_FIELD(db_msg.params, CORE_DB_DATA_DEST, DB_DEST_XCM); + SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_CMD, DB_AGG_CMD_SET); + SET_FIELD(db_msg.params, CORE_DB_DATA_AGG_VAL_SEL, + DQ_XCM_CORE_TX_BD_PROD_CMD); + db_msg.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; + db_msg.spq_prod = cpu_to_le16(bd_prod); + + /* Make sure the BDs data is updated before ringing the doorbell */ + wmb(); + + DIRECT_REG_WR(p_tx->doorbell_addr, *((u32 *)&db_msg)); + + DP_VERBOSE(p_hwfn, + (NETIF_MSG_TX_QUEUED | QED_MSG_LL2), + "LL2 [q 0x%02x cid 0x%08x type 0x%08x] Doorbelled [producer 0x%04x]\n", + p_ll2_conn->queue_id, + p_ll2_conn->cid, p_ll2_conn->conn_type, db_msg.spq_prod); +} + +int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + u8 num_of_bds, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + enum qed_ll2_roce_flavor_type qed_roce_flavor, + dma_addr_t first_frag, + u16 first_frag_len, void *cookie, u8 notify_fw) +{ + struct qed_ll2_tx_packet *p_curp = NULL; + struct qed_ll2_info *p_ll2_conn = NULL; + enum core_roce_flavor_type roce_flavor; + struct qed_ll2_tx_queue *p_tx; + struct qed_chain *p_tx_chain; + unsigned long flags; + int rc = 0; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + p_tx = &p_ll2_conn->tx_queue; + p_tx_chain = &p_tx->txq_chain; + + if (num_of_bds > CORE_LL2_TX_MAX_BDS_PER_PACKET) + return -EIO; + + spin_lock_irqsave(&p_tx->lock, flags); + if (p_tx->cur_send_packet) { + rc = -EEXIST; + goto out; + } + + /* Get entry, but only if we have tx elements for it */ + if (!list_empty(&p_tx->free_descq)) + p_curp = list_first_entry(&p_tx->free_descq, + struct qed_ll2_tx_packet, list_entry); + if (p_curp && qed_chain_get_elem_left(p_tx_chain) < num_of_bds) + p_curp = NULL; + + if (!p_curp) { + rc = -EBUSY; + goto out; + } + + if (qed_roce_flavor == QED_LL2_ROCE) { + roce_flavor = CORE_ROCE; + } else if (qed_roce_flavor == QED_LL2_RROCE) { + roce_flavor = CORE_RROCE; + } else { + rc = -EINVAL; + goto out; + } + + /* Prepare packet and BD, and perhaps send a doorbell to FW */ + qed_ll2_prepare_tx_packet_set(p_hwfn, p_tx, p_curp, + num_of_bds, first_frag, + first_frag_len, cookie, notify_fw); + qed_ll2_prepare_tx_packet_set_bd(p_hwfn, p_ll2_conn, p_curp, + num_of_bds, CORE_TX_DEST_NW, + vlan, bd_flags, l4_hdr_offset_w, + roce_flavor, + first_frag, first_frag_len); + + qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + +out: + spin_unlock_irqrestore(&p_tx->lock, flags); + return rc; +} + +int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, u16 nbytes) +{ + struct qed_ll2_tx_packet *p_cur_send_packet = NULL; + struct qed_ll2_info *p_ll2_conn = NULL; + u16 cur_send_frag_num = 0; + struct core_tx_bd *p_bd; + unsigned long flags; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + + if (!p_ll2_conn->tx_queue.cur_send_packet) + return -EINVAL; + + p_cur_send_packet = p_ll2_conn->tx_queue.cur_send_packet; + cur_send_frag_num = p_ll2_conn->tx_queue.cur_send_frag_num; + + if (cur_send_frag_num >= p_cur_send_packet->bd_used) + return -EINVAL; + + /* Fill the BD information, and possibly notify FW */ + p_bd = p_cur_send_packet->bds_set[cur_send_frag_num].txq_bd; + DMA_REGPAIR_LE(p_bd->addr, addr); + p_bd->nbytes = cpu_to_le16(nbytes); + p_cur_send_packet->bds_set[cur_send_frag_num].tx_frag = addr; + p_cur_send_packet->bds_set[cur_send_frag_num].frag_len = nbytes; + + p_ll2_conn->tx_queue.cur_send_frag_num++; + + spin_lock_irqsave(&p_ll2_conn->tx_queue.lock, flags); + qed_ll2_tx_packet_notify(p_hwfn, p_ll2_conn); + spin_unlock_irqrestore(&p_ll2_conn->tx_queue.lock, flags); + + return 0; +} + +int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + int rc = -EINVAL; + + p_ll2_conn = qed_ll2_handle_sanity_lock(p_hwfn, connection_handle); + if (!p_ll2_conn) + return -EINVAL; + + /* Stop Tx & Rx of connection, if needed */ + if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { + rc = qed_sp_ll2_tx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + return rc; + qed_ll2_txq_flush(p_hwfn, connection_handle); + } + + if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { + rc = qed_sp_ll2_rx_queue_stop(p_hwfn, p_ll2_conn); + if (rc) + return rc; + qed_ll2_rxq_flush(p_hwfn, connection_handle); + } + + return rc; +} + +void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + + p_ll2_conn = qed_ll2_handle_sanity(p_hwfn, connection_handle); + if (!p_ll2_conn) + return; + + if (QED_LL2_RX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->rx_queue.b_cb_registred = false; + qed_int_unregister_cb(p_hwfn, p_ll2_conn->rx_queue.rx_sb_index); + } + + if (QED_LL2_TX_REGISTERED(p_ll2_conn)) { + p_ll2_conn->tx_queue.b_cb_registred = false; + qed_int_unregister_cb(p_hwfn, p_ll2_conn->tx_queue.tx_sb_index); + } + + kfree(p_ll2_conn->tx_queue.descq_array); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->tx_queue.txq_chain); + + kfree(p_ll2_conn->rx_queue.descq_array); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rxq_chain); + qed_chain_free(p_hwfn->cdev, &p_ll2_conn->rx_queue.rcq_chain); + + qed_cxt_release_cid(p_hwfn, p_ll2_conn->cid); + + mutex_lock(&p_ll2_conn->mutex); + p_ll2_conn->b_active = false; + mutex_unlock(&p_ll2_conn->mutex); +} + +struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn) +{ + struct qed_ll2_info *p_ll2_connections; + u8 i; + + /* Allocate LL2's set struct */ + p_ll2_connections = kcalloc(QED_MAX_NUM_OF_LL2_CONNECTIONS, + sizeof(struct qed_ll2_info), GFP_KERNEL); + if (!p_ll2_connections) { + DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_ll2'\n"); + return NULL; + } + + for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++) + p_ll2_connections[i].my_id = i; + + return p_ll2_connections; +} + +void qed_ll2_setup(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections) +{ + int i; + + for (i = 0; i < QED_MAX_NUM_OF_LL2_CONNECTIONS; i++) + mutex_init(&p_ll2_connections[i].mutex); +} + +void qed_ll2_free(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections) +{ + kfree(p_ll2_connections); +} + +static void _qed_ll2_get_tstats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_tstorm_per_queue_stat tstats; + u8 qid = p_ll2_conn->queue_id; + u32 tstats_addr; + + memset(&tstats, 0, sizeof(tstats)); + tstats_addr = BAR0_MAP_REG_TSDM_RAM + + CORE_LL2_TSTORM_PER_QUEUE_STAT_OFFSET(qid); + qed_memcpy_from(p_hwfn, p_ptt, &tstats, tstats_addr, sizeof(tstats)); + + p_stats->packet_too_big_discard = + HILO_64_REGPAIR(tstats.packet_too_big_discard); + p_stats->no_buff_discard = HILO_64_REGPAIR(tstats.no_buff_discard); +} + +static void _qed_ll2_get_ustats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_ustorm_per_queue_stat ustats; + u8 qid = p_ll2_conn->queue_id; + u32 ustats_addr; + + memset(&ustats, 0, sizeof(ustats)); + ustats_addr = BAR0_MAP_REG_USDM_RAM + + CORE_LL2_USTORM_PER_QUEUE_STAT_OFFSET(qid); + qed_memcpy_from(p_hwfn, p_ptt, &ustats, ustats_addr, sizeof(ustats)); + + p_stats->rcv_ucast_bytes = HILO_64_REGPAIR(ustats.rcv_ucast_bytes); + p_stats->rcv_mcast_bytes = HILO_64_REGPAIR(ustats.rcv_mcast_bytes); + p_stats->rcv_bcast_bytes = HILO_64_REGPAIR(ustats.rcv_bcast_bytes); + p_stats->rcv_ucast_pkts = HILO_64_REGPAIR(ustats.rcv_ucast_pkts); + p_stats->rcv_mcast_pkts = HILO_64_REGPAIR(ustats.rcv_mcast_pkts); + p_stats->rcv_bcast_pkts = HILO_64_REGPAIR(ustats.rcv_bcast_pkts); +} + +static void _qed_ll2_get_pstats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_ll2_info *p_ll2_conn, + struct qed_ll2_stats *p_stats) +{ + struct core_ll2_pstorm_per_queue_stat pstats; + u8 stats_id = p_ll2_conn->tx_stats_id; + u32 pstats_addr; + + memset(&pstats, 0, sizeof(pstats)); + pstats_addr = BAR0_MAP_REG_PSDM_RAM + + CORE_LL2_PSTORM_PER_QUEUE_STAT_OFFSET(stats_id); + qed_memcpy_from(p_hwfn, p_ptt, &pstats, pstats_addr, sizeof(pstats)); + + p_stats->sent_ucast_bytes = HILO_64_REGPAIR(pstats.sent_ucast_bytes); + p_stats->sent_mcast_bytes = HILO_64_REGPAIR(pstats.sent_mcast_bytes); + p_stats->sent_bcast_bytes = HILO_64_REGPAIR(pstats.sent_bcast_bytes); + p_stats->sent_ucast_pkts = HILO_64_REGPAIR(pstats.sent_ucast_pkts); + p_stats->sent_mcast_pkts = HILO_64_REGPAIR(pstats.sent_mcast_pkts); + p_stats->sent_bcast_pkts = HILO_64_REGPAIR(pstats.sent_bcast_pkts); +} + +int qed_ll2_get_stats(struct qed_hwfn *p_hwfn, + u8 connection_handle, struct qed_ll2_stats *p_stats) +{ + struct qed_ll2_info *p_ll2_conn = NULL; + struct qed_ptt *p_ptt; + + memset(p_stats, 0, sizeof(*p_stats)); + + if ((connection_handle >= QED_MAX_NUM_OF_LL2_CONNECTIONS) || + !p_hwfn->p_ll2_info) + return -EINVAL; + + p_ll2_conn = &p_hwfn->p_ll2_info[connection_handle]; + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_ERR(p_hwfn, "Failed to acquire ptt\n"); + return -EINVAL; + } + + _qed_ll2_get_tstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + _qed_ll2_get_ustats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + if (p_ll2_conn->tx_stats_en) + _qed_ll2_get_pstats(p_hwfn, p_ptt, p_ll2_conn, p_stats); + + qed_ptt_release(p_hwfn, p_ptt); + return 0; +} + +static void qed_ll2_register_cb_ops(struct qed_dev *cdev, + const struct qed_ll2_cb_ops *ops, + void *cookie) +{ + cdev->ll2->cbs = ops; + cdev->ll2->cb_cookie = cookie; +} + +static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) +{ + struct qed_ll2_info ll2_info; + struct qed_ll2_buffer *buffer; + enum qed_ll2_conn_type conn_type; + struct qed_ptt *p_ptt; + int rc, i; + + /* Initialize LL2 locks & lists */ + INIT_LIST_HEAD(&cdev->ll2->list); + spin_lock_init(&cdev->ll2->lock); + cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + L1_CACHE_BYTES + params->mtu; + cdev->ll2->frags_mapped = params->frags_mapped; + + /*Allocate memory for LL2 */ + DP_INFO(cdev, "Allocating LL2 buffers of size %08x bytes\n", + cdev->ll2->rx_size); + for (i = 0; i < QED_LL2_RX_SIZE; i++) { + buffer = kzalloc(sizeof(*buffer), GFP_KERNEL); + if (!buffer) { + DP_INFO(cdev, "Failed to allocate LL2 buffers\n"); + goto fail; + } + + rc = qed_ll2_alloc_buffer(cdev, (u8 **)&buffer->data, + &buffer->phys_addr); + if (rc) { + kfree(buffer); + goto fail; + } + + list_add_tail(&buffer->list, &cdev->ll2->list); + } + + switch (QED_LEADING_HWFN(cdev)->hw_info.personality) { + case QED_PCI_ISCSI: + conn_type = QED_LL2_TYPE_ISCSI; + break; + case QED_PCI_ETH_ROCE: + conn_type = QED_LL2_TYPE_ROCE; + break; + default: + conn_type = QED_LL2_TYPE_TEST; + } + + /* Prepare the temporary ll2 information */ + memset(&ll2_info, 0, sizeof(ll2_info)); + ll2_info.conn_type = conn_type; + ll2_info.mtu = params->mtu; + ll2_info.rx_drop_ttl0_flg = params->drop_ttl0_packets; + ll2_info.rx_vlan_removal_en = params->rx_vlan_stripping; + ll2_info.tx_tc = 0; + ll2_info.tx_dest = CORE_TX_DEST_NW; + ll2_info.gsi_enable = 1; + + rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_info, + QED_LL2_RX_SIZE, QED_LL2_TX_SIZE, + &cdev->ll2->handle); + if (rc) { + DP_INFO(cdev, "Failed to acquire LL2 connection\n"); + goto fail; + } + + rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), + cdev->ll2->handle); + if (rc) { + DP_INFO(cdev, "Failed to establish LL2 connection\n"); + goto release_fail; + } + + /* Post all Rx buffers to FW */ + spin_lock_bh(&cdev->ll2->lock); + list_for_each_entry(buffer, &cdev->ll2->list, list) { + rc = qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + buffer->phys_addr, 0, buffer, 1); + if (rc) { + DP_INFO(cdev, + "Failed to post an Rx buffer; Deleting it\n"); + dma_unmap_single(&cdev->pdev->dev, buffer->phys_addr, + cdev->ll2->rx_size, DMA_FROM_DEVICE); + kfree(buffer->data); + list_del(&buffer->list); + kfree(buffer); + } else { + cdev->ll2->rx_cnt++; + } + } + spin_unlock_bh(&cdev->ll2->lock); + + if (!cdev->ll2->rx_cnt) { + DP_INFO(cdev, "Failed passing even a single Rx buffer\n"); + goto release_terminate; + } + + if (!is_valid_ether_addr(params->ll2_mac_address)) { + DP_INFO(cdev, "Invalid Ethernet address\n"); + goto release_terminate; + } + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_INFO(cdev, "Failed to acquire PTT\n"); + goto release_terminate; + } + + rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + params->ll2_mac_address); + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + if (rc) { + DP_ERR(cdev, "Failed to allocate LLH filter\n"); + goto release_terminate_all; + } + + ether_addr_copy(cdev->ll2_mac_address, params->ll2_mac_address); + + return 0; + +release_terminate_all: + +release_terminate: + qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); +release_fail: + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); +fail: + qed_ll2_kill_buffers(cdev); + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + return -EINVAL; +} + +static int qed_ll2_stop(struct qed_dev *cdev) +{ + struct qed_ptt *p_ptt; + int rc; + + if (cdev->ll2->handle == QED_LL2_UNUSED_HANDLE) + return 0; + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_INFO(cdev, "Failed to acquire PTT\n"); + goto fail; + } + + qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + cdev->ll2_mac_address); + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + eth_zero_addr(cdev->ll2_mac_address); + + rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), + cdev->ll2->handle); + if (rc) + DP_INFO(cdev, "Failed to terminate LL2 connection\n"); + + qed_ll2_kill_buffers(cdev); + + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), cdev->ll2->handle); + cdev->ll2->handle = QED_LL2_UNUSED_HANDLE; + + return rc; +fail: + return -EINVAL; +} + +static int qed_ll2_start_xmit(struct qed_dev *cdev, struct sk_buff *skb) +{ + const skb_frag_t *frag; + int rc = -EINVAL, i; + dma_addr_t mapping; + u16 vlan = 0; + u8 flags = 0; + + if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { + DP_INFO(cdev, "Cannot transmit a checksumed packet\n"); + return -EINVAL; + } + + if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { + DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", + 1 + skb_shinfo(skb)->nr_frags); + return -EINVAL; + } + + mapping = dma_map_single(&cdev->pdev->dev, skb->data, + skb->len, DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(&cdev->pdev->dev, mapping))) { + DP_NOTICE(cdev, "SKB mapping failed\n"); + return -EINVAL; + } + + /* Request HW to calculate IP csum */ + if (!((vlan_get_protocol(skb) == htons(ETH_P_IPV6)) && + ipv6_hdr(skb)->nexthdr == NEXTHDR_IPV6)) + flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT); + + if (skb_vlan_tag_present(skb)) { + vlan = skb_vlan_tag_get(skb); + flags |= BIT(CORE_TX_BD_FLAGS_VLAN_INSERTION_SHIFT); + } + + rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + 1 + skb_shinfo(skb)->nr_frags, + vlan, flags, 0, 0 /* RoCE FLAVOR */, + mapping, skb->len, skb, 1); + if (rc) + goto err; + + for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { + frag = &skb_shinfo(skb)->frags[i]; + if (!cdev->ll2->frags_mapped) { + mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0, + skb_frag_size(frag), + DMA_TO_DEVICE); + + if (unlikely(dma_mapping_error(&cdev->pdev->dev, + mapping))) { + DP_NOTICE(cdev, + "Unable to map frag - dropping packet\n"); + goto err; + } + } else { + mapping = page_to_phys(skb_frag_page(frag)) | + frag->page_offset; + } + + rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, + mapping, + skb_frag_size(frag)); + + /* if failed not much to do here, partial packet has been posted + * we can't free memory, will need to wait for completion. + */ + if (rc) + goto err2; + } + + return 0; + +err: + dma_unmap_single(&cdev->pdev->dev, mapping, skb->len, DMA_TO_DEVICE); + +err2: + return rc; +} + +static int qed_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) +{ + if (!cdev->ll2) + return -EINVAL; + + return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + cdev->ll2->handle, stats); +} + +const struct qed_ll2_ops qed_ll2_ops_pass = { + .start = &qed_ll2_start, + .stop = &qed_ll2_stop, + .start_xmit = &qed_ll2_start_xmit, + .register_cb_ops = &qed_ll2_register_cb_ops, + .get_stats = &qed_ll2_stats, +}; + +int qed_ll2_alloc_if(struct qed_dev *cdev) +{ + cdev->ll2 = kzalloc(sizeof(*cdev->ll2), GFP_KERNEL); + return cdev->ll2 ? 0 : -ENOMEM; +} + +void qed_ll2_dealloc_if(struct qed_dev *cdev) +{ + kfree(cdev->ll2); + cdev->ll2 = NULL; +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.h b/drivers/net/ethernet/qlogic/qed/qed_ll2.h new file mode 100644 index 000000000000..80a5dc2d652d --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.h @@ -0,0 +1,316 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_LL2_H +#define _QED_LL2_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_hsi.h" +#include "qed_sp.h" + +#define QED_MAX_NUM_OF_LL2_CONNECTIONS (4) + +enum qed_ll2_roce_flavor_type { + QED_LL2_ROCE, + QED_LL2_RROCE, + MAX_QED_LL2_ROCE_FLAVOR_TYPE +}; + +enum qed_ll2_conn_type { + QED_LL2_TYPE_RESERVED, + QED_LL2_TYPE_ISCSI, + QED_LL2_TYPE_TEST, + QED_LL2_TYPE_ISCSI_OOO, + QED_LL2_TYPE_RESERVED2, + QED_LL2_TYPE_ROCE, + QED_LL2_TYPE_RESERVED3, + MAX_QED_LL2_RX_CONN_TYPE +}; + +struct qed_ll2_rx_packet { + struct list_head list_entry; + struct core_rx_bd_with_buff_len *rxq_bd; + dma_addr_t rx_buf_addr; + u16 buf_length; + void *cookie; + u8 placement_offset; + u16 parse_flags; + u16 packet_length; + u16 vlan; + u32 opaque_data[2]; +}; + +struct qed_ll2_tx_packet { + struct list_head list_entry; + u16 bd_used; + u16 vlan; + u16 l4_hdr_offset_w; + u8 bd_flags; + bool notify_fw; + void *cookie; + + struct { + struct core_tx_bd *txq_bd; + dma_addr_t tx_frag; + u16 frag_len; + } bds_set[ETH_TX_MAX_BDS_PER_NON_LSO_PACKET]; +}; + +struct qed_ll2_rx_queue { + /* Lock protecting the Rx queue manipulation */ + spinlock_t lock; + struct qed_chain rxq_chain; + struct qed_chain rcq_chain; + u8 rx_sb_index; + bool b_cb_registred; + __le16 *p_fw_cons; + struct list_head active_descq; + struct list_head free_descq; + struct list_head posting_descq; + struct qed_ll2_rx_packet *descq_array; + void __iomem *set_prod_addr; +}; + +struct qed_ll2_tx_queue { + /* Lock protecting the Tx queue manipulation */ + spinlock_t lock; + struct qed_chain txq_chain; + u8 tx_sb_index; + bool b_cb_registred; + __le16 *p_fw_cons; + struct list_head active_descq; + struct list_head free_descq; + struct list_head sending_descq; + struct qed_ll2_tx_packet *descq_array; + struct qed_ll2_tx_packet *cur_send_packet; + struct qed_ll2_tx_packet cur_completing_packet; + u16 cur_completing_bd_idx; + void __iomem *doorbell_addr; + u16 bds_idx; + u16 cur_send_frag_num; + u16 cur_completing_frag_num; + bool b_completing_packet; +}; + +struct qed_ll2_info { + /* Lock protecting the state of LL2 */ + struct mutex mutex; + enum qed_ll2_conn_type conn_type; + u32 cid; + u8 my_id; + u8 queue_id; + u8 tx_stats_id; + bool b_active; + u16 mtu; + u8 rx_drop_ttl0_flg; + u8 rx_vlan_removal_en; + u8 tx_tc; + enum core_tx_dest tx_dest; + enum core_error_handle ai_err_packet_too_big; + enum core_error_handle ai_err_no_buf; + u8 tx_stats_en; + struct qed_ll2_rx_queue rx_queue; + struct qed_ll2_tx_queue tx_queue; + u8 gsi_enable; +}; + +/** + * @brief qed_ll2_acquire_connection - allocate resources, + * starts rx & tx (if relevant) queues pair. Provides + * connecion handler as output parameter. + * + * @param p_hwfn + * @param p_params Contain various configuration properties + * @param rx_num_desc + * @param tx_num_desc + * + * @param p_connection_handle Output container for LL2 connection's handle + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_acquire_connection(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_params, + u16 rx_num_desc, + u16 tx_num_desc, + u8 *p_connection_handle); + +/** + * @brief qed_ll2_establish_connection - start previously + * allocated LL2 queues pair + * + * @param p_hwfn + * @param p_ptt + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_establish_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_post_rx_buffers - submit buffers to LL2 Rx queue. + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * @param addr rx (physical address) buffers to submit + * @param cookie + * @param notify_fw produce corresponding Rx BD immediately + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_post_rx_buffer(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, + u16 buf_len, void *cookie, u8 notify_fw); + +/** + * @brief qed_ll2_prepare_tx_packet - request for start Tx BD + * to prepare Tx packet submission to FW. + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * @param num_of_bds a number of requested BD equals a number of + * fragments in Tx packet + * @param vlan VLAN to insert to packet (if insertion set) + * @param bd_flags + * @param l4_hdr_offset_w L4 Header Offset from start of packet + * (in words). This is needed if both l4_csum + * and ipv6_ext are set + * @param first_frag + * @param first_frag_len + * @param cookie + * + * @param notify_fw + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_prepare_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + u8 num_of_bds, + u16 vlan, + u8 bd_flags, + u16 l4_hdr_offset_w, + enum qed_ll2_roce_flavor_type qed_roce_flavor, + dma_addr_t first_frag, + u16 first_frag_len, void *cookie, u8 notify_fw); + +/** + * @brief qed_ll2_release_connection - releases resources + * allocated for LL2 connection + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + */ +void qed_ll2_release_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_set_fragment_of_tx_packet - provides fragments to fill + * Tx BD of BDs requested by + * qed_ll2_prepare_tx_packet + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle + * obtained from + * qed_ll2_require_connection + * @param addr + * @param nbytes + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_set_fragment_of_tx_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + dma_addr_t addr, u16 nbytes); + +/** + * @brief qed_ll2_terminate_connection - stops Tx/Rx queues + * + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle + * obtained from + * qed_ll2_require_connection + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_terminate_connection(struct qed_hwfn *p_hwfn, u8 connection_handle); + +/** + * @brief qed_ll2_get_stats - get LL2 queue's statistics + * + * + * @param p_hwfn + * @param connection_handle LL2 connection's handle obtained from + * qed_ll2_require_connection + * @param p_stats + * + * @return 0 on success, failure otherwise + */ +int qed_ll2_get_stats(struct qed_hwfn *p_hwfn, + u8 connection_handle, struct qed_ll2_stats *p_stats); + +/** + * @brief qed_ll2_alloc - Allocates LL2 connections set + * + * @param p_hwfn + * + * @return pointer to alocated qed_ll2_info or NULL + */ +struct qed_ll2_info *qed_ll2_alloc(struct qed_hwfn *p_hwfn); + +/** + * @brief qed_ll2_setup - Inits LL2 connections set + * + * @param p_hwfn + * @param p_ll2_connections + * + */ +void qed_ll2_setup(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections); + +/** + * @brief qed_ll2_free - Releases LL2 connections set + * + * @param p_hwfn + * @param p_ll2_connections + * + */ +void qed_ll2_free(struct qed_hwfn *p_hwfn, + struct qed_ll2_info *p_ll2_connections); +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet); +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet); +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index c7dc34bfdd0a..4ee3151e80c2 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -22,15 +22,22 @@ #include #include #include +#include #include "qed.h" #include "qed_sriov.h" #include "qed_sp.h" #include "qed_dev_api.h" +#include "qed_ll2.h" #include "qed_mcp.h" #include "qed_hw.h" #include "qed_selftest.h" +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#define QED_ROCE_QPS (8192) +#define QED_ROCE_DPIS (8) +#endif + static char version[] = "QLogic FastLinQ 4xxxx Core Module qed " DRV_MODULE_VERSION "\n"; @@ -51,8 +58,6 @@ MODULE_FIRMWARE(QED_FW_FILE_NAME); static int __init qed_init(void) { - pr_notice("qed_init called\n"); - pr_info("%s", version); return 0; @@ -106,8 +111,7 @@ static void qed_free_pci(struct qed_dev *cdev) /* Performs PCI initializations as well as initializing PCI-related parameters * in the device structrue. Returns 0 in case of success. */ -static int qed_init_pci(struct qed_dev *cdev, - struct pci_dev *pdev) +static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev) { u8 rev_id; int rc; @@ -207,8 +211,8 @@ int qed_fill_dev_info(struct qed_dev *cdev, dev_info->pci_mem_start = cdev->pci_params.mem_start; dev_info->pci_mem_end = cdev->pci_params.mem_end; dev_info->pci_irq = cdev->pci_params.irq; - dev_info->rdma_supported = - (cdev->hwfns[0].hw_info.personality == QED_PCI_ETH_ROCE); + dev_info->rdma_supported = (cdev->hwfns[0].hw_info.personality == + QED_PCI_ETH_ROCE); dev_info->is_mf_default = IS_MF_DEFAULT(&cdev->hwfns[0]); ether_addr_copy(dev_info->hw_mac, cdev->hwfns[0].hw_info.hw_mac_addr); @@ -263,8 +267,7 @@ static struct qed_dev *qed_alloc_cdev(struct pci_dev *pdev) } /* Sets the requested power state */ -static int qed_set_power_state(struct qed_dev *cdev, - pci_power_t state) +static int qed_set_power_state(struct qed_dev *cdev, pci_power_t state) { if (!cdev) return -ENODEV; @@ -366,8 +369,8 @@ static int qed_enable_msix(struct qed_dev *cdev, DP_NOTICE(cdev, "Trying to enable MSI-X with less vectors (%d out of %d)\n", cnt, int_params->in.num_vectors); - rc = pci_enable_msix_exact(cdev->pdev, - int_params->msix_table, cnt); + rc = pci_enable_msix_exact(cdev->pdev, int_params->msix_table, + cnt); if (!rc) rc = cnt; } @@ -439,6 +442,11 @@ static int qed_set_int_mode(struct qed_dev *cdev, bool force_mode) } out: + if (!rc) + DP_INFO(cdev, "Using %s interrupts\n", + int_params->out.int_mode == QED_INT_MODE_INTA ? + "INTa" : int_params->out.int_mode == QED_INT_MODE_MSI ? + "MSI" : "MSIX"); cdev->int_coalescing_mode = QED_COAL_MODE_ENABLE; return rc; @@ -514,19 +522,18 @@ static irqreturn_t qed_single_int(int irq, void *dev_instance) int qed_slowpath_irq_req(struct qed_hwfn *hwfn) { struct qed_dev *cdev = hwfn->cdev; + u32 int_mode; int rc = 0; u8 id; - if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { + int_mode = cdev->int_params.out.int_mode; + if (int_mode == QED_INT_MODE_MSIX) { id = hwfn->my_id; snprintf(hwfn->name, NAME_SIZE, "sp-%d-%02x:%02x.%02x", id, cdev->pdev->bus->number, PCI_SLOT(cdev->pdev->devfn), hwfn->abs_pf_id); rc = request_irq(cdev->int_params.msix_table[id].vector, qed_msix_sp_int, 0, hwfn->name, hwfn->sp_dpc); - if (!rc) - DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP), - "Requested slowpath MSI-X\n"); } else { unsigned long flags = 0; @@ -541,6 +548,13 @@ int qed_slowpath_irq_req(struct qed_hwfn *hwfn) flags, cdev->name, cdev); } + if (rc) + DP_NOTICE(cdev, "request_irq failed, rc = %d\n", rc); + else + DP_VERBOSE(hwfn, (NETIF_MSG_INTR | QED_MSG_SP), + "Requested slowpath %s\n", + (int_mode == QED_INT_MODE_MSIX) ? "MSI-X" : "IRQ"); + return rc; } @@ -581,6 +595,8 @@ static int qed_nic_stop(struct qed_dev *cdev) } } + qed_dbg_pf_exit(cdev); + return rc; } @@ -599,7 +615,16 @@ static int qed_nic_reset(struct qed_dev *cdev) static int qed_nic_setup(struct qed_dev *cdev) { - int rc; + int rc, i; + + /* Determine if interface is going to require LL2 */ + if (QED_LEADING_HWFN(cdev)->hw_info.personality != QED_PCI_ETH) { + for (i = 0; i < cdev->num_hwfns; i++) { + struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; + + p_hwfn->using_ll2 = true; + } + } rc = qed_resc_alloc(cdev); if (rc) @@ -657,6 +682,9 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, enum qed_int_mode int_mode) { struct qed_sb_cnt_info sb_cnt_info; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + int num_l2_queues; +#endif int rc; int i; @@ -687,6 +715,31 @@ static int qed_slowpath_setup_int(struct qed_dev *cdev, cdev->int_params.fp_msix_cnt = cdev->int_params.out.num_vectors - cdev->num_hwfns; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + num_l2_queues = 0; + for_each_hwfn(cdev, i) + num_l2_queues += FEAT_NUM(&cdev->hwfns[i], QED_PF_L2_QUE); + + DP_VERBOSE(cdev, QED_MSG_RDMA, + "cdev->int_params.fp_msix_cnt=%d num_l2_queues=%d\n", + cdev->int_params.fp_msix_cnt, num_l2_queues); + + if (cdev->int_params.fp_msix_cnt > num_l2_queues) { + cdev->int_params.rdma_msix_cnt = + (cdev->int_params.fp_msix_cnt - num_l2_queues) + / cdev->num_hwfns; + cdev->int_params.rdma_msix_base = + cdev->int_params.fp_msix_base + num_l2_queues; + cdev->int_params.fp_msix_cnt = num_l2_queues; + } else { + cdev->int_params.rdma_msix_cnt = 0; + } + + DP_VERBOSE(cdev, QED_MSG_RDMA, "roce_msix_cnt=%d roce_msix_base=%d\n", + cdev->int_params.rdma_msix_cnt, + cdev->int_params.rdma_msix_base); +#endif + return 0; } @@ -790,6 +843,13 @@ static void qed_update_pf_params(struct qed_dev *cdev, { int i; +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + params->rdma_pf_params.num_qps = QED_ROCE_QPS; + params->rdma_pf_params.min_dpis = QED_ROCE_DPIS; + /* divide by 3 the MRs to avoid MF ILT overflow */ + params->rdma_pf_params.num_mrs = RDMA_MAX_TIDS; + params->rdma_pf_params.gl_pi = QED_ROCE_PROTOCOL_INDEX; +#endif for (i = 0; i < cdev->num_hwfns; i++) { struct qed_hwfn *p_hwfn = &cdev->hwfns[i]; @@ -834,13 +894,13 @@ static int qed_slowpath_start(struct qed_dev *cdev, if (IS_PF(cdev)) { /* Allocate stream for unzipping */ rc = qed_alloc_stream_mem(cdev); - if (rc) { - DP_NOTICE(cdev, "Failed to allocate stream memory\n"); + if (rc) goto err2; - } /* First Dword used to diffrentiate between various sources */ data = cdev->firmware->data + sizeof(u32); + + qed_dbg_pf_init(cdev); } memset(&tunn_info, 0, sizeof(tunn_info)); @@ -864,6 +924,12 @@ static int qed_slowpath_start(struct qed_dev *cdev, DP_INFO(cdev, "HW initialization and function start completed successfully\n"); + /* Allocate LL2 interface if needed */ + if (QED_LEADING_HWFN(cdev)->using_ll2) { + rc = qed_ll2_alloc_if(cdev); + if (rc) + goto err3; + } if (IS_PF(cdev)) { hwfn = QED_LEADING_HWFN(cdev); drv_version.version = (params->drv_major << 24) | @@ -884,6 +950,8 @@ static int qed_slowpath_start(struct qed_dev *cdev, return 0; +err3: + qed_hw_stop(cdev); err2: qed_hw_timers_stop_all(cdev); if (IS_PF(cdev)) @@ -906,6 +974,8 @@ static int qed_slowpath_stop(struct qed_dev *cdev) if (!cdev) return -ENODEV; + qed_ll2_dealloc_if(cdev); + if (IS_PF(cdev)) { qed_free_stream_mem(cdev); if (IS_QED_ETH_IF(cdev)) @@ -974,8 +1044,7 @@ static u32 qed_sb_init(struct qed_dev *cdev, } static u32 qed_sb_release(struct qed_dev *cdev, - struct qed_sb_info *sb_info, - u16 sb_id) + struct qed_sb_info *sb_info, u16 sb_id) { struct qed_hwfn *p_hwfn; int hwfn_index; @@ -1025,20 +1094,23 @@ static int qed_set_link(struct qed_dev *cdev, struct qed_link_params *params) link_params->speed.autoneg = params->autoneg; if (params->override_flags & QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS) { link_params->speed.advertised_speeds = 0; - if ((params->adv_speeds & SUPPORTED_1000baseT_Half) || - (params->adv_speeds & SUPPORTED_1000baseT_Full)) + if ((params->adv_speeds & QED_LM_1000baseT_Half_BIT) || + (params->adv_speeds & QED_LM_1000baseT_Full_BIT)) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; - if (params->adv_speeds & SUPPORTED_10000baseKR_Full) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G; + if (params->adv_speeds & QED_LM_10000baseKR_Full_BIT) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; - if (params->adv_speeds & SUPPORTED_40000baseLR4_Full) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G; + if (params->adv_speeds & QED_LM_25000baseKR_Full_BIT) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; - if (params->adv_speeds & 0) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G; + if (params->adv_speeds & QED_LM_40000baseLR4_Full_BIT) link_params->speed.advertised_speeds |= - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G; - if (params->adv_speeds & 0) + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G; + if (params->adv_speeds & QED_LM_50000baseKR2_Full_BIT) + link_params->speed.advertised_speeds |= + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G; + if (params->adv_speeds & QED_LM_100000baseKR4_Full_BIT) link_params->speed.advertised_speeds |= NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G; } @@ -1168,50 +1240,56 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->link_up = true; /* TODO - at the moment assume supported and advertised speed equal */ - if_link->supported_caps = SUPPORTED_FIBRE; + if_link->supported_caps = QED_LM_FIBRE_BIT; if (params.speed.autoneg) - if_link->supported_caps |= SUPPORTED_Autoneg; + if_link->supported_caps |= QED_LM_Autoneg_BIT; if (params.pause.autoneg || (params.pause.forced_rx && params.pause.forced_tx)) - if_link->supported_caps |= SUPPORTED_Asym_Pause; + if_link->supported_caps |= QED_LM_Asym_Pause_BIT; if (params.pause.autoneg || params.pause.forced_rx || params.pause.forced_tx) - if_link->supported_caps |= SUPPORTED_Pause; + if_link->supported_caps |= QED_LM_Pause_BIT; if_link->advertised_caps = if_link->supported_caps; if (params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->advertised_caps |= SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full; + if_link->advertised_caps |= QED_LM_1000baseT_Half_BIT | + QED_LM_1000baseT_Full_BIT; if (params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->advertised_caps |= SUPPORTED_10000baseKR_Full; + if_link->advertised_caps |= QED_LM_10000baseKR_Full_BIT; if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->advertised_caps |= SUPPORTED_40000baseLR4_Full; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + if_link->advertised_caps |= QED_LM_25000baseKR_Full_BIT; if (params.speed.advertised_speeds & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->advertised_caps |= 0; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + if_link->advertised_caps |= QED_LM_40000baseLR4_Full_BIT; + if (params.speed.advertised_speeds & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + if_link->advertised_caps |= QED_LM_50000baseKR2_Full_BIT; if (params.speed.advertised_speeds & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->advertised_caps |= 0; + if_link->advertised_caps |= QED_LM_100000baseKR4_Full_BIT; if (link_caps.speed_capabilities & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_1G) - if_link->supported_caps |= SUPPORTED_1000baseT_Half | - SUPPORTED_1000baseT_Full; + if_link->supported_caps |= QED_LM_1000baseT_Half_BIT | + QED_LM_1000baseT_Full_BIT; if (link_caps.speed_capabilities & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_10G) - if_link->supported_caps |= SUPPORTED_10000baseKR_Full; + if_link->supported_caps |= QED_LM_10000baseKR_Full_BIT; if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) - if_link->supported_caps |= SUPPORTED_40000baseLR4_Full; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_25G) + if_link->supported_caps |= QED_LM_25000baseKR_Full_BIT; if (link_caps.speed_capabilities & - NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) - if_link->supported_caps |= 0; + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_40G) + if_link->supported_caps |= QED_LM_40000baseLR4_Full_BIT; + if (link_caps.speed_capabilities & + NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_50G) + if_link->supported_caps |= QED_LM_50000baseKR2_Full_BIT; if (link_caps.speed_capabilities & NVM_CFG1_PORT_DRV_SPEED_CAPABILITY_MASK_BB_100G) - if_link->supported_caps |= 0; + if_link->supported_caps |= QED_LM_100000baseKR4_Full_BIT; if (link.link_up) if_link->speed = link.speed; @@ -1231,33 +1309,29 @@ static void qed_fill_link(struct qed_hwfn *hwfn, if_link->pause_config |= QED_LINK_PAUSE_TX_ENABLE; /* Link partner capabilities */ - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_1G_HD) - if_link->lp_caps |= SUPPORTED_1000baseT_Half; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_1G_FD) - if_link->lp_caps |= SUPPORTED_1000baseT_Full; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_10G) - if_link->lp_caps |= SUPPORTED_10000baseKR_Full; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_40G) - if_link->lp_caps |= SUPPORTED_40000baseLR4_Full; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_50G) - if_link->lp_caps |= 0; - if (link.partner_adv_speed & - QED_LINK_PARTNER_SPEED_100G) - if_link->lp_caps |= 0; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_HD) + if_link->lp_caps |= QED_LM_1000baseT_Half_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_1G_FD) + if_link->lp_caps |= QED_LM_1000baseT_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_10G) + if_link->lp_caps |= QED_LM_10000baseKR_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_25G) + if_link->lp_caps |= QED_LM_25000baseKR_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_40G) + if_link->lp_caps |= QED_LM_40000baseLR4_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_50G) + if_link->lp_caps |= QED_LM_50000baseKR2_Full_BIT; + if (link.partner_adv_speed & QED_LINK_PARTNER_SPEED_100G) + if_link->lp_caps |= QED_LM_100000baseKR4_Full_BIT; if (link.an_complete) - if_link->lp_caps |= SUPPORTED_Autoneg; + if_link->lp_caps |= QED_LM_Autoneg_BIT; if (link.partner_adv_pause) - if_link->lp_caps |= SUPPORTED_Pause; + if_link->lp_caps |= QED_LM_Pause_BIT; if (link.partner_adv_pause == QED_LINK_PARTNER_ASYMMETRIC_PAUSE || link.partner_adv_pause == QED_LINK_PARTNER_BOTH_PAUSE) - if_link->lp_caps |= SUPPORTED_Asym_Pause; + if_link->lp_caps |= QED_LM_Asym_Pause_BIT; } static void qed_get_current_link(struct qed_dev *cdev, @@ -1385,9 +1459,32 @@ const struct qed_common_ops qed_common_ops_pass = { .get_link = &qed_get_current_link, .drain = &qed_drain, .update_msglvl = &qed_init_dp, + .dbg_all_data = &qed_dbg_all_data, + .dbg_all_data_size = &qed_dbg_all_data_size, .chain_alloc = &qed_chain_alloc, .chain_free = &qed_chain_free, .get_coalesce = &qed_get_coalesce, .set_coalesce = &qed_set_coalesce, .set_led = &qed_set_led, }; + +void qed_get_protocol_stats(struct qed_dev *cdev, + enum qed_mcp_protocol_type type, + union qed_mcp_protocol_stats *stats) +{ + struct qed_eth_stats eth_stats; + + memset(stats, 0, sizeof(*stats)); + + switch (type) { + case QED_MCP_LAN_STATS: + qed_get_vport_stats(cdev, ð_stats); + stats->lan_stats.ucast_rx_pkts = eth_stats.rx_ucast_pkts; + stats->lan_stats.ucast_tx_pkts = eth_stats.tx_ucast_pkts; + stats->lan_stats.fcs_err = -1; + break; + default: + DP_ERR(cdev, "Invalid protocol type = %d\n", type); + return; + } +} diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c index f776a77794c5..bdc9ba92f6d4 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c @@ -54,8 +54,7 @@ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn) return true; } -void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, PUBLIC_PORT); @@ -68,8 +67,7 @@ void qed_mcp_cmd_port_init(struct qed_hwfn *p_hwfn, p_hwfn->mcp_info->port_addr, MFW_PORT(p_hwfn)); } -void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +void qed_mcp_read_mb(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 length = MFW_DRV_MSG_MAX_DWORDS(p_hwfn->mcp_info->mfw_mb_length); u32 tmp, i; @@ -99,8 +97,7 @@ int qed_mcp_free(struct qed_hwfn *p_hwfn) return 0; } -static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info = p_hwfn->mcp_info; u32 drv_mb_offsize, mfw_mb_offsize; @@ -143,8 +140,7 @@ static int qed_load_mcp_offsets(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_info *p_info; u32 size; @@ -165,9 +161,7 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, size = MFW_DRV_MSG_MAX_DWORDS(p_info->mfw_mb_length) * sizeof(u32); p_info->mfw_mb_cur = kzalloc(size, GFP_KERNEL); - p_info->mfw_mb_shadow = - kzalloc(sizeof(u32) * MFW_DRV_MSG_MAX_DWORDS( - p_info->mfw_mb_length), GFP_KERNEL); + p_info->mfw_mb_shadow = kzalloc(size, GFP_KERNEL); if (!p_info->mfw_mb_shadow || !p_info->mfw_mb_addr) goto err; @@ -177,7 +171,6 @@ int qed_mcp_cmd_init(struct qed_hwfn *p_hwfn, return 0; err: - DP_NOTICE(p_hwfn, "Failed to allocate mcp memory\n"); qed_mcp_free(p_hwfn); return -ENOMEM; } @@ -189,8 +182,7 @@ err: * access is achieved by setting a blocking flag, which will fail other * competing contexts to send their mailboxes. */ -static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, - u32 cmd) +static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, u32 cmd) { spin_lock_bh(&p_hwfn->mcp_info->lock); @@ -221,15 +213,13 @@ static int qed_mcp_mb_lock(struct qed_hwfn *p_hwfn, return 0; } -static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, - u32 cmd) +static void qed_mcp_mb_unlock(struct qed_hwfn *p_hwfn, u32 cmd) { if (cmd != DRV_MSG_CODE_LOAD_REQ && cmd != DRV_MSG_CODE_UNLOAD_REQ) spin_unlock_bh(&p_hwfn->mcp_info->lock); } -int qed_mcp_reset(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 seq = ++p_hwfn->mcp_info->drv_mb_seq; u8 delay = CHIP_MCP_RESP_ITER_US; @@ -326,7 +316,8 @@ static int qed_do_mcp_cmd(struct qed_hwfn *p_hwfn, *o_mcp_param = DRV_MB_RD(p_hwfn, p_ptt, fw_mb_param); } else { /* FW BUG! */ - DP_ERR(p_hwfn, "MFW failed to respond!\n"); + DP_ERR(p_hwfn, "MFW failed to respond [cmd 0x%x param 0x%x]\n", + cmd, param); *o_mcp_resp = 0; rc = -EAGAIN; } @@ -342,7 +333,7 @@ static int qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn, /* MCP not initialized */ if (!qed_mcp_is_init(p_hwfn)) { - DP_NOTICE(p_hwfn, "MFW is not initialized !\n"); + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); return -EBUSY; } @@ -398,9 +389,36 @@ int qed_mcp_cmd(struct qed_hwfn *p_hwfn, return 0; } +int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf) +{ + struct qed_mcp_mb_params mb_params; + union drv_union_data union_data; + int rc; + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = cmd; + mb_params.param = param; + mb_params.p_data_dst = &union_data; + rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); + if (rc) + return rc; + + *o_mcp_resp = mb_params.mcp_resp; + *o_mcp_param = mb_params.mcp_param; + + *o_txn_size = *o_mcp_param; + memcpy(o_buf, &union_data.raw_data, *o_txn_size); + + return 0; +} + int qed_mcp_load_req(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *p_load_code) + struct qed_ptt *p_ptt, u32 *p_load_code) { struct qed_dev *cdev = p_hwfn->cdev; struct qed_mcp_mb_params mb_params; @@ -527,8 +545,7 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn, "Received transceiver state update [0x%08x] from mfw [Addr 0x%x]\n", transceiver_state, (u32)(p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, - transceiver_data))); + offsetof(struct public_port, transceiver_data))); transceiver_state = GET_FIELD(transceiver_state, ETH_TRANSCEIVER_STATE); @@ -540,8 +557,7 @@ static void qed_mcp_handle_transceiver_change(struct qed_hwfn *p_hwfn, } static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - bool b_reset) + struct qed_ptt *p_ptt, bool b_reset) { struct qed_mcp_link_state *p_link; u8 max_bw, min_bw; @@ -557,8 +573,7 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, "Received link update [0x%08x] from mfw [Addr 0x%x]\n", status, (u32)(p_hwfn->mcp_info->port_addr + - offsetof(struct public_port, - link_status))); + offsetof(struct public_port, link_status))); } else { DP_VERBOSE(p_hwfn, NETIF_MSG_LINK, "Resetting link indications\n"); @@ -634,6 +649,9 @@ static void qed_mcp_handle_link_change(struct qed_hwfn *p_hwfn, p_link->partner_adv_speed |= (status & LINK_STATUS_LINK_PARTNER_20G_CAPABLE) ? QED_LINK_PARTNER_SPEED_20G : 0; + p_link->partner_adv_speed |= + (status & LINK_STATUS_LINK_PARTNER_25G_CAPABLE) ? + QED_LINK_PARTNER_SPEED_25G : 0; p_link->partner_adv_speed |= (status & LINK_STATUS_LINK_PARTNER_40G_CAPABLE) ? QED_LINK_PARTNER_SPEED_40G : 0; @@ -722,6 +740,48 @@ int qed_mcp_set_link(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, bool b_up) return 0; } +static void qed_mcp_send_protocol_stats(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + enum MFW_DRV_MSG_TYPE type) +{ + enum qed_mcp_protocol_type stats_type; + union qed_mcp_protocol_stats stats; + struct qed_mcp_mb_params mb_params; + union drv_union_data union_data; + u32 hsi_param; + + switch (type) { + case MFW_DRV_MSG_GET_LAN_STATS: + stats_type = QED_MCP_LAN_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_LAN; + break; + case MFW_DRV_MSG_GET_FCOE_STATS: + stats_type = QED_MCP_FCOE_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_FCOE; + break; + case MFW_DRV_MSG_GET_ISCSI_STATS: + stats_type = QED_MCP_ISCSI_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_ISCSI; + break; + case MFW_DRV_MSG_GET_RDMA_STATS: + stats_type = QED_MCP_RDMA_STATS; + hsi_param = DRV_MSG_CODE_STATS_TYPE_RDMA; + break; + default: + DP_NOTICE(p_hwfn, "Invalid protocol type %d\n", type); + return; + } + + qed_get_protocol_stats(p_hwfn->cdev, stats_type, &stats); + + memset(&mb_params, 0, sizeof(mb_params)); + mb_params.cmd = DRV_MSG_CODE_GET_STATS; + mb_params.param = hsi_param; + memcpy(&union_data, &stats, sizeof(stats)); + mb_params.p_data_src = &union_data; + qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params); +} + static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn, struct public_func *p_shmem_info) { @@ -752,8 +812,7 @@ static void qed_read_pf_bandwidth(struct qed_hwfn *p_hwfn, static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct public_func *p_data, - int pfid) + struct public_func *p_data, int pfid) { u32 addr = SECTION_OFFSIZE_ADDR(p_hwfn->mcp_info->public_base, PUBLIC_FUNC); @@ -763,51 +822,20 @@ static u32 qed_mcp_get_shmem_func(struct qed_hwfn *p_hwfn, memset(p_data, 0, sizeof(*p_data)); - size = min_t(u32, sizeof(*p_data), - QED_SECTION_SIZE(mfw_path_offsize)); + size = min_t(u32, sizeof(*p_data), QED_SECTION_SIZE(mfw_path_offsize)); for (i = 0; i < size / sizeof(u32); i++) ((u32 *)p_data)[i] = qed_rd(p_hwfn, p_ptt, func_addr + (i << 2)); return size; } -int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_pf) -{ - struct public_func shmem_info; - int i; - - /* Find first Ethernet interface in port */ - for (i = 0; i < NUM_OF_ENG_PFS(p_hwfn->cdev); - i += p_hwfn->cdev->num_ports_in_engines) { - qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, - MCP_PF_ID_BY_REL(p_hwfn, i)); - - if (shmem_info.config & FUNC_MF_CFG_FUNC_HIDE) - continue; - - if ((shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK) == - FUNC_MF_CFG_PROTOCOL_ETHERNET) { - *p_pf = (u8)i; - return 0; - } - } - - DP_NOTICE(p_hwfn, - "Failed to find on port an ethernet interface in MF_SI mode\n"); - - return -EINVAL; -} - -static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +static void qed_mcp_update_bw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { struct qed_mcp_function_info *p_info; struct public_func shmem_info; u32 resp = 0, param = 0; - qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, - MCP_PF_ID(p_hwfn)); + qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn)); qed_read_pf_bandwidth(p_hwfn, &shmem_info); @@ -867,6 +895,12 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn, case MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE: qed_mcp_handle_transceiver_change(p_hwfn, p_ptt); break; + case MFW_DRV_MSG_GET_LAN_STATS: + case MFW_DRV_MSG_GET_FCOE_STATS: + case MFW_DRV_MSG_GET_ISCSI_STATS: + case MFW_DRV_MSG_GET_RDMA_STATS: + qed_mcp_send_protocol_stats(p_hwfn, p_ptt, i); + break; case MFW_DRV_MSG_BW_UPDATE: qed_mcp_update_bw(p_hwfn, p_ptt); break; @@ -940,8 +974,7 @@ int qed_mcp_get_mfw_ver(struct qed_hwfn *p_hwfn, return 0; } -int qed_mcp_get_media_type(struct qed_dev *cdev, - u32 *p_media_type) +int qed_mcp_get_media_type(struct qed_dev *cdev, u32 *p_media_type) { struct qed_hwfn *p_hwfn = &cdev->hwfns[0]; struct qed_ptt *p_ptt; @@ -950,7 +983,7 @@ int qed_mcp_get_media_type(struct qed_dev *cdev, return -EINVAL; if (!qed_mcp_is_init(p_hwfn)) { - DP_NOTICE(p_hwfn, "MFW is not initialized !\n"); + DP_NOTICE(p_hwfn, "MFW is not initialized!\n"); return -EBUSY; } @@ -1003,15 +1036,13 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, struct qed_mcp_function_info *info; struct public_func shmem_info; - qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, - MCP_PF_ID(p_hwfn)); + qed_mcp_get_shmem_func(p_hwfn, p_ptt, &shmem_info, MCP_PF_ID(p_hwfn)); info = &p_hwfn->mcp_info->func_info; info->pause_on_host = (shmem_info.config & FUNC_MF_CFG_PAUSE_ON_HOST_RING) ? 1 : 0; - if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, - &info->protocol)) { + if (qed_mcp_get_shmem_proto(p_hwfn, &shmem_info, &info->protocol)) { DP_ERR(p_hwfn, "Unknown personality %08x\n", (u32)(shmem_info.config & FUNC_MF_CFG_PROTOCOL_MASK)); return -EINVAL; @@ -1072,15 +1103,13 @@ struct qed_mcp_link_capabilities return &p_hwfn->mcp_info->link_capabilities; } -int qed_mcp_drain(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt) +int qed_mcp_drain(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 resp = 0, param = 0; int rc; rc = qed_mcp_cmd(p_hwfn, p_ptt, - DRV_MSG_CODE_NIG_DRAIN, 1000, - &resp, ¶m); + DRV_MSG_CODE_NIG_DRAIN, 1000, &resp, ¶m); /* Wait for the drain to complete before returning */ msleep(1020); @@ -1089,8 +1118,7 @@ int qed_mcp_drain(struct qed_hwfn *p_hwfn, } int qed_mcp_get_flash_size(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, - u32 *p_flash_size) + struct qed_ptt *p_ptt, u32 *p_flash_size) { u32 flash_size; @@ -1168,8 +1196,35 @@ qed_mcp_send_drv_version(struct qed_hwfn *p_hwfn, return rc; } -int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - enum qed_led_mode mode) +int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MCP_HALT, 0, &resp, + ¶m); + if (rc) + DP_ERR(p_hwfn, "MCP response failure, aborting\n"); + + return rc; +} + +int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 value, cpu_mode; + + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_STATE, 0xffffffff); + + value = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + value &= ~MCP_REG_CPU_MODE_SOFT_HALT; + qed_wr(p_hwfn, p_ptt, MCP_REG_CPU_MODE, value); + cpu_mode = qed_rd(p_hwfn, p_ptt, MCP_REG_CPU_MODE); + + return (cpu_mode & MCP_REG_CPU_MODE_SOFT_HALT) ? -EAGAIN : 0; +} + +int qed_mcp_set_led(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, enum qed_led_mode mode) { u32 resp = 0, param = 0, drv_mb_param; int rc; @@ -1195,6 +1250,27 @@ int qed_mcp_set_led(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, return rc; } +int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 mask_parities) +{ + u32 resp = 0, param = 0; + int rc; + + rc = qed_mcp_cmd(p_hwfn, p_ptt, DRV_MSG_CODE_MASK_PARITIES, + mask_parities, &resp, ¶m); + + if (rc) { + DP_ERR(p_hwfn, + "MCP response failure for mask parities, aborting\n"); + } else if (resp != FW_MSG_CODE_OK) { + DP_ERR(p_hwfn, + "MCP did not acknowledge mask parity request. Old MFW?\n"); + rc = -EINVAL; + } + + return rc; +} + int qed_mcp_bist_register_test(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 drv_mb_param = 0, rsp, param; diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h index 7f319aa1b229..dff520ed069b 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h @@ -60,9 +60,10 @@ struct qed_mcp_link_state { #define QED_LINK_PARTNER_SPEED_1G_FD BIT(1) #define QED_LINK_PARTNER_SPEED_10G BIT(2) #define QED_LINK_PARTNER_SPEED_20G BIT(3) -#define QED_LINK_PARTNER_SPEED_40G BIT(4) -#define QED_LINK_PARTNER_SPEED_50G BIT(5) -#define QED_LINK_PARTNER_SPEED_100G BIT(6) +#define QED_LINK_PARTNER_SPEED_25G BIT(4) +#define QED_LINK_PARTNER_SPEED_40G BIT(5) +#define QED_LINK_PARTNER_SPEED_50G BIT(6) +#define QED_LINK_PARTNER_SPEED_100G BIT(7) u32 partner_adv_speed; bool partner_tx_flow_ctrl_en; @@ -105,6 +106,47 @@ struct qed_mcp_drv_version { u8 name[MCP_DRV_VER_STR_SIZE - 4]; }; +struct qed_mcp_lan_stats { + u64 ucast_rx_pkts; + u64 ucast_tx_pkts; + u32 fcs_err; +}; + +struct qed_mcp_fcoe_stats { + u64 rx_pkts; + u64 tx_pkts; + u32 fcs_err; + u32 login_failure; +}; + +struct qed_mcp_iscsi_stats { + u64 rx_pdus; + u64 tx_pdus; + u64 rx_bytes; + u64 tx_bytes; +}; + +struct qed_mcp_rdma_stats { + u64 rx_pkts; + u64 tx_pkts; + u64 rx_bytes; + u64 tx_byts; +}; + +enum qed_mcp_protocol_type { + QED_MCP_LAN_STATS, + QED_MCP_FCOE_STATS, + QED_MCP_ISCSI_STATS, + QED_MCP_RDMA_STATS +}; + +union qed_mcp_protocol_stats { + struct qed_mcp_lan_stats lan_stats; + struct qed_mcp_fcoe_stats fcoe_stats; + struct qed_mcp_iscsi_stats iscsi_stats; + struct qed_mcp_rdma_stats rdma_stats; +}; + /** * @brief - returns the link params of the hw function * @@ -425,6 +467,29 @@ int qed_mcp_fill_shmem_func_info(struct qed_hwfn *p_hwfn, int qed_mcp_reset(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +/** + * @brief - Sends an NVM read command request to the MFW to get + * a buffer. + * + * @param p_hwfn + * @param p_ptt + * @param cmd - Command: DRV_MSG_CODE_NVM_GET_FILE_DATA or + * DRV_MSG_CODE_NVM_READ_NVRAM commands + * @param param - [0:23] - Offset [24:31] - Size + * @param o_mcp_resp - MCP response + * @param o_mcp_param - MCP response param + * @param o_txn_size - Buffer size output + * @param o_buf - Pointer to the buffer returned by the MFW. + * + * @param return 0 upon success. + */ +int qed_mcp_nvm_rd_cmd(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + u32 cmd, + u32 param, + u32 *o_mcp_resp, + u32 *o_mcp_param, u32 *o_txn_size, u32 *o_buf); + /** * @brief indicates whether the MFW objects [under mcp_info] are accessible * @@ -447,6 +512,26 @@ bool qed_mcp_is_init(struct qed_hwfn *p_hwfn); int qed_mcp_config_vf_msix(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, u8 vf_id, u8 num); +/** + * @brief - Halt the MCP. + * + * @param p_hwfn + * @param p_ptt + * + * @param return 0 upon success. + */ +int qed_mcp_halt(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + +/** + * @brief - Wake up the MCP. + * + * @param p_hwfn + * @param p_ptt + * + * @param return 0 upon success. + */ +int qed_mcp_resume(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); + int qed_configure_pf_min_bandwidth(struct qed_dev *cdev, u8 min_bw); int qed_configure_pf_max_bandwidth(struct qed_dev *cdev, u8 max_bw); int __qed_configure_pf_max_bandwidth(struct qed_hwfn *p_hwfn, @@ -458,6 +543,7 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn, struct qed_mcp_link_state *p_link, u8 min_bw); -int qed_hw_init_first_eth(struct qed_hwfn *p_hwfn, - struct qed_ptt *p_ptt, u8 *p_pf); +int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, u32 mask_parities); + #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h index f6b86ca1ff79..b414a0542177 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h +++ b/drivers/net/ethernet/qlogic/qed/qed_reg_addr.h @@ -116,8 +116,14 @@ 0x1009c4UL #define QM_REG_PF_EN \ 0x2f2ea4UL +#define TCFC_REG_WEAK_ENABLE_VF \ + 0x2d0704UL #define TCFC_REG_STRONG_ENABLE_PF \ 0x2d0708UL +#define TCFC_REG_STRONG_ENABLE_VF \ + 0x2d070cUL +#define CCFC_REG_WEAK_ENABLE_VF \ + 0x2e0704UL #define CCFC_REG_STRONG_ENABLE_PF \ 0x2e0708UL #define PGLUE_B_REG_PGL_ADDR_88_F0 \ @@ -202,6 +208,26 @@ 0x50196cUL #define NIG_REG_LLH_CLS_TYPE_DUALMODE \ 0x501964UL +#define NIG_REG_LLH_FUNC_FILTER_VALUE \ + 0x501a00UL +#define NIG_REG_LLH_FUNC_FILTER_VALUE_SIZE \ + 32 +#define NIG_REG_LLH_FUNC_FILTER_EN \ + 0x501a80UL +#define NIG_REG_LLH_FUNC_FILTER_EN_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_MODE \ + 0x501ac0UL +#define NIG_REG_LLH_FUNC_FILTER_MODE_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE \ + 0x501b00UL +#define NIG_REG_LLH_FUNC_FILTER_PROTOCOL_TYPE_SIZE \ + 16 +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL \ + 0x501b40UL +#define NIG_REG_LLH_FUNC_FILTER_HDR_SEL_SIZE \ + 16 #define NCSI_REG_CONFIG \ 0x040200UL #define PBF_REG_INIT \ @@ -258,6 +284,8 @@ 0x1f0a1cUL #define PRS_REG_ROCE_DEST_QP_MAX_PF \ 0x1f0430UL +#define PRS_REG_USE_LIGHT_L2 \ + 0x1f096cUL #define PSDM_REG_ENABLE_IN1 \ 0xfa0004UL #define PSEM_REG_ENABLE_IN \ @@ -521,4 +549,910 @@ #define QM_REG_WFQPFWEIGHT 0x2f4e80UL #define QM_REG_WFQVPWEIGHT 0x2fa000UL + +#define PGLCS_REG_DBG_SELECT \ + 0x001d14UL +#define PGLCS_REG_DBG_DWORD_ENABLE \ + 0x001d18UL +#define PGLCS_REG_DBG_SHIFT \ + 0x001d1cUL +#define PGLCS_REG_DBG_FORCE_VALID \ + 0x001d20UL +#define PGLCS_REG_DBG_FORCE_FRAME \ + 0x001d24UL +#define MISC_REG_RESET_PL_PDA_VMAIN_1 \ + 0x008070UL +#define MISC_REG_RESET_PL_PDA_VMAIN_2 \ + 0x008080UL +#define MISC_REG_RESET_PL_PDA_VAUX \ + 0x008090UL +#define MISCS_REG_RESET_PL_UA \ + 0x009050UL +#define MISCS_REG_RESET_PL_HV \ + 0x009060UL +#define MISCS_REG_RESET_PL_HV_2 \ + 0x009150UL +#define DMAE_REG_DBG_SELECT \ + 0x00c510UL +#define DMAE_REG_DBG_DWORD_ENABLE \ + 0x00c514UL +#define DMAE_REG_DBG_SHIFT \ + 0x00c518UL +#define DMAE_REG_DBG_FORCE_VALID \ + 0x00c51cUL +#define DMAE_REG_DBG_FORCE_FRAME \ + 0x00c520UL +#define NCSI_REG_DBG_SELECT \ + 0x040474UL +#define NCSI_REG_DBG_DWORD_ENABLE \ + 0x040478UL +#define NCSI_REG_DBG_SHIFT \ + 0x04047cUL +#define NCSI_REG_DBG_FORCE_VALID \ + 0x040480UL +#define NCSI_REG_DBG_FORCE_FRAME \ + 0x040484UL +#define GRC_REG_DBG_SELECT \ + 0x0500a4UL +#define GRC_REG_DBG_DWORD_ENABLE \ + 0x0500a8UL +#define GRC_REG_DBG_SHIFT \ + 0x0500acUL +#define GRC_REG_DBG_FORCE_VALID \ + 0x0500b0UL +#define GRC_REG_DBG_FORCE_FRAME \ + 0x0500b4UL +#define UMAC_REG_DBG_SELECT \ + 0x051094UL +#define UMAC_REG_DBG_DWORD_ENABLE \ + 0x051098UL +#define UMAC_REG_DBG_SHIFT \ + 0x05109cUL +#define UMAC_REG_DBG_FORCE_VALID \ + 0x0510a0UL +#define UMAC_REG_DBG_FORCE_FRAME \ + 0x0510a4UL +#define MCP2_REG_DBG_SELECT \ + 0x052400UL +#define MCP2_REG_DBG_DWORD_ENABLE \ + 0x052404UL +#define MCP2_REG_DBG_SHIFT \ + 0x052408UL +#define MCP2_REG_DBG_FORCE_VALID \ + 0x052440UL +#define MCP2_REG_DBG_FORCE_FRAME \ + 0x052444UL +#define PCIE_REG_DBG_SELECT \ + 0x0547e8UL +#define PCIE_REG_DBG_DWORD_ENABLE \ + 0x0547ecUL +#define PCIE_REG_DBG_SHIFT \ + 0x0547f0UL +#define PCIE_REG_DBG_FORCE_VALID \ + 0x0547f4UL +#define PCIE_REG_DBG_FORCE_FRAME \ + 0x0547f8UL +#define DORQ_REG_DBG_SELECT \ + 0x100ad0UL +#define DORQ_REG_DBG_DWORD_ENABLE \ + 0x100ad4UL +#define DORQ_REG_DBG_SHIFT \ + 0x100ad8UL +#define DORQ_REG_DBG_FORCE_VALID \ + 0x100adcUL +#define DORQ_REG_DBG_FORCE_FRAME \ + 0x100ae0UL +#define IGU_REG_DBG_SELECT \ + 0x181578UL +#define IGU_REG_DBG_DWORD_ENABLE \ + 0x18157cUL +#define IGU_REG_DBG_SHIFT \ + 0x181580UL +#define IGU_REG_DBG_FORCE_VALID \ + 0x181584UL +#define IGU_REG_DBG_FORCE_FRAME \ + 0x181588UL +#define CAU_REG_DBG_SELECT \ + 0x1c0ea8UL +#define CAU_REG_DBG_DWORD_ENABLE \ + 0x1c0eacUL +#define CAU_REG_DBG_SHIFT \ + 0x1c0eb0UL +#define CAU_REG_DBG_FORCE_VALID \ + 0x1c0eb4UL +#define CAU_REG_DBG_FORCE_FRAME \ + 0x1c0eb8UL +#define PRS_REG_DBG_SELECT \ + 0x1f0b6cUL +#define PRS_REG_DBG_DWORD_ENABLE \ + 0x1f0b70UL +#define PRS_REG_DBG_SHIFT \ + 0x1f0b74UL +#define PRS_REG_DBG_FORCE_VALID \ + 0x1f0ba0UL +#define PRS_REG_DBG_FORCE_FRAME \ + 0x1f0ba4UL +#define CNIG_REG_DBG_SELECT_K2 \ + 0x218254UL +#define CNIG_REG_DBG_DWORD_ENABLE_K2 \ + 0x218258UL +#define CNIG_REG_DBG_SHIFT_K2 \ + 0x21825cUL +#define CNIG_REG_DBG_FORCE_VALID_K2 \ + 0x218260UL +#define CNIG_REG_DBG_FORCE_FRAME_K2 \ + 0x218264UL +#define PRM_REG_DBG_SELECT \ + 0x2306a8UL +#define PRM_REG_DBG_DWORD_ENABLE \ + 0x2306acUL +#define PRM_REG_DBG_SHIFT \ + 0x2306b0UL +#define PRM_REG_DBG_FORCE_VALID \ + 0x2306b4UL +#define PRM_REG_DBG_FORCE_FRAME \ + 0x2306b8UL +#define SRC_REG_DBG_SELECT \ + 0x238700UL +#define SRC_REG_DBG_DWORD_ENABLE \ + 0x238704UL +#define SRC_REG_DBG_SHIFT \ + 0x238708UL +#define SRC_REG_DBG_FORCE_VALID \ + 0x23870cUL +#define SRC_REG_DBG_FORCE_FRAME \ + 0x238710UL +#define RSS_REG_DBG_SELECT \ + 0x238c4cUL +#define RSS_REG_DBG_DWORD_ENABLE \ + 0x238c50UL +#define RSS_REG_DBG_SHIFT \ + 0x238c54UL +#define RSS_REG_DBG_FORCE_VALID \ + 0x238c58UL +#define RSS_REG_DBG_FORCE_FRAME \ + 0x238c5cUL +#define RPB_REG_DBG_SELECT \ + 0x23c728UL +#define RPB_REG_DBG_DWORD_ENABLE \ + 0x23c72cUL +#define RPB_REG_DBG_SHIFT \ + 0x23c730UL +#define RPB_REG_DBG_FORCE_VALID \ + 0x23c734UL +#define RPB_REG_DBG_FORCE_FRAME \ + 0x23c738UL +#define PSWRQ2_REG_DBG_SELECT \ + 0x240100UL +#define PSWRQ2_REG_DBG_DWORD_ENABLE \ + 0x240104UL +#define PSWRQ2_REG_DBG_SHIFT \ + 0x240108UL +#define PSWRQ2_REG_DBG_FORCE_VALID \ + 0x24010cUL +#define PSWRQ2_REG_DBG_FORCE_FRAME \ + 0x240110UL +#define PSWRQ_REG_DBG_SELECT \ + 0x280020UL +#define PSWRQ_REG_DBG_DWORD_ENABLE \ + 0x280024UL +#define PSWRQ_REG_DBG_SHIFT \ + 0x280028UL +#define PSWRQ_REG_DBG_FORCE_VALID \ + 0x28002cUL +#define PSWRQ_REG_DBG_FORCE_FRAME \ + 0x280030UL +#define PSWWR_REG_DBG_SELECT \ + 0x29a084UL +#define PSWWR_REG_DBG_DWORD_ENABLE \ + 0x29a088UL +#define PSWWR_REG_DBG_SHIFT \ + 0x29a08cUL +#define PSWWR_REG_DBG_FORCE_VALID \ + 0x29a090UL +#define PSWWR_REG_DBG_FORCE_FRAME \ + 0x29a094UL +#define PSWRD_REG_DBG_SELECT \ + 0x29c040UL +#define PSWRD_REG_DBG_DWORD_ENABLE \ + 0x29c044UL +#define PSWRD_REG_DBG_SHIFT \ + 0x29c048UL +#define PSWRD_REG_DBG_FORCE_VALID \ + 0x29c04cUL +#define PSWRD_REG_DBG_FORCE_FRAME \ + 0x29c050UL +#define PSWRD2_REG_DBG_SELECT \ + 0x29d400UL +#define PSWRD2_REG_DBG_DWORD_ENABLE \ + 0x29d404UL +#define PSWRD2_REG_DBG_SHIFT \ + 0x29d408UL +#define PSWRD2_REG_DBG_FORCE_VALID \ + 0x29d40cUL +#define PSWRD2_REG_DBG_FORCE_FRAME \ + 0x29d410UL +#define PSWHST2_REG_DBG_SELECT \ + 0x29e058UL +#define PSWHST2_REG_DBG_DWORD_ENABLE \ + 0x29e05cUL +#define PSWHST2_REG_DBG_SHIFT \ + 0x29e060UL +#define PSWHST2_REG_DBG_FORCE_VALID \ + 0x29e064UL +#define PSWHST2_REG_DBG_FORCE_FRAME \ + 0x29e068UL +#define PSWHST_REG_DBG_SELECT \ + 0x2a0100UL +#define PSWHST_REG_DBG_DWORD_ENABLE \ + 0x2a0104UL +#define PSWHST_REG_DBG_SHIFT \ + 0x2a0108UL +#define PSWHST_REG_DBG_FORCE_VALID \ + 0x2a010cUL +#define PSWHST_REG_DBG_FORCE_FRAME \ + 0x2a0110UL +#define PGLUE_B_REG_DBG_SELECT \ + 0x2a8400UL +#define PGLUE_B_REG_DBG_DWORD_ENABLE \ + 0x2a8404UL +#define PGLUE_B_REG_DBG_SHIFT \ + 0x2a8408UL +#define PGLUE_B_REG_DBG_FORCE_VALID \ + 0x2a840cUL +#define PGLUE_B_REG_DBG_FORCE_FRAME \ + 0x2a8410UL +#define TM_REG_DBG_SELECT \ + 0x2c07a8UL +#define TM_REG_DBG_DWORD_ENABLE \ + 0x2c07acUL +#define TM_REG_DBG_SHIFT \ + 0x2c07b0UL +#define TM_REG_DBG_FORCE_VALID \ + 0x2c07b4UL +#define TM_REG_DBG_FORCE_FRAME \ + 0x2c07b8UL +#define TCFC_REG_DBG_SELECT \ + 0x2d0500UL +#define TCFC_REG_DBG_DWORD_ENABLE \ + 0x2d0504UL +#define TCFC_REG_DBG_SHIFT \ + 0x2d0508UL +#define TCFC_REG_DBG_FORCE_VALID \ + 0x2d050cUL +#define TCFC_REG_DBG_FORCE_FRAME \ + 0x2d0510UL +#define CCFC_REG_DBG_SELECT \ + 0x2e0500UL +#define CCFC_REG_DBG_DWORD_ENABLE \ + 0x2e0504UL +#define CCFC_REG_DBG_SHIFT \ + 0x2e0508UL +#define CCFC_REG_DBG_FORCE_VALID \ + 0x2e050cUL +#define CCFC_REG_DBG_FORCE_FRAME \ + 0x2e0510UL +#define QM_REG_DBG_SELECT \ + 0x2f2e74UL +#define QM_REG_DBG_DWORD_ENABLE \ + 0x2f2e78UL +#define QM_REG_DBG_SHIFT \ + 0x2f2e7cUL +#define QM_REG_DBG_FORCE_VALID \ + 0x2f2e80UL +#define QM_REG_DBG_FORCE_FRAME \ + 0x2f2e84UL +#define RDIF_REG_DBG_SELECT \ + 0x300500UL +#define RDIF_REG_DBG_DWORD_ENABLE \ + 0x300504UL +#define RDIF_REG_DBG_SHIFT \ + 0x300508UL +#define RDIF_REG_DBG_FORCE_VALID \ + 0x30050cUL +#define RDIF_REG_DBG_FORCE_FRAME \ + 0x300510UL +#define TDIF_REG_DBG_SELECT \ + 0x310500UL +#define TDIF_REG_DBG_DWORD_ENABLE \ + 0x310504UL +#define TDIF_REG_DBG_SHIFT \ + 0x310508UL +#define TDIF_REG_DBG_FORCE_VALID \ + 0x31050cUL +#define TDIF_REG_DBG_FORCE_FRAME \ + 0x310510UL +#define BRB_REG_DBG_SELECT \ + 0x340ed0UL +#define BRB_REG_DBG_DWORD_ENABLE \ + 0x340ed4UL +#define BRB_REG_DBG_SHIFT \ + 0x340ed8UL +#define BRB_REG_DBG_FORCE_VALID \ + 0x340edcUL +#define BRB_REG_DBG_FORCE_FRAME \ + 0x340ee0UL +#define XYLD_REG_DBG_SELECT \ + 0x4c1600UL +#define XYLD_REG_DBG_DWORD_ENABLE \ + 0x4c1604UL +#define XYLD_REG_DBG_SHIFT \ + 0x4c1608UL +#define XYLD_REG_DBG_FORCE_VALID \ + 0x4c160cUL +#define XYLD_REG_DBG_FORCE_FRAME \ + 0x4c1610UL +#define YULD_REG_DBG_SELECT \ + 0x4c9600UL +#define YULD_REG_DBG_DWORD_ENABLE \ + 0x4c9604UL +#define YULD_REG_DBG_SHIFT \ + 0x4c9608UL +#define YULD_REG_DBG_FORCE_VALID \ + 0x4c960cUL +#define YULD_REG_DBG_FORCE_FRAME \ + 0x4c9610UL +#define TMLD_REG_DBG_SELECT \ + 0x4d1600UL +#define TMLD_REG_DBG_DWORD_ENABLE \ + 0x4d1604UL +#define TMLD_REG_DBG_SHIFT \ + 0x4d1608UL +#define TMLD_REG_DBG_FORCE_VALID \ + 0x4d160cUL +#define TMLD_REG_DBG_FORCE_FRAME \ + 0x4d1610UL +#define MULD_REG_DBG_SELECT \ + 0x4e1600UL +#define MULD_REG_DBG_DWORD_ENABLE \ + 0x4e1604UL +#define MULD_REG_DBG_SHIFT \ + 0x4e1608UL +#define MULD_REG_DBG_FORCE_VALID \ + 0x4e160cUL +#define MULD_REG_DBG_FORCE_FRAME \ + 0x4e1610UL +#define NIG_REG_DBG_SELECT \ + 0x502140UL +#define NIG_REG_DBG_DWORD_ENABLE \ + 0x502144UL +#define NIG_REG_DBG_SHIFT \ + 0x502148UL +#define NIG_REG_DBG_FORCE_VALID \ + 0x50214cUL +#define NIG_REG_DBG_FORCE_FRAME \ + 0x502150UL +#define BMB_REG_DBG_SELECT \ + 0x540a7cUL +#define BMB_REG_DBG_DWORD_ENABLE \ + 0x540a80UL +#define BMB_REG_DBG_SHIFT \ + 0x540a84UL +#define BMB_REG_DBG_FORCE_VALID \ + 0x540a88UL +#define BMB_REG_DBG_FORCE_FRAME \ + 0x540a8cUL +#define PTU_REG_DBG_SELECT \ + 0x560100UL +#define PTU_REG_DBG_DWORD_ENABLE \ + 0x560104UL +#define PTU_REG_DBG_SHIFT \ + 0x560108UL +#define PTU_REG_DBG_FORCE_VALID \ + 0x56010cUL +#define PTU_REG_DBG_FORCE_FRAME \ + 0x560110UL +#define CDU_REG_DBG_SELECT \ + 0x580704UL +#define CDU_REG_DBG_DWORD_ENABLE \ + 0x580708UL +#define CDU_REG_DBG_SHIFT \ + 0x58070cUL +#define CDU_REG_DBG_FORCE_VALID \ + 0x580710UL +#define CDU_REG_DBG_FORCE_FRAME \ + 0x580714UL +#define WOL_REG_DBG_SELECT \ + 0x600140UL +#define WOL_REG_DBG_DWORD_ENABLE \ + 0x600144UL +#define WOL_REG_DBG_SHIFT \ + 0x600148UL +#define WOL_REG_DBG_FORCE_VALID \ + 0x60014cUL +#define WOL_REG_DBG_FORCE_FRAME \ + 0x600150UL +#define BMBN_REG_DBG_SELECT \ + 0x610140UL +#define BMBN_REG_DBG_DWORD_ENABLE \ + 0x610144UL +#define BMBN_REG_DBG_SHIFT \ + 0x610148UL +#define BMBN_REG_DBG_FORCE_VALID \ + 0x61014cUL +#define BMBN_REG_DBG_FORCE_FRAME \ + 0x610150UL +#define NWM_REG_DBG_SELECT \ + 0x8000ecUL +#define NWM_REG_DBG_DWORD_ENABLE \ + 0x8000f0UL +#define NWM_REG_DBG_SHIFT \ + 0x8000f4UL +#define NWM_REG_DBG_FORCE_VALID \ + 0x8000f8UL +#define NWM_REG_DBG_FORCE_FRAME \ + 0x8000fcUL +#define PBF_REG_DBG_SELECT \ + 0xd80060UL +#define PBF_REG_DBG_DWORD_ENABLE \ + 0xd80064UL +#define PBF_REG_DBG_SHIFT \ + 0xd80068UL +#define PBF_REG_DBG_FORCE_VALID \ + 0xd8006cUL +#define PBF_REG_DBG_FORCE_FRAME \ + 0xd80070UL +#define PBF_PB1_REG_DBG_SELECT \ + 0xda0728UL +#define PBF_PB1_REG_DBG_DWORD_ENABLE \ + 0xda072cUL +#define PBF_PB1_REG_DBG_SHIFT \ + 0xda0730UL +#define PBF_PB1_REG_DBG_FORCE_VALID \ + 0xda0734UL +#define PBF_PB1_REG_DBG_FORCE_FRAME \ + 0xda0738UL +#define PBF_PB2_REG_DBG_SELECT \ + 0xda4728UL +#define PBF_PB2_REG_DBG_DWORD_ENABLE \ + 0xda472cUL +#define PBF_PB2_REG_DBG_SHIFT \ + 0xda4730UL +#define PBF_PB2_REG_DBG_FORCE_VALID \ + 0xda4734UL +#define PBF_PB2_REG_DBG_FORCE_FRAME \ + 0xda4738UL +#define BTB_REG_DBG_SELECT \ + 0xdb08c8UL +#define BTB_REG_DBG_DWORD_ENABLE \ + 0xdb08ccUL +#define BTB_REG_DBG_SHIFT \ + 0xdb08d0UL +#define BTB_REG_DBG_FORCE_VALID \ + 0xdb08d4UL +#define BTB_REG_DBG_FORCE_FRAME \ + 0xdb08d8UL +#define XSDM_REG_DBG_SELECT \ + 0xf80e28UL +#define XSDM_REG_DBG_DWORD_ENABLE \ + 0xf80e2cUL +#define XSDM_REG_DBG_SHIFT \ + 0xf80e30UL +#define XSDM_REG_DBG_FORCE_VALID \ + 0xf80e34UL +#define XSDM_REG_DBG_FORCE_FRAME \ + 0xf80e38UL +#define YSDM_REG_DBG_SELECT \ + 0xf90e28UL +#define YSDM_REG_DBG_DWORD_ENABLE \ + 0xf90e2cUL +#define YSDM_REG_DBG_SHIFT \ + 0xf90e30UL +#define YSDM_REG_DBG_FORCE_VALID \ + 0xf90e34UL +#define YSDM_REG_DBG_FORCE_FRAME \ + 0xf90e38UL +#define PSDM_REG_DBG_SELECT \ + 0xfa0e28UL +#define PSDM_REG_DBG_DWORD_ENABLE \ + 0xfa0e2cUL +#define PSDM_REG_DBG_SHIFT \ + 0xfa0e30UL +#define PSDM_REG_DBG_FORCE_VALID \ + 0xfa0e34UL +#define PSDM_REG_DBG_FORCE_FRAME \ + 0xfa0e38UL +#define TSDM_REG_DBG_SELECT \ + 0xfb0e28UL +#define TSDM_REG_DBG_DWORD_ENABLE \ + 0xfb0e2cUL +#define TSDM_REG_DBG_SHIFT \ + 0xfb0e30UL +#define TSDM_REG_DBG_FORCE_VALID \ + 0xfb0e34UL +#define TSDM_REG_DBG_FORCE_FRAME \ + 0xfb0e38UL +#define MSDM_REG_DBG_SELECT \ + 0xfc0e28UL +#define MSDM_REG_DBG_DWORD_ENABLE \ + 0xfc0e2cUL +#define MSDM_REG_DBG_SHIFT \ + 0xfc0e30UL +#define MSDM_REG_DBG_FORCE_VALID \ + 0xfc0e34UL +#define MSDM_REG_DBG_FORCE_FRAME \ + 0xfc0e38UL +#define USDM_REG_DBG_SELECT \ + 0xfd0e28UL +#define USDM_REG_DBG_DWORD_ENABLE \ + 0xfd0e2cUL +#define USDM_REG_DBG_SHIFT \ + 0xfd0e30UL +#define USDM_REG_DBG_FORCE_VALID \ + 0xfd0e34UL +#define USDM_REG_DBG_FORCE_FRAME \ + 0xfd0e38UL +#define XCM_REG_DBG_SELECT \ + 0x1000040UL +#define XCM_REG_DBG_DWORD_ENABLE \ + 0x1000044UL +#define XCM_REG_DBG_SHIFT \ + 0x1000048UL +#define XCM_REG_DBG_FORCE_VALID \ + 0x100004cUL +#define XCM_REG_DBG_FORCE_FRAME \ + 0x1000050UL +#define YCM_REG_DBG_SELECT \ + 0x1080040UL +#define YCM_REG_DBG_DWORD_ENABLE \ + 0x1080044UL +#define YCM_REG_DBG_SHIFT \ + 0x1080048UL +#define YCM_REG_DBG_FORCE_VALID \ + 0x108004cUL +#define YCM_REG_DBG_FORCE_FRAME \ + 0x1080050UL +#define PCM_REG_DBG_SELECT \ + 0x1100040UL +#define PCM_REG_DBG_DWORD_ENABLE \ + 0x1100044UL +#define PCM_REG_DBG_SHIFT \ + 0x1100048UL +#define PCM_REG_DBG_FORCE_VALID \ + 0x110004cUL +#define PCM_REG_DBG_FORCE_FRAME \ + 0x1100050UL +#define TCM_REG_DBG_SELECT \ + 0x1180040UL +#define TCM_REG_DBG_DWORD_ENABLE \ + 0x1180044UL +#define TCM_REG_DBG_SHIFT \ + 0x1180048UL +#define TCM_REG_DBG_FORCE_VALID \ + 0x118004cUL +#define TCM_REG_DBG_FORCE_FRAME \ + 0x1180050UL +#define MCM_REG_DBG_SELECT \ + 0x1200040UL +#define MCM_REG_DBG_DWORD_ENABLE \ + 0x1200044UL +#define MCM_REG_DBG_SHIFT \ + 0x1200048UL +#define MCM_REG_DBG_FORCE_VALID \ + 0x120004cUL +#define MCM_REG_DBG_FORCE_FRAME \ + 0x1200050UL +#define UCM_REG_DBG_SELECT \ + 0x1280050UL +#define UCM_REG_DBG_DWORD_ENABLE \ + 0x1280054UL +#define UCM_REG_DBG_SHIFT \ + 0x1280058UL +#define UCM_REG_DBG_FORCE_VALID \ + 0x128005cUL +#define UCM_REG_DBG_FORCE_FRAME \ + 0x1280060UL +#define XSEM_REG_DBG_SELECT \ + 0x1401528UL +#define XSEM_REG_DBG_DWORD_ENABLE \ + 0x140152cUL +#define XSEM_REG_DBG_SHIFT \ + 0x1401530UL +#define XSEM_REG_DBG_FORCE_VALID \ + 0x1401534UL +#define XSEM_REG_DBG_FORCE_FRAME \ + 0x1401538UL +#define YSEM_REG_DBG_SELECT \ + 0x1501528UL +#define YSEM_REG_DBG_DWORD_ENABLE \ + 0x150152cUL +#define YSEM_REG_DBG_SHIFT \ + 0x1501530UL +#define YSEM_REG_DBG_FORCE_VALID \ + 0x1501534UL +#define YSEM_REG_DBG_FORCE_FRAME \ + 0x1501538UL +#define PSEM_REG_DBG_SELECT \ + 0x1601528UL +#define PSEM_REG_DBG_DWORD_ENABLE \ + 0x160152cUL +#define PSEM_REG_DBG_SHIFT \ + 0x1601530UL +#define PSEM_REG_DBG_FORCE_VALID \ + 0x1601534UL +#define PSEM_REG_DBG_FORCE_FRAME \ + 0x1601538UL +#define TSEM_REG_DBG_SELECT \ + 0x1701528UL +#define TSEM_REG_DBG_DWORD_ENABLE \ + 0x170152cUL +#define TSEM_REG_DBG_SHIFT \ + 0x1701530UL +#define TSEM_REG_DBG_FORCE_VALID \ + 0x1701534UL +#define TSEM_REG_DBG_FORCE_FRAME \ + 0x1701538UL +#define MSEM_REG_DBG_SELECT \ + 0x1801528UL +#define MSEM_REG_DBG_DWORD_ENABLE \ + 0x180152cUL +#define MSEM_REG_DBG_SHIFT \ + 0x1801530UL +#define MSEM_REG_DBG_FORCE_VALID \ + 0x1801534UL +#define MSEM_REG_DBG_FORCE_FRAME \ + 0x1801538UL +#define USEM_REG_DBG_SELECT \ + 0x1901528UL +#define USEM_REG_DBG_DWORD_ENABLE \ + 0x190152cUL +#define USEM_REG_DBG_SHIFT \ + 0x1901530UL +#define USEM_REG_DBG_FORCE_VALID \ + 0x1901534UL +#define USEM_REG_DBG_FORCE_FRAME \ + 0x1901538UL +#define PCIE_REG_DBG_COMMON_SELECT \ + 0x054398UL +#define PCIE_REG_DBG_COMMON_DWORD_ENABLE \ + 0x05439cUL +#define PCIE_REG_DBG_COMMON_SHIFT \ + 0x0543a0UL +#define PCIE_REG_DBG_COMMON_FORCE_VALID \ + 0x0543a4UL +#define PCIE_REG_DBG_COMMON_FORCE_FRAME \ + 0x0543a8UL +#define MISC_REG_RESET_PL_UA \ + 0x008050UL +#define MISC_REG_RESET_PL_HV \ + 0x008060UL +#define XCM_REG_CTX_RBC_ACCS \ + 0x1001800UL +#define XCM_REG_AGG_CON_CTX \ + 0x1001804UL +#define XCM_REG_SM_CON_CTX \ + 0x1001808UL +#define YCM_REG_CTX_RBC_ACCS \ + 0x1081800UL +#define YCM_REG_AGG_CON_CTX \ + 0x1081804UL +#define YCM_REG_AGG_TASK_CTX \ + 0x1081808UL +#define YCM_REG_SM_CON_CTX \ + 0x108180cUL +#define YCM_REG_SM_TASK_CTX \ + 0x1081810UL +#define PCM_REG_CTX_RBC_ACCS \ + 0x1101440UL +#define PCM_REG_SM_CON_CTX \ + 0x1101444UL +#define TCM_REG_CTX_RBC_ACCS \ + 0x11814c0UL +#define TCM_REG_AGG_CON_CTX \ + 0x11814c4UL +#define TCM_REG_AGG_TASK_CTX \ + 0x11814c8UL +#define TCM_REG_SM_CON_CTX \ + 0x11814ccUL +#define TCM_REG_SM_TASK_CTX \ + 0x11814d0UL +#define MCM_REG_CTX_RBC_ACCS \ + 0x1201800UL +#define MCM_REG_AGG_CON_CTX \ + 0x1201804UL +#define MCM_REG_AGG_TASK_CTX \ + 0x1201808UL +#define MCM_REG_SM_CON_CTX \ + 0x120180cUL +#define MCM_REG_SM_TASK_CTX \ + 0x1201810UL +#define UCM_REG_CTX_RBC_ACCS \ + 0x1281700UL +#define UCM_REG_AGG_CON_CTX \ + 0x1281704UL +#define UCM_REG_AGG_TASK_CTX \ + 0x1281708UL +#define UCM_REG_SM_CON_CTX \ + 0x128170cUL +#define UCM_REG_SM_TASK_CTX \ + 0x1281710UL +#define XSEM_REG_SLOW_DBG_EMPTY \ + 0x1401140UL +#define XSEM_REG_SYNC_DBG_EMPTY \ + 0x1401160UL +#define XSEM_REG_SLOW_DBG_ACTIVE \ + 0x1401400UL +#define XSEM_REG_SLOW_DBG_MODE \ + 0x1401404UL +#define XSEM_REG_DBG_FRAME_MODE \ + 0x1401408UL +#define XSEM_REG_DBG_MODE1_CFG \ + 0x1401420UL +#define XSEM_REG_FAST_MEMORY \ + 0x1440000UL +#define YSEM_REG_SYNC_DBG_EMPTY \ + 0x1501160UL +#define YSEM_REG_SLOW_DBG_ACTIVE \ + 0x1501400UL +#define YSEM_REG_SLOW_DBG_MODE \ + 0x1501404UL +#define YSEM_REG_DBG_FRAME_MODE \ + 0x1501408UL +#define YSEM_REG_DBG_MODE1_CFG \ + 0x1501420UL +#define YSEM_REG_FAST_MEMORY \ + 0x1540000UL +#define PSEM_REG_SLOW_DBG_EMPTY \ + 0x1601140UL +#define PSEM_REG_SYNC_DBG_EMPTY \ + 0x1601160UL +#define PSEM_REG_SLOW_DBG_ACTIVE \ + 0x1601400UL +#define PSEM_REG_SLOW_DBG_MODE \ + 0x1601404UL +#define PSEM_REG_DBG_FRAME_MODE \ + 0x1601408UL +#define PSEM_REG_DBG_MODE1_CFG \ + 0x1601420UL +#define PSEM_REG_FAST_MEMORY \ + 0x1640000UL +#define TSEM_REG_SLOW_DBG_EMPTY \ + 0x1701140UL +#define TSEM_REG_SYNC_DBG_EMPTY \ + 0x1701160UL +#define TSEM_REG_SLOW_DBG_ACTIVE \ + 0x1701400UL +#define TSEM_REG_SLOW_DBG_MODE \ + 0x1701404UL +#define TSEM_REG_DBG_FRAME_MODE \ + 0x1701408UL +#define TSEM_REG_DBG_MODE1_CFG \ + 0x1701420UL +#define TSEM_REG_FAST_MEMORY \ + 0x1740000UL +#define MSEM_REG_SLOW_DBG_EMPTY \ + 0x1801140UL +#define MSEM_REG_SYNC_DBG_EMPTY \ + 0x1801160UL +#define MSEM_REG_SLOW_DBG_ACTIVE \ + 0x1801400UL +#define MSEM_REG_SLOW_DBG_MODE \ + 0x1801404UL +#define MSEM_REG_DBG_FRAME_MODE \ + 0x1801408UL +#define MSEM_REG_DBG_MODE1_CFG \ + 0x1801420UL +#define MSEM_REG_FAST_MEMORY \ + 0x1840000UL +#define USEM_REG_SLOW_DBG_EMPTY \ + 0x1901140UL +#define USEM_REG_SYNC_DBG_EMPTY \ + 0x1901160UL +#define USEM_REG_SLOW_DBG_ACTIVE \ + 0x1901400UL +#define USEM_REG_SLOW_DBG_MODE \ + 0x1901404UL +#define USEM_REG_DBG_FRAME_MODE \ + 0x1901408UL +#define USEM_REG_DBG_MODE1_CFG \ + 0x1901420UL +#define USEM_REG_FAST_MEMORY \ + 0x1940000UL +#define SEM_FAST_REG_INT_RAM \ + 0x020000UL +#define SEM_FAST_REG_INT_RAM_SIZE \ + 20480 +#define GRC_REG_TRACE_FIFO_VALID_DATA \ + 0x050064UL +#define GRC_REG_NUMBER_VALID_OVERRIDE_WINDOW \ + 0x05040cUL +#define GRC_REG_PROTECTION_OVERRIDE_WINDOW \ + 0x050500UL +#define IGU_REG_ERROR_HANDLING_MEMORY \ + 0x181520UL +#define MCP_REG_CPU_MODE \ + 0xe05000UL +#define MCP_REG_CPU_MODE_SOFT_HALT \ + (0x1 << 10) +#define BRB_REG_BIG_RAM_ADDRESS \ + 0x340800UL +#define BRB_REG_BIG_RAM_DATA \ + 0x341500UL +#define SEM_FAST_REG_STALL_0 \ + 0x000488UL +#define SEM_FAST_REG_STALLED \ + 0x000494UL +#define BTB_REG_BIG_RAM_ADDRESS \ + 0xdb0800UL +#define BTB_REG_BIG_RAM_DATA \ + 0xdb0c00UL +#define BMB_REG_BIG_RAM_ADDRESS \ + 0x540800UL +#define BMB_REG_BIG_RAM_DATA \ + 0x540f00UL +#define SEM_FAST_REG_STORM_REG_FILE \ + 0x008000UL +#define RSS_REG_RSS_RAM_ADDR \ + 0x238c30UL +#define MISCS_REG_BLOCK_256B_EN \ + 0x009074UL +#define MCP_REG_SCRATCH_SIZE \ + 57344 +#define MCP_REG_CPU_REG_FILE \ + 0xe05200UL +#define MCP_REG_CPU_REG_FILE_SIZE \ + 32 +#define DBG_REG_DEBUG_TARGET \ + 0x01005cUL +#define DBG_REG_FULL_MODE \ + 0x010060UL +#define DBG_REG_CALENDAR_OUT_DATA \ + 0x010480UL +#define GRC_REG_TRACE_FIFO \ + 0x050068UL +#define IGU_REG_ERROR_HANDLING_DATA_VALID \ + 0x181530UL +#define DBG_REG_DBG_BLOCK_ON \ + 0x010454UL +#define DBG_REG_FRAMING_MODE \ + 0x010058UL +#define SEM_FAST_REG_VFC_DATA_WR \ + 0x000b40UL +#define SEM_FAST_REG_VFC_ADDR \ + 0x000b44UL +#define SEM_FAST_REG_VFC_DATA_RD \ + 0x000b48UL +#define RSS_REG_RSS_RAM_DATA \ + 0x238c20UL +#define MISC_REG_BLOCK_256B_EN \ + 0x008c14UL +#define NWS_REG_NWS_CMU \ + 0x720000UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_7_0 \ + 0x000680UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_ADDR_15_8 \ + 0x000684UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_7_0 \ + 0x0006c0UL +#define PHY_NW_IP_REG_PHY0_TOP_TBUS_DATA_11_8 \ + 0x0006c4UL +#define MS_REG_MS_CMU \ + 0x6a4000UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X130 \ + 0x000208UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X132 \ + 0x000210UL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X131 \ + 0x00020cUL +#define PHY_SGMII_IP_REG_AHB_CMU_CSR_0_X133 \ + 0x000214UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X130 \ + 0x000208UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X131 \ + 0x00020cUL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X132 \ + 0x000210UL +#define PHY_PCIE_IP_REG_AHB_CMU_CSR_0_X133 \ + 0x000214UL +#define PHY_PCIE_REG_PHY0 \ + 0x620000UL +#define PHY_PCIE_REG_PHY1 \ + 0x624000UL +#define NIG_REG_ROCE_DUPLICATE_TO_HOST 0x5088f0UL +#define PRS_REG_LIGHT_L2_ETHERTYPE_EN 0x1f0968UL +#define NIG_REG_LLH_ENG_CLS_ENG_ID_TBL 0x501b90UL +#define DORQ_REG_PF_DPM_ENABLE 0x100510UL +#define DORQ_REG_PF_ICID_BIT_SHIFT_NORM 0x100448UL +#define DORQ_REG_PF_MIN_ADDR_REG1 0x100400UL +#define DORQ_REG_PF_DPI_BIT_SHIFT 0x100450UL #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c new file mode 100644 index 000000000000..23430059471c --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c @@ -0,0 +1,2954 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_cxt.h" +#include "qed_hsi.h" +#include "qed_hw.h" +#include "qed_init_ops.h" +#include "qed_int.h" +#include "qed_ll2.h" +#include "qed_mcp.h" +#include "qed_reg_addr.h" +#include "qed_sp.h" +#include "qed_roce.h" +#include "qed_ll2.h" + +void qed_async_roce_event(struct qed_hwfn *p_hwfn, + struct event_ring_entry *p_eqe) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + + p_rdma_info->events.affiliated_event(p_rdma_info->events.context, + p_eqe->opcode, &p_eqe->data); +} + +static int qed_rdma_bmap_alloc(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 max_count) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "max_count = %08x\n", max_count); + + bmap->max_count = max_count; + + bmap->bitmap = kzalloc(BITS_TO_LONGS(max_count) * sizeof(long), + GFP_KERNEL); + if (!bmap->bitmap) { + DP_NOTICE(p_hwfn, + "qed bmap alloc failed: cannot allocate memory (bitmap)\n"); + return -ENOMEM; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocated bitmap %p\n", + bmap->bitmap); + return 0; +} + +static int qed_rdma_bmap_alloc_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 *id_num) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "bmap = %p\n", bmap); + + *id_num = find_first_zero_bit(bmap->bitmap, bmap->max_count); + + if (*id_num >= bmap->max_count) { + DP_NOTICE(p_hwfn, "no id available max_count=%d\n", + bmap->max_count); + return -EINVAL; + } + + __set_bit(*id_num, bmap->bitmap); + + return 0; +} + +static void qed_bmap_release_id(struct qed_hwfn *p_hwfn, + struct qed_bmap *bmap, u32 id_num) +{ + bool b_acquired; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "id_num = %08x", id_num); + if (id_num >= bmap->max_count) + return; + + b_acquired = test_and_clear_bit(id_num, bmap->bitmap); + if (!b_acquired) { + DP_NOTICE(p_hwfn, "ID %d already released\n", id_num); + return; + } +} + +u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id) +{ + /* First sb id for RoCE is after all the l2 sb */ + return FEAT_NUM((struct qed_hwfn *)p_hwfn, QED_PF_L2_QUE) + rel_sb_id; +} + +u32 qed_rdma_query_cau_timer_res(void *rdma_cxt) +{ + return QED_CAU_DEF_RX_TIMER_RES; +} + +static int qed_rdma_alloc(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_info *p_rdma_info; + u32 num_cons, num_tasks; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocating RDMA\n"); + + /* Allocate a struct with current pf rdma info */ + p_rdma_info = kzalloc(sizeof(*p_rdma_info), GFP_KERNEL); + if (!p_rdma_info) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info). rc = %d\n", + rc); + return rc; + } + + p_hwfn->p_rdma_info = p_rdma_info; + p_rdma_info->proto = PROTOCOLID_ROCE; + + num_cons = qed_cxt_get_proto_cid_count(p_hwfn, p_rdma_info->proto, 0); + + p_rdma_info->num_qps = num_cons / 2; + + num_tasks = qed_cxt_get_proto_tid_count(p_hwfn, PROTOCOLID_ROCE); + + /* Each MR uses a single task */ + p_rdma_info->num_mrs = num_tasks; + + /* Queue zone lines are shared between RoCE and L2 in such a way that + * they can be used by each without obstructing the other. + */ + p_rdma_info->queue_zone_base = (u16)FEAT_NUM(p_hwfn, QED_L2_QUEUE); + + /* Allocate a struct with device params and fill it */ + p_rdma_info->dev = kzalloc(sizeof(*p_rdma_info->dev), GFP_KERNEL); + if (!p_rdma_info->dev) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info dev). rc = %d\n", + rc); + goto free_rdma_info; + } + + /* Allocate a struct with port params and fill it */ + p_rdma_info->port = kzalloc(sizeof(*p_rdma_info->port), GFP_KERNEL); + if (!p_rdma_info->port) { + DP_NOTICE(p_hwfn, + "qed rdma alloc failed: cannot allocate memory (rdma info port). rc = %d\n", + rc); + goto free_rdma_dev; + } + + /* Allocate bit map for pd's */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->pd_map, RDMA_MAX_PDS); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate pd_map, rc = %d\n", + rc); + goto free_rdma_port; + } + + /* Allocate DPI bitmap */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map, + p_hwfn->dpi_count); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate DPI bitmap, rc = %d\n", rc); + goto free_pd_map; + } + + /* Allocate bitmap for cq's. The maximum number of CQs is bounded to + * twice the number of QPs. + */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cq_map, + p_rdma_info->num_qps * 2); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate cq bitmap, rc = %d\n", rc); + goto free_dpi_map; + } + + /* Allocate bitmap for toggle bit for cq icids + * We toggle the bit every time we create or resize cq for a given icid. + * The maximum number of CQs is bounded to twice the number of QPs. + */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->toggle_bits, + p_rdma_info->num_qps * 2); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate toogle bits, rc = %d\n", rc); + goto free_cq_map; + } + + /* Allocate bitmap for itids */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->tid_map, + p_rdma_info->num_mrs); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate itids bitmaps, rc = %d\n", rc); + goto free_toggle_map; + } + + /* Allocate bitmap for cids used for qps. */ + rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->cid_map, num_cons); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to allocate cid bitmap, rc = %d\n", rc); + goto free_tid_map; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocation successful\n"); + return 0; + +free_tid_map: + kfree(p_rdma_info->tid_map.bitmap); +free_toggle_map: + kfree(p_rdma_info->toggle_bits.bitmap); +free_cq_map: + kfree(p_rdma_info->cq_map.bitmap); +free_dpi_map: + kfree(p_rdma_info->dpi_map.bitmap); +free_pd_map: + kfree(p_rdma_info->pd_map.bitmap); +free_rdma_port: + kfree(p_rdma_info->port); +free_rdma_dev: + kfree(p_rdma_info->dev); +free_rdma_info: + kfree(p_rdma_info); + + return rc; +} + +void qed_rdma_resc_free(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + + kfree(p_rdma_info->cid_map.bitmap); + kfree(p_rdma_info->tid_map.bitmap); + kfree(p_rdma_info->toggle_bits.bitmap); + kfree(p_rdma_info->cq_map.bitmap); + kfree(p_rdma_info->dpi_map.bitmap); + kfree(p_rdma_info->pd_map.bitmap); + + kfree(p_rdma_info->port); + kfree(p_rdma_info->dev); + + kfree(p_rdma_info); +} + +static void qed_rdma_free(struct qed_hwfn *p_hwfn) +{ + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Freeing RDMA\n"); + + qed_rdma_resc_free(p_hwfn); +} + +static void qed_rdma_get_guid(struct qed_hwfn *p_hwfn, u8 *guid) +{ + guid[0] = p_hwfn->hw_info.hw_mac_addr[0] ^ 2; + guid[1] = p_hwfn->hw_info.hw_mac_addr[1]; + guid[2] = p_hwfn->hw_info.hw_mac_addr[2]; + guid[3] = 0xff; + guid[4] = 0xfe; + guid[5] = p_hwfn->hw_info.hw_mac_addr[3]; + guid[6] = p_hwfn->hw_info.hw_mac_addr[4]; + guid[7] = p_hwfn->hw_info.hw_mac_addr[5]; +} + +static void qed_rdma_init_events(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_events *events; + + events = &p_hwfn->p_rdma_info->events; + + events->unaffiliated_event = params->events->unaffiliated_event; + events->affiliated_event = params->events->affiliated_event; + events->context = params->events->context; +} + +static void qed_rdma_init_devinfo(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params) +{ + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + struct qed_dev *cdev = p_hwfn->cdev; + u32 pci_status_control; + u32 num_qps; + + /* Vendor specific information */ + dev->vendor_id = cdev->vendor_id; + dev->vendor_part_id = cdev->device_id; + dev->hw_ver = 0; + dev->fw_ver = (FW_MAJOR_VERSION << 24) | (FW_MINOR_VERSION << 16) | + (FW_REVISION_VERSION << 8) | (FW_ENGINEERING_VERSION); + + qed_rdma_get_guid(p_hwfn, (u8 *)&dev->sys_image_guid); + dev->node_guid = dev->sys_image_guid; + + dev->max_sge = min_t(u32, RDMA_MAX_SGE_PER_SQ_WQE, + RDMA_MAX_SGE_PER_RQ_WQE); + + if (cdev->rdma_max_sge) + dev->max_sge = min_t(u32, cdev->rdma_max_sge, dev->max_sge); + + dev->max_inline = ROCE_REQ_MAX_INLINE_DATA_SIZE; + + dev->max_inline = (cdev->rdma_max_inline) ? + min_t(u32, cdev->rdma_max_inline, dev->max_inline) : + dev->max_inline; + + dev->max_wqe = QED_RDMA_MAX_WQE; + dev->max_cnq = (u8)FEAT_NUM(p_hwfn, QED_RDMA_CNQ); + + /* The number of QPs may be higher than QED_ROCE_MAX_QPS, because + * it is up-aligned to 16 and then to ILT page size within qed cxt. + * This is OK in terms of ILT but we don't want to configure the FW + * above its abilities + */ + num_qps = ROCE_MAX_QPS; + num_qps = min_t(u64, num_qps, p_hwfn->p_rdma_info->num_qps); + dev->max_qp = num_qps; + + /* CQs uses the same icids that QPs use hence they are limited by the + * number of icids. There are two icids per QP. + */ + dev->max_cq = num_qps * 2; + + /* The number of mrs is smaller by 1 since the first is reserved */ + dev->max_mr = p_hwfn->p_rdma_info->num_mrs - 1; + dev->max_mr_size = QED_RDMA_MAX_MR_SIZE; + + /* The maximum CQE capacity per CQ supported. + * max number of cqes will be in two layer pbl, + * 8 is the pointer size in bytes + * 32 is the size of cq element in bytes + */ + if (params->cq_mode == QED_RDMA_CQ_MODE_32_BITS) + dev->max_cqe = QED_RDMA_MAX_CQE_32_BIT; + else + dev->max_cqe = QED_RDMA_MAX_CQE_16_BIT; + + dev->max_mw = 0; + dev->max_fmr = QED_RDMA_MAX_FMR; + dev->max_mr_mw_fmr_pbl = (PAGE_SIZE / 8) * (PAGE_SIZE / 8); + dev->max_mr_mw_fmr_size = dev->max_mr_mw_fmr_pbl * PAGE_SIZE; + dev->max_pkey = QED_RDMA_MAX_P_KEY; + + dev->max_qp_resp_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + (RDMA_RESP_RD_ATOMIC_ELM_SIZE * 2); + dev->max_qp_req_rd_atomic_resc = RDMA_RING_PAGE_SIZE / + RDMA_REQ_RD_ATOMIC_ELM_SIZE; + dev->max_dev_resp_rd_atomic_resc = dev->max_qp_resp_rd_atomic_resc * + p_hwfn->p_rdma_info->num_qps; + dev->page_size_caps = QED_RDMA_PAGE_SIZE_CAPS; + dev->dev_ack_delay = QED_RDMA_ACK_DELAY; + dev->max_pd = RDMA_MAX_PDS; + dev->max_ah = p_hwfn->p_rdma_info->num_qps; + dev->max_stats_queues = (u8)RESC_NUM(p_hwfn, QED_RDMA_STATS_QUEUE); + + /* Set capablities */ + dev->dev_caps = 0; + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RNR_NAK, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_RESIZE_CQ, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_MEMORY_EXT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_BASE_QUEUE_EXT, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ZBVA, 1); + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1); + + /* Check atomic operations support in PCI configuration space. */ + pci_read_config_dword(cdev->pdev, + cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2, + &pci_status_control); + + if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN) + SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1); +} + +static void qed_rdma_init_port(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_port *port = p_hwfn->p_rdma_info->port; + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + port->port_state = p_hwfn->mcp_info->link_output.link_up ? + QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + + port->max_msg_size = min_t(u64, + (dev->max_mr_mw_fmr_size * + p_hwfn->cdev->rdma_max_sge), + BIT(31)); + + port->pkey_bad_counter = 0; +} + +static int qed_rdma_init_hw(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 ll2_ethertype_en; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW\n"); + p_hwfn->b_rdma_enabled_in_prs = false; + + qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); + + p_hwfn->rdma_prs_search_reg = PRS_REG_SEARCH_ROCE; + + /* We delay writing to this reg until first cid is allocated. See + * qed_cxt_dynamic_ilt_alloc function for more details + */ + ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); + qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en | 0x01)); + + if (qed_cxt_get_proto_cid_start(p_hwfn, PROTOCOLID_ROCE) % 2) { + DP_NOTICE(p_hwfn, "The first RoCE's cid should be even\n"); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Initializing HW - Done\n"); + return 0; +} + +static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn, + struct qed_rdma_start_in_params *params, + struct qed_ptt *p_ptt) +{ + struct rdma_init_func_ramrod_data *p_ramrod; + struct qed_rdma_cnq_params *p_cnq_pbl_list; + struct rdma_init_func_hdr *p_params_header; + struct rdma_cnq_params *p_cnq_params; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + u32 cnq_id, sb_id; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Starting FW\n"); + + /* Save the number of cnqs for the function close ramrod */ + p_hwfn->p_rdma_info->num_cnqs = params->desired_cnq; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_INIT, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.roce_init_func.rdma; + + p_params_header = &p_ramrod->params_header; + p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn, + QED_RDMA_CNQ_RAM); + p_params_header->num_cnqs = params->desired_cnq; + + if (params->cq_mode == QED_RDMA_CQ_MODE_16_BITS) + p_params_header->cq_ring_mode = 1; + else + p_params_header->cq_ring_mode = 0; + + for (cnq_id = 0; cnq_id < params->desired_cnq; cnq_id++) { + sb_id = qed_rdma_get_sb_id(p_hwfn, cnq_id); + p_cnq_params = &p_ramrod->cnq_params[cnq_id]; + p_cnq_pbl_list = ¶ms->cnq_pbl_list[cnq_id]; + p_cnq_params->sb_num = + cpu_to_le16(p_hwfn->sbs_info[sb_id]->igu_sb_id); + + p_cnq_params->sb_index = p_hwfn->pf_params.rdma_pf_params.gl_pi; + p_cnq_params->num_pbl_pages = p_cnq_pbl_list->num_pbl_pages; + + DMA_REGPAIR_LE(p_cnq_params->pbl_base_addr, + p_cnq_pbl_list->pbl_ptr); + + /* we assume here that cnq_id and qz_offset are the same */ + p_cnq_params->queue_zone_num = + cpu_to_le16(p_hwfn->p_rdma_info->queue_zone_base + + cnq_id); + } + + return qed_spq_post(p_hwfn, p_ent, NULL); +} + +static int qed_rdma_reserve_lkey(struct qed_hwfn *p_hwfn) +{ + struct qed_rdma_device *dev = p_hwfn->p_rdma_info->dev; + + /* The first DPI is reserved for the Kernel */ + __set_bit(0, p_hwfn->p_rdma_info->dpi_map.bitmap); + + /* Tid 0 will be used as the key for "reserved MR". + * The driver should allocate memory for it so it can be loaded but no + * ramrod should be passed on it. + */ + qed_rdma_alloc_tid(p_hwfn, &dev->reserved_lkey); + if (dev->reserved_lkey != RDMA_RESERVED_LKEY) { + DP_NOTICE(p_hwfn, + "Reserved lkey should be equal to RDMA_RESERVED_LKEY\n"); + return -EINVAL; + } + + return 0; +} + +static int qed_rdma_setup(struct qed_hwfn *p_hwfn, + struct qed_ptt *p_ptt, + struct qed_rdma_start_in_params *params) +{ + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA setup\n"); + + spin_lock_init(&p_hwfn->p_rdma_info->lock); + + qed_rdma_init_devinfo(p_hwfn, params); + qed_rdma_init_port(p_hwfn); + qed_rdma_init_events(p_hwfn, params); + + rc = qed_rdma_reserve_lkey(p_hwfn); + if (rc) + return rc; + + rc = qed_rdma_init_hw(p_hwfn, p_ptt); + if (rc) + return rc; + + return qed_rdma_start_fw(p_hwfn, params, p_ptt); +} + +int qed_rdma_stop(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_close_func_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + struct qed_ptt *p_ptt; + u32 ll2_ethertype_en; + int rc = -EBUSY; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop\n"); + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Failed to acquire PTT\n"); + return rc; + } + + /* Disable RoCE search */ + qed_wr(p_hwfn, p_ptt, p_hwfn->rdma_prs_search_reg, 0); + p_hwfn->b_rdma_enabled_in_prs = false; + + qed_wr(p_hwfn, p_ptt, PRS_REG_ROCE_DEST_QP_MAX_PF, 0); + + ll2_ethertype_en = qed_rd(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN); + + qed_wr(p_hwfn, p_ptt, PRS_REG_LIGHT_L2_ETHERTYPE_EN, + (ll2_ethertype_en & 0xFFFE)); + + qed_ptt_release(p_hwfn, p_ptt); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Stop RoCE */ + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_FUNC_CLOSE, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto out; + + p_ramrod = &p_ent->ramrod.rdma_close_func; + + p_ramrod->num_cnqs = p_hwfn->p_rdma_info->num_cnqs; + p_ramrod->cnq_start_offset = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + +out: + qed_rdma_free(p_hwfn); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA stop done, rc = %d\n", rc); + return rc; +} + +int qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + u32 dpi_start_offset; + u32 returned_id = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding User\n"); + + /* Allocate DPI */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, + &returned_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + out_params->dpi = (u16)returned_id; + + /* Calculate the corresponding DPI address */ + dpi_start_offset = p_hwfn->dpi_start_offset; + + out_params->dpi_addr = (u64)((u8 __iomem *)p_hwfn->doorbells + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size)); + + out_params->dpi_phys_addr = p_hwfn->cdev->db_phys_addr + + dpi_start_offset + + ((out_params->dpi) * p_hwfn->dpi_size); + + out_params->dpi_size = p_hwfn->dpi_size; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Adding user - done, rc = %d\n", rc); + return rc; +} + +struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_port *p_port = p_hwfn->p_rdma_info->port; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA Query port\n"); + + /* Link may have changed */ + p_port->port_state = p_hwfn->mcp_info->link_output.link_up ? + QED_RDMA_PORT_UP : QED_RDMA_PORT_DOWN; + + p_port->link_speed = p_hwfn->mcp_info->link_output.speed; + + return p_port; +} + +struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query device\n"); + + /* Return struct with device parameters */ + return p_hwfn->p_rdma_info->dev; +} + +void qed_rdma_free_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +int qed_rdma_alloc_tid(void *rdma_cxt, u32 *itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID\n"); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->tid_map, itid); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + if (rc) + goto out; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_TASK, *itid); +out: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Allocate TID - done, rc = %d\n", rc); + return rc; +} + +void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 qz_offset, u16 prod) +{ + struct qed_hwfn *p_hwfn; + u16 qz_num; + u32 addr; + + p_hwfn = (struct qed_hwfn *)rdma_cxt; + qz_num = p_hwfn->p_rdma_info->queue_zone_base + qz_offset; + addr = GTT_BAR0_MAP_REG_USDM_RAM + + USTORM_COMMON_QUEUE_CONS_OFFSET(qz_num); + + REG_WR16(p_hwfn, addr, prod); + + /* keep prod updates ordered */ + wmb(); +} + +static int qed_fill_rdma_dev_info(struct qed_dev *cdev, + struct qed_dev_rdma_info *info) +{ + memset(info, 0, sizeof(*info)); + + info->rdma_type = QED_RDMA_TYPE_ROCE; + + qed_fill_dev_info(cdev, &info->common); + + return 0; +} + +static int qed_rdma_get_sb_start(struct qed_dev *cdev) +{ + int feat_num; + + if (cdev->num_hwfns > 1) + feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE); + else + feat_num = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_PF_L2_QUE) * + cdev->num_hwfns; + + return feat_num; +} + +static int qed_rdma_get_min_cnq_msix(struct qed_dev *cdev) +{ + int n_cnq = FEAT_NUM(QED_LEADING_HWFN(cdev), QED_RDMA_CNQ); + int n_msix = cdev->int_params.rdma_msix_cnt; + + return min_t(int, n_cnq, n_msix); +} + +static int qed_rdma_set_int(struct qed_dev *cdev, u16 cnt) +{ + int limit = 0; + + /* Mark the fastpath as free/used */ + cdev->int_params.fp_initialized = cnt ? true : false; + + if (cdev->int_params.out.int_mode != QED_INT_MODE_MSIX) { + DP_ERR(cdev, + "qed roce supports only MSI-X interrupts (detected %d).\n", + cdev->int_params.out.int_mode); + return -EINVAL; + } else if (cdev->int_params.fp_msix_cnt) { + limit = cdev->int_params.rdma_msix_cnt; + } + + if (!limit) + return -ENOMEM; + + return min_t(int, cnt, limit); +} + +static int qed_rdma_get_int(struct qed_dev *cdev, struct qed_int_info *info) +{ + memset(info, 0, sizeof(*info)); + + if (!cdev->int_params.fp_initialized) { + DP_INFO(cdev, + "Protocol driver requested interrupt information, but its support is not yet configured\n"); + return -EINVAL; + } + + if (cdev->int_params.out.int_mode == QED_INT_MODE_MSIX) { + int msix_base = cdev->int_params.rdma_msix_base; + + info->msix_cnt = cdev->int_params.rdma_msix_cnt; + info->msix = &cdev->int_params.msix_table[msix_base]; + + DP_VERBOSE(cdev, QED_MSG_RDMA, "msix_cnt = %d msix_base=%d\n", + info->msix_cnt, msix_base); + } + + return 0; +} + +int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + u32 returned_id; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD\n"); + + /* Allocates an unused protection domain */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_hwfn->p_rdma_info->pd_map, &returned_id); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + *pd = (u16)returned_id; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc PD - done, rc = %d\n", rc); + return rc; +} + +void qed_rdma_free_pd(void *rdma_cxt, u16 pd) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "pd = %08x\n", pd); + + /* Returns a previously allocated protection domain for reuse */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->pd_map, pd); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +static enum qed_rdma_toggle_bit +qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid) +{ + struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; + enum qed_rdma_toggle_bit toggle_bit; + u32 bmap_id; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", icid); + + /* the function toggle the bit that is related to a given icid + * and returns the new toggle bit's value + */ + bmap_id = icid - qed_cxt_get_proto_cid_start(p_hwfn, p_info->proto); + + spin_lock_bh(&p_info->lock); + toggle_bit = !test_and_change_bit(bmap_id, + p_info->toggle_bits.bitmap); + spin_unlock_bh(&p_info->lock); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QED_RDMA_TOGGLE_BIT_= %d\n", + toggle_bit); + + return toggle_bit; +} + +int qed_rdma_create_cq(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, u16 *icid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_info *p_info = p_hwfn->p_rdma_info; + struct rdma_create_cq_ramrod_data *p_ramrod; + enum qed_rdma_toggle_bit toggle_bit; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + u32 returned_id, start_cid; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "cq_handle = %08x%08x\n", + params->cq_handle_hi, params->cq_handle_lo); + + /* Allocate icid */ + spin_lock_bh(&p_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, + &p_info->cq_map, &returned_id); + spin_unlock_bh(&p_info->lock); + + if (rc) { + DP_NOTICE(p_hwfn, "Can't create CQ, rc = %d\n", rc); + return rc; + } + + start_cid = qed_cxt_get_proto_cid_start(p_hwfn, + p_info->proto); + *icid = returned_id + start_cid; + + /* Check if icid requires a page allocation */ + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, *icid); + if (rc) + goto err; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = *icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Send create CQ ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_CREATE_CQ, + p_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_create_cq; + + p_ramrod->cq_handle.hi = cpu_to_le32(params->cq_handle_hi); + p_ramrod->cq_handle.lo = cpu_to_le32(params->cq_handle_lo); + p_ramrod->dpi = cpu_to_le16(params->dpi); + p_ramrod->is_two_level_pbl = params->pbl_two_level; + p_ramrod->max_cqes = cpu_to_le32(params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, params->pbl_ptr); + p_ramrod->pbl_num_pages = cpu_to_le16(params->pbl_num_pages); + p_ramrod->cnq_id = (u8)RESC_START(p_hwfn, QED_RDMA_CNQ_RAM) + + params->cnq_id; + p_ramrod->int_timeout = params->int_timeout; + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + + p_ramrod->toggle_bit = toggle_bit; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) { + /* restore toggle bit */ + qed_rdma_toggle_bit_create_resize_cq(p_hwfn, *icid); + goto err; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Created CQ, rc = %d\n", rc); + return rc; + +err: + /* release allocated icid */ + qed_bmap_release_id(p_hwfn, &p_info->cq_map, returned_id); + DP_NOTICE(p_hwfn, "Create CQ failed, rc = %d\n", rc); + + return rc; +} + +int qed_rdma_resize_cq(void *rdma_cxt, + struct qed_rdma_resize_cq_in_params *in_params, + struct qed_rdma_resize_cq_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_resize_cq_output_params *p_ramrod_res; + struct rdma_resize_cq_ramrod_data *p_ramrod; + enum qed_rdma_toggle_bit toggle_bit; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + u8 fw_return_code; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = + (struct rdma_resize_cq_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed resize cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_RESIZE_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_resize_cq; + + p_ramrod->flags = 0; + + /* toggle the bit for every resize or create cq for a given icid */ + toggle_bit = qed_rdma_toggle_bit_create_resize_cq(p_hwfn, + in_params->icid); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_TOGGLE_BIT, toggle_bit); + + SET_FIELD(p_ramrod->flags, + RDMA_RESIZE_CQ_RAMROD_DATA_IS_TWO_LEVEL_PBL, + in_params->pbl_two_level); + + p_ramrod->pbl_log_page_size = in_params->pbl_page_size_log - 12; + p_ramrod->pbl_num_pages = cpu_to_le16(in_params->pbl_num_pages); + p_ramrod->max_cqes = cpu_to_le32(in_params->cq_size); + DMA_REGPAIR_LE(p_ramrod->pbl_addr, in_params->pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) + goto err; + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + rc = -EINVAL; + goto err; + } + + out_params->prod = le32_to_cpu(p_ramrod_res->old_cq_prod); + out_params->cons = le32_to_cpu(p_ramrod_res->old_cq_cons); + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Resized CQ, rc = %d\n", rc); + + return rc; + +err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_resize_cq_output_params), + p_ramrod_res, ramrod_res_phys); + DP_NOTICE(p_hwfn, "Resized CQ, Failed - rc = %d\n", rc); + + return rc; +} + +int qed_rdma_destroy_cq(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *in_params, + struct qed_rdma_destroy_cq_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_destroy_cq_output_params *p_ramrod_res; + struct rdma_destroy_cq_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", in_params->icid); + + p_ramrod_res = + (struct rdma_destroy_cq_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed destroy cq failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = in_params->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + /* Send destroy CQ ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DESTROY_CQ, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.rdma_destroy_cq; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + out_params->num_cq_notif = le16_to_cpu(p_ramrod_res->cnq_num); + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + /* Free icid */ + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + qed_bmap_release_id(p_hwfn, + &p_hwfn->p_rdma_info->cq_map, + (in_params->icid - + qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn-> + p_rdma_info->proto))); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroyed CQ, rc = %d\n", rc); + return rc; + +err: dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct rdma_destroy_cq_output_params), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + +static void qed_rdma_set_fw_mac(u16 *p_fw_mac, u8 *p_qed_mac) +{ + p_fw_mac[0] = cpu_to_le16((p_qed_mac[0] << 8) + p_qed_mac[1]); + p_fw_mac[1] = cpu_to_le16((p_qed_mac[2] << 8) + p_qed_mac[3]); + p_fw_mac[2] = cpu_to_le16((p_qed_mac[4] << 8) + p_qed_mac[5]); +} + +static void qed_rdma_copy_gids(struct qed_rdma_qp *qp, __le32 *src_gid, + __le32 *dst_gid) +{ + u32 i; + + if (qp->roce_mode == ROCE_V2_IPV4) { + /* The IPv4 addresses shall be aligned to the highest word. + * The lower words must be zero. + */ + memset(src_gid, 0, sizeof(union qed_gid)); + memset(dst_gid, 0, sizeof(union qed_gid)); + src_gid[3] = cpu_to_le32(qp->sgid.ipv4_addr); + dst_gid[3] = cpu_to_le32(qp->dgid.ipv4_addr); + } else { + /* GIDs and IPv6 addresses coincide in location and size */ + for (i = 0; i < ARRAY_SIZE(qp->sgid.dwords); i++) { + src_gid[i] = cpu_to_le32(qp->sgid.dwords[i]); + dst_gid[i] = cpu_to_le32(qp->dgid.dwords[i]); + } + } +} + +static enum roce_flavor qed_roce_mode_to_flavor(enum roce_mode roce_mode) +{ + enum roce_flavor flavor; + + switch (roce_mode) { + case ROCE_V1: + flavor = PLAIN_ROCE; + break; + case ROCE_V2_IPV4: + flavor = RROCE_IPV4; + break; + case ROCE_V2_IPV6: + flavor = ROCE_V2_IPV6; + break; + default: + flavor = MAX_ROCE_MODE; + break; + } + return flavor; +} + +int qed_roce_alloc_cid(struct qed_hwfn *p_hwfn, u16 *cid) +{ + struct qed_rdma_info *p_rdma_info = p_hwfn->p_rdma_info; + u32 responder_icid; + u32 requester_icid; + int rc; + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map, + &responder_icid); + if (rc) { + spin_unlock_bh(&p_rdma_info->lock); + return rc; + } + + rc = qed_rdma_bmap_alloc_id(p_hwfn, &p_rdma_info->cid_map, + &requester_icid); + + spin_unlock_bh(&p_rdma_info->lock); + if (rc) + goto err; + + /* the two icid's should be adjacent */ + if ((requester_icid - responder_icid) != 1) { + DP_NOTICE(p_hwfn, "Failed to allocate two adjacent qp's'\n"); + rc = -EINVAL; + goto err; + } + + responder_icid += qed_cxt_get_proto_cid_start(p_hwfn, + p_rdma_info->proto); + requester_icid += qed_cxt_get_proto_cid_start(p_hwfn, + p_rdma_info->proto); + + /* If these icids require a new ILT line allocate DMA-able context for + * an ILT page + */ + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, responder_icid); + if (rc) + goto err; + + rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, QED_ELEM_CXT, requester_icid); + if (rc) + goto err; + + *cid = (u16)responder_icid; + return rc; + +err: + spin_lock_bh(&p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, responder_icid); + qed_bmap_release_id(p_hwfn, &p_rdma_info->cid_map, requester_icid); + + spin_unlock_bh(&p_rdma_info->lock); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Allocate CID - failed, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp) +{ + struct roce_create_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + union qed_qm_pq_params qm_params; + enum roce_flavor roce_flavor; + struct qed_spq_entry *p_ent; + u16 physical_queue0 = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for IRQ */ + qp->irq_num_pages = 1; + qp->irq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + RDMA_RING_PAGE_SIZE, + &qp->irq_phys_addr, GFP_KERNEL); + if (!qp->irq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed create responder failed: cannot allocate memory (irq). rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_resp; + + p_ramrod->flags = 0; + + roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ROCE_FLAVOR, roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_SRQ_FLG, qp->use_srq); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_RESERVED_KEY_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->irq_num_pages = qp->irq_num_pages; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + p_ramrod->initial_psn = cpu_to_le32(qp->rq_psn); + p_ramrod->pd = cpu_to_le16(qp->pd); + p_ramrod->rq_num_pages = cpu_to_le16(qp->rq_num_pages); + DMA_REGPAIR_LE(p_ramrod->rq_pbl_addr, qp->rq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->irq_pbl_addr, qp->irq_phys_addr); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); + p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; + p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | + qp->rq_cq_id); + + memset(&qm_params, 0, sizeof(qm_params)); + qm_params.roce.qpid = qp->icid >> 1; + physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + + p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); + p_ramrod->dpi = cpu_to_le16(qp->dpi); + + qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id); + p_ramrod->srq_id.srq_idx = cpu_to_le16(qp->srq_id); + p_ramrod->srq_id.opaque_fid = cpu_to_le16(p_hwfn->hw_info.opaque_fid); + + p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d physical_queue0 = 0x%x\n", + rc, physical_queue0); + + if (rc) + goto err; + + qp->resp_offloaded = true; + + return rc; + +err: + DP_NOTICE(p_hwfn, "create responder - failed, rc = %d\n", rc); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->irq_num_pages * RDMA_RING_PAGE_SIZE, + qp->irq, qp->irq_phys_addr); + + return rc; +} + +static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp) +{ + struct roce_create_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + union qed_qm_pq_params qm_params; + enum roce_flavor roce_flavor; + struct qed_spq_entry *p_ent; + u16 physical_queue0 = 0; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* Allocate DMA-able memory for ORQ */ + qp->orq_num_pages = 1; + qp->orq = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + RDMA_RING_PAGE_SIZE, + &qp->orq_phys_addr, GFP_KERNEL); + if (!qp->orq) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed create requester failed: cannot allocate memory (orq). rc = %d\n", + rc); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_CREATE_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_create_qp_req; + + p_ramrod->flags = 0; + + roce_flavor = qed_roce_mode_to_flavor(qp->roce_mode); + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ROCE_FLAVOR, roce_flavor); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_FMR_AND_RESERVED_EN, + qp->fmr_and_reserved_lkey); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_SIGNALED_COMP, qp->signal_all); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->orq_num_pages = qp->orq_num_pages; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->dst_qp_id = cpu_to_le32(qp->dest_qp); + p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + p_ramrod->initial_psn = cpu_to_le32(qp->sq_psn); + p_ramrod->pd = cpu_to_le16(qp->pd); + p_ramrod->sq_num_pages = cpu_to_le16(qp->sq_num_pages); + DMA_REGPAIR_LE(p_ramrod->sq_pbl_addr, qp->sq_pbl_ptr); + DMA_REGPAIR_LE(p_ramrod->orq_pbl_addr, qp->orq_phys_addr); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + p_ramrod->qp_handle_for_async.hi = cpu_to_le32(qp->qp_handle_async.hi); + p_ramrod->qp_handle_for_async.lo = cpu_to_le32(qp->qp_handle_async.lo); + p_ramrod->qp_handle_for_cqe.hi = cpu_to_le32(qp->qp_handle.hi); + p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo); + p_ramrod->stats_counter_id = p_hwfn->rel_pf_id; + p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) | + qp->sq_cq_id); + + memset(&qm_params, 0, sizeof(qm_params)); + qm_params.roce.qpid = qp->icid >> 1; + physical_queue0 = qed_get_qm_pq(p_hwfn, PROTOCOLID_ROCE, &qm_params); + + p_ramrod->physical_queue0 = cpu_to_le16(physical_queue0); + p_ramrod->dpi = cpu_to_le16(qp->dpi); + + qed_rdma_set_fw_mac(p_ramrod->remote_mac_addr, qp->remote_mac_addr); + qed_rdma_set_fw_mac(p_ramrod->local_mac_addr, qp->local_mac_addr); + + p_ramrod->udp_src_port = qp->udp_src_port; + p_ramrod->vlan_id = cpu_to_le16(qp->vlan_id); + p_ramrod->stats_counter_id = RESC_START(p_hwfn, QED_RDMA_STATS_QUEUE) + + qp->stats_queue; + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + + if (rc) + goto err; + + qp->req_offloaded = true; + + return rc; + +err: + DP_NOTICE(p_hwfn, "Create requested - failed, rc = %d\n", rc); + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->orq_num_pages * RDMA_RING_PAGE_SIZE, + qp->orq, qp->orq_phys_addr); + return rc; +} + +static int qed_roce_sp_modify_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + bool move_to_err, u32 modify_flags) +{ + struct roce_modify_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !qp->resp_offloaded) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) { + DP_NOTICE(p_hwfn, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_resp; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_RD_EN, + qp->incoming_rdma_read_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_WR_EN, + qp->incoming_rdma_write_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ATOMIC_EN, + qp->incoming_atomic_en); + + SET_FIELD(p_ramrod->flags, + ROCE_CREATE_QP_RESP_RAMROD_DATA_E2E_FLOW_CONTROL_EN, + qp->e2e_flow_control_en); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_RDMA_OPS_EN_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MAX_IRD_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER, + qp->min_rnr_nak_timer); + + p_ramrod->max_ird = qp->max_rd_atomic_resp; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify responder, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + bool move_to_sqd, + bool move_to_err, u32 modify_flags) +{ + struct roce_modify_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (move_to_err && !(qp->req_offloaded)) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_EVENT_MODIFY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) { + DP_NOTICE(p_hwfn, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.roce_modify_qp_req; + + p_ramrod->flags = 0; + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_ERR_FLG, move_to_err); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MOVE_TO_SQD_FLG, move_to_sqd); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_EN_SQD_ASYNC_NOTIFY, + qp->sqd_async); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_P_KEY_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ADDRESS_VECTOR_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_MAX_ORD_FLG, + GET_FIELD(modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT_FLG, + GET_FIELD(modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT)); + + SET_FIELD(p_ramrod->flags, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ACK_TIMEOUT_FLG, + GET_FIELD(modify_flags, + QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)); + + p_ramrod->fields = 0; + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_ERR_RETRY_CNT, qp->retry_cnt); + + SET_FIELD(p_ramrod->fields, + ROCE_MODIFY_QP_REQ_RAMROD_DATA_RNR_NAK_CNT, + qp->rnr_retry_cnt); + + p_ramrod->max_ord = qp->max_rd_atomic_req; + p_ramrod->traffic_class = qp->traffic_class_tos; + p_ramrod->hop_limit = qp->hop_limit_ttl; + p_ramrod->p_key = cpu_to_le16(qp->pkey); + p_ramrod->flow_label = cpu_to_le32(qp->flow_label); + p_ramrod->ack_timeout_val = cpu_to_le32(qp->ack_timeout); + p_ramrod->mtu = cpu_to_le16(qp->mtu); + qed_rdma_copy_gids(qp, p_ramrod->src_gid, p_ramrod->dst_gid); + rc = qed_spq_post(p_hwfn, p_ent, NULL); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify requester, rc = %d\n", rc); + return rc; +} + +static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + u32 *num_invalidated_mw) +{ + struct roce_destroy_qp_resp_output_params *p_ramrod_res; + struct roce_destroy_qp_resp_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->resp_offloaded) + return 0; + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, + ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + return rc; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_resp; + + p_ramrod_res = (struct roce_destroy_qp_resp_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res), + &ramrod_res_phys, GFP_KERNEL); + + if (!p_ramrod_res) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed destroy responder failed: cannot allocate memory (ramrod). rc = %d\n", + rc); + return rc; + } + + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + *num_invalidated_mw = le32_to_cpu(p_ramrod_res->num_invalidated_mw); + + /* Free IRQ - only if ramrod succeeded, in case FW is still using it */ + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->irq_num_pages * RDMA_RING_PAGE_SIZE, + qp->irq, qp->irq_phys_addr); + + qp->resp_offloaded = false; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy responder, rc = %d\n", rc); + +err: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(struct roce_destroy_qp_resp_output_params), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + +static int qed_roce_sp_destroy_qp_requester(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + u32 *num_bound_mw) +{ + struct roce_destroy_qp_req_output_params *p_ramrod_res; + struct roce_destroy_qp_req_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + dma_addr_t ramrod_res_phys; + int rc = -ENOMEM; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + if (!qp->req_offloaded) + return 0; + + p_ramrod_res = (struct roce_destroy_qp_req_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_ramrod_res), + &ramrod_res_phys, GFP_KERNEL); + if (!p_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed destroy requester failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid + 1; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_DESTROY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err; + + p_ramrod = &p_ent->ramrod.roce_destroy_qp_req; + DMA_REGPAIR_LE(p_ramrod->output_params_addr, ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err; + + *num_bound_mw = le32_to_cpu(p_ramrod_res->num_bound_mw); + + /* Free ORQ - only if ramrod succeeded, in case FW is still using it */ + dma_free_coherent(&p_hwfn->cdev->pdev->dev, + qp->orq_num_pages * RDMA_RING_PAGE_SIZE, + qp->orq, qp->orq_phys_addr); + + qp->req_offloaded = false; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Destroy requester, rc = %d\n", rc); + +err: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res), + p_ramrod_res, ramrod_res_phys); + + return rc; +} + +int qed_roce_query_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) +{ + struct roce_query_qp_resp_output_params *p_resp_ramrod_res; + struct roce_query_qp_req_output_params *p_req_ramrod_res; + struct roce_query_qp_resp_ramrod_data *p_resp_ramrod; + struct roce_query_qp_req_ramrod_data *p_req_ramrod; + struct qed_sp_init_data init_data; + dma_addr_t resp_ramrod_res_phys; + dma_addr_t req_ramrod_res_phys; + struct qed_spq_entry *p_ent; + bool rq_err_state; + bool sq_err_state; + bool sq_draining; + int rc = -ENOMEM; + + if ((!(qp->resp_offloaded)) && (!(qp->req_offloaded))) { + /* We can't send ramrod to the fw since this qp wasn't offloaded + * to the fw yet + */ + out_params->draining = false; + out_params->rq_psn = qp->rq_psn; + out_params->sq_psn = qp->sq_psn; + out_params->state = qp->cur_state; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "No QPs as no offload\n"); + return 0; + } + + if (!(qp->resp_offloaded)) { + DP_NOTICE(p_hwfn, + "The responder's qp should be offloded before requester's\n"); + return -EINVAL; + } + + /* Send a query responder ramrod to FW to get RQ-PSN and state */ + p_resp_ramrod_res = (struct roce_query_qp_resp_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_resp_ramrod_res), + &resp_ramrod_res_phys, GFP_KERNEL); + if (!p_resp_ramrod_res) { + DP_NOTICE(p_hwfn, + "qed query qp failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.cid = qp->icid; + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err_resp; + + p_resp_ramrod = &p_ent->ramrod.roce_query_qp_resp; + DMA_REGPAIR_LE(p_resp_ramrod->output_params_addr, resp_ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err_resp; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), + p_resp_ramrod_res, resp_ramrod_res_phys); + + out_params->rq_psn = le32_to_cpu(p_resp_ramrod_res->psn); + rq_err_state = GET_FIELD(le32_to_cpu(p_resp_ramrod_res->err_flag), + ROCE_QUERY_QP_RESP_OUTPUT_PARAMS_ERROR_FLG); + + if (!(qp->req_offloaded)) { + /* Don't send query qp for the requester */ + out_params->sq_psn = qp->sq_psn; + out_params->draining = false; + + if (rq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_ERR; + + out_params->state = qp->cur_state; + + return 0; + } + + /* Send a query requester ramrod to FW to get SQ-PSN and state */ + p_req_ramrod_res = (struct roce_query_qp_req_output_params *) + dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, + sizeof(*p_req_ramrod_res), + &req_ramrod_res_phys, + GFP_KERNEL); + if (!p_req_ramrod_res) { + rc = -ENOMEM; + DP_NOTICE(p_hwfn, + "qed query qp failed: cannot allocate memory (ramrod)\n"); + return rc; + } + + /* Get SPQ entry */ + init_data.cid = qp->icid + 1; + rc = qed_sp_init_request(p_hwfn, &p_ent, ROCE_RAMROD_QUERY_QP, + PROTOCOLID_ROCE, &init_data); + if (rc) + goto err_req; + + p_req_ramrod = &p_ent->ramrod.roce_query_qp_req; + DMA_REGPAIR_LE(p_req_ramrod->output_params_addr, req_ramrod_res_phys); + + rc = qed_spq_post(p_hwfn, p_ent, NULL); + if (rc) + goto err_req; + + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), + p_req_ramrod_res, req_ramrod_res_phys); + + out_params->sq_psn = le32_to_cpu(p_req_ramrod_res->psn); + sq_err_state = GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_ERR_FLG); + sq_draining = + GET_FIELD(le32_to_cpu(p_req_ramrod_res->flags), + ROCE_QUERY_QP_REQ_OUTPUT_PARAMS_SQ_DRAINING_FLG); + + out_params->draining = false; + + if (rq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_ERR; + else if (sq_err_state) + qp->cur_state = QED_ROCE_QP_STATE_SQE; + else if (sq_draining) + out_params->draining = true; + out_params->state = qp->cur_state; + + return 0; + +err_req: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_req_ramrod_res), + p_req_ramrod_res, req_ramrod_res_phys); + return rc; +err_resp: + dma_free_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_resp_ramrod_res), + p_resp_ramrod_res, resp_ramrod_res_phys); + return rc; +} + +int qed_roce_destroy_qp(struct qed_hwfn *p_hwfn, struct qed_rdma_qp *qp) +{ + u32 num_invalidated_mw = 0; + u32 num_bound_mw = 0; + u32 start_cid; + int rc; + + /* Destroys the specified QP */ + if ((qp->cur_state != QED_ROCE_QP_STATE_RESET) && + (qp->cur_state != QED_ROCE_QP_STATE_ERR) && + (qp->cur_state != QED_ROCE_QP_STATE_INIT)) { + DP_NOTICE(p_hwfn, + "QP must be in error, reset or init state before destroying it\n"); + return -EINVAL; + } + + rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, &num_invalidated_mw); + if (rc) + return rc; + + /* Send destroy requester ramrod */ + rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, &num_bound_mw); + if (rc) + return rc; + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + "number of invalidate memory windows is different from bounded ones\n"); + return -EINVAL; + } + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + + start_cid = qed_cxt_get_proto_cid_start(p_hwfn, + p_hwfn->p_rdma_info->proto); + + /* Release responder's icid */ + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, + qp->icid - start_cid); + + /* Release requester's icid */ + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->cid_map, + qp->icid + 1 - start_cid); + + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); + + return 0; +} + +int qed_rdma_query_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + /* The following fields are filled in from qp and not FW as they can't + * be modified by FW + */ + out_params->mtu = qp->mtu; + out_params->dest_qp = qp->dest_qp; + out_params->incoming_atomic_en = qp->incoming_atomic_en; + out_params->e2e_flow_control_en = qp->e2e_flow_control_en; + out_params->incoming_rdma_read_en = qp->incoming_rdma_read_en; + out_params->incoming_rdma_write_en = qp->incoming_rdma_write_en; + out_params->dgid = qp->dgid; + out_params->flow_label = qp->flow_label; + out_params->hop_limit_ttl = qp->hop_limit_ttl; + out_params->traffic_class_tos = qp->traffic_class_tos; + out_params->timeout = qp->ack_timeout; + out_params->rnr_retry = qp->rnr_retry_cnt; + out_params->retry_cnt = qp->retry_cnt; + out_params->min_rnr_nak_timer = qp->min_rnr_nak_timer; + out_params->pkey_index = 0; + out_params->max_rd_atomic = qp->max_rd_atomic_req; + out_params->max_dest_rd_atomic = qp->max_rd_atomic_resp; + out_params->sqd_async = qp->sqd_async; + + rc = qed_roce_query_qp(p_hwfn, qp, out_params); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Query QP, rc = %d\n", rc); + return rc; +} + +int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + int rc = 0; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid); + + rc = qed_roce_destroy_qp(p_hwfn, qp); + + /* free qp params struct */ + kfree(qp); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "QP destroyed\n"); + return rc; +} + +struct qed_rdma_qp * +qed_rdma_create_qp(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *in_params, + struct qed_rdma_create_qp_out_params *out_params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_rdma_qp *qp; + u8 max_stats_queues; + int rc; + + if (!rdma_cxt || !in_params || !out_params || !p_hwfn->p_rdma_info) { + DP_ERR(p_hwfn->cdev, + "qed roce create qp failed due to NULL entry (rdma_cxt=%p, in=%p, out=%p, roce_info=?\n", + rdma_cxt, in_params, out_params); + return NULL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "qed rdma create qp called with qp_handle = %08x%08x\n", + in_params->qp_handle_hi, in_params->qp_handle_lo); + + /* Some sanity checks... */ + max_stats_queues = p_hwfn->p_rdma_info->dev->max_stats_queues; + if (in_params->stats_queue >= max_stats_queues) { + DP_ERR(p_hwfn->cdev, + "qed rdma create qp failed due to invalid statistics queue %d. maximum is %d\n", + in_params->stats_queue, max_stats_queues); + return NULL; + } + + qp = kzalloc(sizeof(*qp), GFP_KERNEL); + if (!qp) { + DP_NOTICE(p_hwfn, "Failed to allocate qed_rdma_qp\n"); + return NULL; + } + + rc = qed_roce_alloc_cid(p_hwfn, &qp->icid); + qp->qpid = ((0xFF << 16) | qp->icid); + + DP_INFO(p_hwfn, "ROCE qpid=%x\n", qp->qpid); + + if (rc) { + kfree(qp); + return NULL; + } + + qp->cur_state = QED_ROCE_QP_STATE_RESET; + qp->qp_handle.hi = cpu_to_le32(in_params->qp_handle_hi); + qp->qp_handle.lo = cpu_to_le32(in_params->qp_handle_lo); + qp->qp_handle_async.hi = cpu_to_le32(in_params->qp_handle_async_hi); + qp->qp_handle_async.lo = cpu_to_le32(in_params->qp_handle_async_lo); + qp->use_srq = in_params->use_srq; + qp->signal_all = in_params->signal_all; + qp->fmr_and_reserved_lkey = in_params->fmr_and_reserved_lkey; + qp->pd = in_params->pd; + qp->dpi = in_params->dpi; + qp->sq_cq_id = in_params->sq_cq_id; + qp->sq_num_pages = in_params->sq_num_pages; + qp->sq_pbl_ptr = in_params->sq_pbl_ptr; + qp->rq_cq_id = in_params->rq_cq_id; + qp->rq_num_pages = in_params->rq_num_pages; + qp->rq_pbl_ptr = in_params->rq_pbl_ptr; + qp->srq_id = in_params->srq_id; + qp->req_offloaded = false; + qp->resp_offloaded = false; + qp->e2e_flow_control_en = qp->use_srq ? false : true; + qp->stats_queue = in_params->stats_queue; + + out_params->icid = qp->icid; + out_params->qp_id = qp->qpid; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Create QP, rc = %d\n", rc); + return qp; +} + +static int qed_roce_modify_qp(struct qed_hwfn *p_hwfn, + struct qed_rdma_qp *qp, + enum qed_roce_qp_state prev_state, + struct qed_rdma_modify_qp_in_params *params) +{ + u32 num_invalidated_mw = 0, num_bound_mw = 0; + int rc = 0; + + /* Perform additional operations according to the current state and the + * next state + */ + if (((prev_state == QED_ROCE_QP_STATE_INIT) || + (prev_state == QED_ROCE_QP_STATE_RESET)) && + (qp->cur_state == QED_ROCE_QP_STATE_RTR)) { + /* Init->RTR or Reset->RTR */ + rc = qed_roce_sp_create_responder(p_hwfn, qp); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTR) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* RTR-> RTS */ + rc = qed_roce_sp_create_requester(p_hwfn, qp); + if (rc) + return rc; + + /* Send modify responder ramrod */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTS) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* RTS->RTS */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_RTS) && + (qp->cur_state == QED_ROCE_QP_STATE_SQD)) { + /* RTS->SQD */ + rc = qed_roce_sp_modify_requester(p_hwfn, qp, true, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_SQD) && + (qp->cur_state == QED_ROCE_QP_STATE_SQD)) { + /* SQD->SQD */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + return rc; + } else if ((prev_state == QED_ROCE_QP_STATE_SQD) && + (qp->cur_state == QED_ROCE_QP_STATE_RTS)) { + /* SQD->RTS */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, false, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, false, + params->modify_flags); + + return rc; + } else if (qp->cur_state == QED_ROCE_QP_STATE_ERR || + qp->cur_state == QED_ROCE_QP_STATE_SQE) { + /* ->ERR */ + rc = qed_roce_sp_modify_responder(p_hwfn, qp, true, + params->modify_flags); + if (rc) + return rc; + + rc = qed_roce_sp_modify_requester(p_hwfn, qp, false, true, + params->modify_flags); + return rc; + } else if (qp->cur_state == QED_ROCE_QP_STATE_RESET) { + /* Any state -> RESET */ + + rc = qed_roce_sp_destroy_qp_responder(p_hwfn, qp, + &num_invalidated_mw); + if (rc) + return rc; + + rc = qed_roce_sp_destroy_qp_requester(p_hwfn, qp, + &num_bound_mw); + + if (num_invalidated_mw != num_bound_mw) { + DP_NOTICE(p_hwfn, + "number of invalidate memory windows is different from bounded ones\n"); + return -EINVAL; + } + } else { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "0\n"); + } + + return rc; +} + +int qed_rdma_modify_qp(void *rdma_cxt, + struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + enum qed_roce_qp_state prev_state; + int rc = 0; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x params->new_state=%d\n", + qp->icid, params->new_state); + + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN)) { + qp->incoming_rdma_read_en = params->incoming_rdma_read_en; + qp->incoming_rdma_write_en = params->incoming_rdma_write_en; + qp->incoming_atomic_en = params->incoming_atomic_en; + } + + /* Update QP structure with the updated values */ + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_ROCE_MODE)) + qp->roce_mode = params->roce_mode; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_PKEY)) + qp->pkey = params->pkey; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN)) + qp->e2e_flow_control_en = params->e2e_flow_control_en; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_DEST_QP)) + qp->dest_qp = params->dest_qp; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR)) { + /* Indicates that the following parameters have changed: + * Traffic class, flow label, hop limit, source GID, + * destination GID, loopback indicator + */ + qp->traffic_class_tos = params->traffic_class_tos; + qp->flow_label = params->flow_label; + qp->hop_limit_ttl = params->hop_limit_ttl; + + qp->sgid = params->sgid; + qp->dgid = params->dgid; + qp->udp_src_port = 0; + qp->vlan_id = params->vlan_id; + qp->mtu = params->mtu; + qp->lb_indication = params->lb_indication; + memcpy((u8 *)&qp->remote_mac_addr[0], + (u8 *)¶ms->remote_mac_addr[0], ETH_ALEN); + if (params->use_local_mac) { + memcpy((u8 *)&qp->local_mac_addr[0], + (u8 *)¶ms->local_mac_addr[0], ETH_ALEN); + } else { + memcpy((u8 *)&qp->local_mac_addr[0], + (u8 *)&p_hwfn->hw_info.hw_mac_addr, ETH_ALEN); + } + } + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_RQ_PSN)) + qp->rq_psn = params->rq_psn; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_SQ_PSN)) + qp->sq_psn = params->sq_psn; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ)) + qp->max_rd_atomic_req = params->max_rd_atomic_req; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP)) + qp->max_rd_atomic_resp = params->max_rd_atomic_resp; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT)) + qp->ack_timeout = params->ack_timeout; + if (GET_FIELD(params->modify_flags, QED_ROCE_MODIFY_QP_VALID_RETRY_CNT)) + qp->retry_cnt = params->retry_cnt; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT)) + qp->rnr_retry_cnt = params->rnr_retry_cnt; + if (GET_FIELD(params->modify_flags, + QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER)) + qp->min_rnr_nak_timer = params->min_rnr_nak_timer; + + qp->sqd_async = params->sqd_async; + + prev_state = qp->cur_state; + if (GET_FIELD(params->modify_flags, + QED_RDMA_MODIFY_QP_VALID_NEW_STATE)) { + qp->cur_state = params->new_state; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "qp->cur_state=%d\n", + qp->cur_state); + } + + rc = qed_roce_modify_qp(p_hwfn, qp, prev_state, params); + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Modify QP, rc = %d\n", rc); + return rc; +} + +int qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_register_tid_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + enum rdma_tid_type tid_type; + u8 fw_return_code; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", params->itid); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_REGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (p_hwfn->p_rdma_info->last_tid < params->itid) + p_hwfn->p_rdma_info->last_tid = params->itid; + + p_ramrod = &p_ent->ramrod.rdma_register_tid; + + p_ramrod->flags = 0; + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_TWO_LEVEL_PBL, + params->pbl_two_level); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ZERO_BASED, params->zbva); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PHY_MR, params->phy_mr); + + /* Don't initialize D/C field, as it may override other bits. */ + if (!(params->tid_type == QED_RDMA_TID_FMR) && !(params->dma_mr)) + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_PAGE_SIZE_LOG, + params->page_size_log - 12); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_MAX_ID, + p_hwfn->p_rdma_info->last_tid); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_READ, + params->remote_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_WRITE, + params->remote_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_REMOTE_ATOMIC, + params->remote_atomic); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_WRITE, + params->local_write); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_LOCAL_READ, params->local_read); + + SET_FIELD(p_ramrod->flags, + RDMA_REGISTER_TID_RAMROD_DATA_ENABLE_MW_BIND, + params->mw_bind); + + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_PBL_PAGE_SIZE_LOG, + params->pbl_page_size_log - 12); + + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DMA_MR, params->dma_mr); + + switch (params->tid_type) { + case QED_RDMA_TID_REGISTERED_MR: + tid_type = RDMA_TID_REGISTERED_MR; + break; + case QED_RDMA_TID_FMR: + tid_type = RDMA_TID_FMR; + break; + case QED_RDMA_TID_MW_TYPE1: + tid_type = RDMA_TID_MW_TYPE1; + break; + case QED_RDMA_TID_MW_TYPE2A: + tid_type = RDMA_TID_MW_TYPE2A; + break; + default: + rc = -EINVAL; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + SET_FIELD(p_ramrod->flags1, + RDMA_REGISTER_TID_RAMROD_DATA_TID_TYPE, tid_type); + + p_ramrod->itid = cpu_to_le32(params->itid); + p_ramrod->key = params->key; + p_ramrod->pd = cpu_to_le16(params->pd); + p_ramrod->length_hi = (u8)(params->length >> 32); + p_ramrod->length_lo = DMA_LO_LE(params->length); + if (params->zbva) { + /* Lower 32 bits of the registered MR address. + * In case of zero based MR, will hold FBO + */ + p_ramrod->va.hi = 0; + p_ramrod->va.lo = cpu_to_le32(params->fbo); + } else { + DMA_REGPAIR_LE(p_ramrod->va, params->vaddr); + } + DMA_REGPAIR_LE(p_ramrod->pbl_base, params->pbl_ptr); + + /* DIF */ + if (params->dif_enabled) { + SET_FIELD(p_ramrod->flags2, + RDMA_REGISTER_TID_RAMROD_DATA_DIF_ON_HOST_FLG, 1); + DMA_REGPAIR_LE(p_ramrod->dif_error_addr, + params->dif_error_addr); + DMA_REGPAIR_LE(p_ramrod->dif_runt_addr, params->dif_runt_addr); + } + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + return -EINVAL; + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Register TID, rc = %d\n", rc); + return rc; +} + +int qed_rdma_deregister_tid(void *rdma_cxt, u32 itid) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct rdma_deregister_tid_ramrod_data *p_ramrod; + struct qed_sp_init_data init_data; + struct qed_spq_entry *p_ent; + struct qed_ptt *p_ptt; + u8 fw_return_code; + int rc; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "itid = %08x\n", itid); + + /* Get SPQ entry */ + memset(&init_data, 0, sizeof(init_data)); + init_data.opaque_fid = p_hwfn->hw_info.opaque_fid; + init_data.comp_mode = QED_SPQ_MODE_EBLOCK; + + rc = qed_sp_init_request(p_hwfn, &p_ent, RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + p_ramrod = &p_ent->ramrod.rdma_deregister_tid; + p_ramrod->itid = cpu_to_le32(itid); + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "rc = %d\n", rc); + return rc; + } + + if (fw_return_code == RDMA_RETURN_DEREGISTER_MR_BAD_STATE_ERR) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", fw_return_code); + return -EINVAL; + } else if (fw_return_code == RDMA_RETURN_NIG_DRAIN_REQ) { + /* Bit indicating that the TID is in use and a nig drain is + * required before sending the ramrod again + */ + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) { + rc = -EBUSY; + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to acquire PTT\n"); + return rc; + } + + rc = qed_mcp_drain(p_hwfn, p_ptt); + if (rc) { + qed_ptt_release(p_hwfn, p_ptt); + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Drain failed\n"); + return rc; + } + + qed_ptt_release(p_hwfn, p_ptt); + + /* Resend the ramrod */ + rc = qed_sp_init_request(p_hwfn, &p_ent, + RDMA_RAMROD_DEREGISTER_MR, + p_hwfn->p_rdma_info->proto, + &init_data); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Failed to init sp-element\n"); + return rc; + } + + rc = qed_spq_post(p_hwfn, p_ent, &fw_return_code); + if (rc) { + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "Ramrod failed\n"); + return rc; + } + + if (fw_return_code != RDMA_RETURN_OK) { + DP_NOTICE(p_hwfn, "fw_return_code = %d\n", + fw_return_code); + return rc; + } + } + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "De-registered TID, rc = %d\n", rc); + return rc; +} + +static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev) +{ + return QED_LEADING_HWFN(cdev); +} + +static void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + u32 val; + + val = (p_hwfn->dcbx_no_edpm || p_hwfn->db_bar_no_edpm) ? 0 : 1; + + qed_wr(p_hwfn, p_ptt, DORQ_REG_PF_DPM_ENABLE, val); + DP_VERBOSE(p_hwfn, (QED_MSG_DCB | QED_MSG_RDMA), + "Changing DPM_EN state to %d (DCBX=%d, DB_BAR=%d)\n", + val, p_hwfn->dcbx_no_edpm, p_hwfn->db_bar_no_edpm); +} + +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +{ + p_hwfn->db_bar_no_edpm = true; + + qed_rdma_dpm_conf(p_hwfn, p_ptt); +} + +int qed_rdma_start(void *rdma_cxt, struct qed_rdma_start_in_params *params) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + struct qed_ptt *p_ptt; + int rc = -EBUSY; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, + "desired_cnq = %08x\n", params->desired_cnq); + + p_ptt = qed_ptt_acquire(p_hwfn); + if (!p_ptt) + goto err; + + rc = qed_rdma_alloc(p_hwfn, p_ptt, params); + if (rc) + goto err1; + + rc = qed_rdma_setup(p_hwfn, p_ptt, params); + if (rc) + goto err2; + + qed_ptt_release(p_hwfn, p_ptt); + + return rc; + +err2: + qed_rdma_free(p_hwfn); +err1: + qed_ptt_release(p_hwfn, p_ptt); +err: + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "RDMA start - error, rc = %d\n", rc); + return rc; +} + +static int qed_rdma_init(struct qed_dev *cdev, + struct qed_rdma_start_in_params *params) +{ + return qed_rdma_start(QED_LEADING_HWFN(cdev), params); +} + +void qed_rdma_remove_user(void *rdma_cxt, u16 dpi) +{ + struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt; + + DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "dpi = %08x\n", dpi); + + spin_lock_bh(&p_hwfn->p_rdma_info->lock); + qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->dpi_map, dpi); + spin_unlock_bh(&p_hwfn->p_rdma_info->lock); +} + +void qed_ll2b_complete_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + struct qed_roce_ll2_packet *packet = cookie; + struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2; + + roce_ll2->cbs.tx_cb(roce_ll2->cb_cookie, packet); +} + +void qed_ll2b_release_tx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t first_frag_addr, + bool b_last_fragment, bool b_last_packet) +{ + qed_ll2b_complete_tx_gsi_packet(p_hwfn, connection_handle, + cookie, first_frag_addr, + b_last_fragment, b_last_packet); +} + +void qed_ll2b_complete_rx_gsi_packet(struct qed_hwfn *p_hwfn, + u8 connection_handle, + void *cookie, + dma_addr_t rx_buf_addr, + u16 data_length, + u8 data_length_error, + u16 parse_flags, + u16 vlan, + u32 src_mac_addr_hi, + u16 src_mac_addr_lo, bool b_last_packet) +{ + struct qed_roce_ll2_info *roce_ll2 = p_hwfn->ll2; + struct qed_roce_ll2_rx_params params; + struct qed_dev *cdev = p_hwfn->cdev; + struct qed_roce_ll2_packet pkt; + + DP_VERBOSE(cdev, + QED_MSG_LL2, + "roce ll2 rx complete: bus_addr=%p, len=%d, data_len_err=%d\n", + (void *)(uintptr_t)rx_buf_addr, + data_length, data_length_error); + + memset(&pkt, 0, sizeof(pkt)); + pkt.n_seg = 1; + pkt.payload[0].baddr = rx_buf_addr; + pkt.payload[0].len = data_length; + + memset(¶ms, 0, sizeof(params)); + params.vlan_id = vlan; + *((u32 *)¶ms.smac[0]) = ntohl(src_mac_addr_hi); + *((u16 *)¶ms.smac[4]) = ntohs(src_mac_addr_lo); + + if (data_length_error) { + DP_ERR(cdev, + "roce ll2 rx complete: data length error %d, length=%d\n", + data_length_error, data_length); + params.rc = -EINVAL; + } + + roce_ll2->cbs.rx_cb(roce_ll2->cb_cookie, &pkt, ¶ms); +} + +static int qed_roce_ll2_set_mac_filter(struct qed_dev *cdev, + u8 *old_mac_address, + u8 *new_mac_address) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_ptt *p_ptt; + int rc = 0; + + if (!hwfn->ll2 || hwfn->ll2->handle == QED_LL2_UNUSED_HANDLE) { + DP_ERR(cdev, + "qed roce mac filter failed - roce_info/ll2 NULL\n"); + return -EINVAL; + } + + p_ptt = qed_ptt_acquire(QED_LEADING_HWFN(cdev)); + if (!p_ptt) { + DP_ERR(cdev, + "qed roce ll2 mac filter set: failed to acquire PTT\n"); + return -EINVAL; + } + + mutex_lock(&hwfn->ll2->lock); + if (old_mac_address) + qed_llh_remove_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + old_mac_address); + if (new_mac_address) + rc = qed_llh_add_mac_filter(QED_LEADING_HWFN(cdev), p_ptt, + new_mac_address); + mutex_unlock(&hwfn->ll2->lock); + + qed_ptt_release(QED_LEADING_HWFN(cdev), p_ptt); + + if (rc) + DP_ERR(cdev, + "qed roce ll2 mac filter set: failed to add mac filter\n"); + + return rc; +} + +static int qed_roce_ll2_start(struct qed_dev *cdev, + struct qed_roce_ll2_params *params) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2; + struct qed_ll2_info ll2_params; + int rc; + + if (!params) { + DP_ERR(cdev, "qed roce ll2 start: failed due to NULL params\n"); + return -EINVAL; + } + if (!params->cbs.tx_cb || !params->cbs.rx_cb) { + DP_ERR(cdev, + "qed roce ll2 start: failed due to NULL tx/rx. tx_cb=%p, rx_cb=%p\n", + params->cbs.tx_cb, params->cbs.rx_cb); + return -EINVAL; + } + if (!is_valid_ether_addr(params->mac_address)) { + DP_ERR(cdev, + "qed roce ll2 start: failed due to invalid Ethernet address %pM\n", + params->mac_address); + return -EINVAL; + } + + /* Initialize */ + roce_ll2 = kzalloc(sizeof(*roce_ll2), GFP_ATOMIC); + if (!roce_ll2) { + DP_ERR(cdev, "qed roce ll2 start: failed memory allocation\n"); + return -ENOMEM; + } + memset(roce_ll2, 0, sizeof(*roce_ll2)); + roce_ll2->handle = QED_LL2_UNUSED_HANDLE; + roce_ll2->cbs = params->cbs; + roce_ll2->cb_cookie = params->cb_cookie; + mutex_init(&roce_ll2->lock); + + memset(&ll2_params, 0, sizeof(ll2_params)); + ll2_params.conn_type = QED_LL2_TYPE_ROCE; + ll2_params.mtu = params->mtu; + ll2_params.rx_drop_ttl0_flg = true; + ll2_params.rx_vlan_removal_en = false; + ll2_params.tx_dest = CORE_TX_DEST_NW; + ll2_params.ai_err_packet_too_big = LL2_DROP_PACKET; + ll2_params.ai_err_no_buf = LL2_DROP_PACKET; + ll2_params.gsi_enable = true; + + rc = qed_ll2_acquire_connection(QED_LEADING_HWFN(cdev), &ll2_params, + params->max_rx_buffers, + params->max_tx_buffers, + &roce_ll2->handle); + if (rc) { + DP_ERR(cdev, + "qed roce ll2 start: failed to acquire LL2 connection (rc=%d)\n", + rc); + goto err; + } + + rc = qed_ll2_establish_connection(QED_LEADING_HWFN(cdev), + roce_ll2->handle); + if (rc) { + DP_ERR(cdev, + "qed roce ll2 start: failed to establish LL2 connection (rc=%d)\n", + rc); + goto err1; + } + + hwfn->ll2 = roce_ll2; + + rc = qed_roce_ll2_set_mac_filter(cdev, NULL, params->mac_address); + if (rc) { + hwfn->ll2 = NULL; + goto err2; + } + ether_addr_copy(roce_ll2->mac_address, params->mac_address); + + return 0; + +err2: + qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); +err1: + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); +err: + kfree(roce_ll2); + return rc; +} + +static int qed_roce_ll2_stop(struct qed_dev *cdev) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + int rc; + + if (!cdev) { + DP_ERR(cdev, "qed roce ll2 stop: invalid cdev\n"); + return -EINVAL; + } + + if (roce_ll2->handle == QED_LL2_UNUSED_HANDLE) { + DP_ERR(cdev, "qed roce ll2 stop: cannot stop an unused LL2\n"); + return -EINVAL; + } + + /* remove LL2 MAC address filter */ + rc = qed_roce_ll2_set_mac_filter(cdev, roce_ll2->mac_address, NULL); + eth_zero_addr(roce_ll2->mac_address); + + rc = qed_ll2_terminate_connection(QED_LEADING_HWFN(cdev), + roce_ll2->handle); + if (rc) + DP_ERR(cdev, + "qed roce ll2 stop: failed to terminate LL2 connection (rc=%d)\n", + rc); + + qed_ll2_release_connection(QED_LEADING_HWFN(cdev), roce_ll2->handle); + + roce_ll2->handle = QED_LL2_UNUSED_HANDLE; + + kfree(roce_ll2); + + return rc; +} + +static int qed_roce_ll2_tx(struct qed_dev *cdev, + struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_tx_params *params) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + enum qed_ll2_roce_flavor_type qed_roce_flavor; + u8 flags = 0; + int rc; + int i; + + if (!cdev || !pkt || !params) { + DP_ERR(cdev, + "roce ll2 tx: failed tx because one of the following is NULL - drv=%p, pkt=%p, params=%p\n", + cdev, pkt, params); + return -EINVAL; + } + + qed_roce_flavor = (pkt->roce_mode == ROCE_V1) ? QED_LL2_ROCE + : QED_LL2_RROCE; + + if (pkt->roce_mode == ROCE_V2_IPV4) + flags |= BIT(CORE_TX_BD_FLAGS_IP_CSUM_SHIFT); + + /* Tx header */ + rc = qed_ll2_prepare_tx_packet(QED_LEADING_HWFN(cdev), roce_ll2->handle, + 1 + pkt->n_seg, 0, flags, 0, + qed_roce_flavor, pkt->header.baddr, + pkt->header.len, pkt, 1); + if (rc) { + DP_ERR(cdev, "roce ll2 tx: header failed (rc=%d)\n", rc); + return QED_ROCE_TX_HEAD_FAILURE; + } + + /* Tx payload */ + for (i = 0; i < pkt->n_seg; i++) { + rc = qed_ll2_set_fragment_of_tx_packet(QED_LEADING_HWFN(cdev), + roce_ll2->handle, + pkt->payload[i].baddr, + pkt->payload[i].len); + if (rc) { + /* If failed not much to do here, partial packet has + * been posted * we can't free memory, will need to wait + * for completion + */ + DP_ERR(cdev, + "roce ll2 tx: payload failed (rc=%d)\n", rc); + return QED_ROCE_TX_FRAG_FAILURE; + } + } + + return 0; +} + +static int qed_roce_ll2_post_rx_buffer(struct qed_dev *cdev, + struct qed_roce_ll2_buffer *buf, + u64 cookie, u8 notify_fw) +{ + return qed_ll2_post_rx_buffer(QED_LEADING_HWFN(cdev), + QED_LEADING_HWFN(cdev)->ll2->handle, + buf->baddr, buf->len, + (void *)(uintptr_t)cookie, notify_fw); +} + +static int qed_roce_ll2_stats(struct qed_dev *cdev, struct qed_ll2_stats *stats) +{ + struct qed_hwfn *hwfn = QED_LEADING_HWFN(cdev); + struct qed_roce_ll2_info *roce_ll2 = hwfn->ll2; + + return qed_ll2_get_stats(QED_LEADING_HWFN(cdev), + roce_ll2->handle, stats); +} + +static const struct qed_rdma_ops qed_rdma_ops_pass = { + .common = &qed_common_ops_pass, + .fill_dev_info = &qed_fill_rdma_dev_info, + .rdma_get_rdma_ctx = &qed_rdma_get_rdma_ctx, + .rdma_init = &qed_rdma_init, + .rdma_add_user = &qed_rdma_add_user, + .rdma_remove_user = &qed_rdma_remove_user, + .rdma_stop = &qed_rdma_stop, + .rdma_query_port = &qed_rdma_query_port, + .rdma_query_device = &qed_rdma_query_device, + .rdma_get_start_sb = &qed_rdma_get_sb_start, + .rdma_get_rdma_int = &qed_rdma_get_int, + .rdma_set_rdma_int = &qed_rdma_set_int, + .rdma_get_min_cnq_msix = &qed_rdma_get_min_cnq_msix, + .rdma_cnq_prod_update = &qed_rdma_cnq_prod_update, + .rdma_alloc_pd = &qed_rdma_alloc_pd, + .rdma_dealloc_pd = &qed_rdma_free_pd, + .rdma_create_cq = &qed_rdma_create_cq, + .rdma_destroy_cq = &qed_rdma_destroy_cq, + .rdma_create_qp = &qed_rdma_create_qp, + .rdma_modify_qp = &qed_rdma_modify_qp, + .rdma_query_qp = &qed_rdma_query_qp, + .rdma_destroy_qp = &qed_rdma_destroy_qp, + .rdma_alloc_tid = &qed_rdma_alloc_tid, + .rdma_free_tid = &qed_rdma_free_tid, + .rdma_register_tid = &qed_rdma_register_tid, + .rdma_deregister_tid = &qed_rdma_deregister_tid, + .roce_ll2_start = &qed_roce_ll2_start, + .roce_ll2_stop = &qed_roce_ll2_stop, + .roce_ll2_tx = &qed_roce_ll2_tx, + .roce_ll2_post_rx_buffer = &qed_roce_ll2_post_rx_buffer, + .roce_ll2_set_mac_filter = &qed_roce_ll2_set_mac_filter, + .roce_ll2_stats = &qed_roce_ll2_stats, +}; + +const struct qed_rdma_ops *qed_get_rdma_ops() +{ + return &qed_rdma_ops_pass; +} +EXPORT_SYMBOL(qed_get_rdma_ops); diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.h b/drivers/net/ethernet/qlogic/qed/qed_roce.h new file mode 100644 index 000000000000..2f091e8a0f40 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qed/qed_roce.h @@ -0,0 +1,216 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_ROCE_H +#define _QED_ROCE_H +#include +#include +#include +#include +#include +#include +#include +#include +#include "qed.h" +#include "qed_dev_api.h" +#include "qed_hsi.h" +#include "qed_ll2.h" + +#define QED_RDMA_MAX_FMR (RDMA_MAX_TIDS) +#define QED_RDMA_MAX_P_KEY (1) +#define QED_RDMA_MAX_WQE (0x7FFF) +#define QED_RDMA_MAX_SRQ_WQE_ELEM (0x7FFF) +#define QED_RDMA_PAGE_SIZE_CAPS (0xFFFFF000) +#define QED_RDMA_ACK_DELAY (15) +#define QED_RDMA_MAX_MR_SIZE (0x10000000000ULL) +#define QED_RDMA_MAX_CQS (RDMA_MAX_CQS) +#define QED_RDMA_MAX_MRS (RDMA_MAX_TIDS) +/* Add 1 for header element */ +#define QED_RDMA_MAX_SRQ_ELEM_PER_WQE (RDMA_MAX_SGE_PER_RQ_WQE + 1) +#define QED_RDMA_MAX_SGE_PER_SRQ_WQE (RDMA_MAX_SGE_PER_RQ_WQE) +#define QED_RDMA_SRQ_WQE_ELEM_SIZE (16) +#define QED_RDMA_MAX_SRQS (32 * 1024) + +#define QED_RDMA_MAX_CQE_32_BIT (0x7FFFFFFF - 1) +#define QED_RDMA_MAX_CQE_16_BIT (0x7FFF - 1) + +enum qed_rdma_toggle_bit { + QED_RDMA_TOGGLE_BIT_CLEAR = 0, + QED_RDMA_TOGGLE_BIT_SET = 1 +}; + +struct qed_bmap { + unsigned long *bitmap; + u32 max_count; +}; + +struct qed_rdma_info { + /* spin lock to protect bitmaps */ + spinlock_t lock; + + struct qed_bmap cq_map; + struct qed_bmap pd_map; + struct qed_bmap tid_map; + struct qed_bmap qp_map; + struct qed_bmap srq_map; + struct qed_bmap cid_map; + struct qed_bmap dpi_map; + struct qed_bmap toggle_bits; + struct qed_rdma_events events; + struct qed_rdma_device *dev; + struct qed_rdma_port *port; + u32 last_tid; + u8 num_cnqs; + u32 num_qps; + u32 num_mrs; + u16 queue_zone_base; + enum protocol_type proto; +}; + +struct qed_rdma_resize_cq_in_params { + u16 icid; + u32 cq_size; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; +}; + +struct qed_rdma_resize_cq_out_params { + u32 prod; + u32 cons; +}; + +struct qed_rdma_resize_cnq_in_params { + u32 cnq_id; + u32 pbl_page_size_log; + u64 pbl_ptr; +}; + +struct qed_rdma_qp { + struct regpair qp_handle; + struct regpair qp_handle_async; + u32 qpid; + u16 icid; + enum qed_roce_qp_state cur_state; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + + u16 pd; + u16 pkey; + u32 dest_qp; + u16 mtu; + u16 srq_id; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u16 dpi; + u32 flow_label; + bool lb_indication; + u16 vlan_id; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + union qed_gid sgid; + union qed_gid dgid; + enum roce_mode roce_mode; + u16 udp_src_port; + u8 stats_queue; + + /* requeseter */ + u8 max_rd_atomic_req; + u32 sq_psn; + u16 sq_cq_id; + u16 sq_num_pages; + dma_addr_t sq_pbl_ptr; + void *orq; + dma_addr_t orq_phys_addr; + u8 orq_num_pages; + bool req_offloaded; + + /* responder */ + u8 max_rd_atomic_resp; + u32 rq_psn; + u16 rq_cq_id; + u16 rq_num_pages; + dma_addr_t rq_pbl_ptr; + void *irq; + dma_addr_t irq_phys_addr; + u8 irq_num_pages; + bool resp_offloaded; + + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + + void *shared_queue; + dma_addr_t shared_queue_phys_addr; +}; + +int +qed_rdma_add_user(void *rdma_cxt, + struct qed_rdma_add_user_out_params *out_params); +int qed_rdma_alloc_pd(void *rdma_cxt, u16 *pd); +int qed_rdma_alloc_tid(void *rdma_cxt, u32 *tid); +int qed_rdma_deregister_tid(void *rdma_cxt, u32 tid); +void qed_rdma_free_tid(void *rdma_cxt, u32 tid); +struct qed_rdma_device *qed_rdma_query_device(void *rdma_cxt); +struct qed_rdma_port *qed_rdma_query_port(void *rdma_cxt); +int +qed_rdma_register_tid(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *params); +void qed_rdma_remove_user(void *rdma_cxt, u16 dpi); +int qed_rdma_start(void *p_hwfn, struct qed_rdma_start_in_params *params); +int qed_rdma_stop(void *rdma_cxt); +u32 qed_rdma_get_sb_id(void *p_hwfn, u32 rel_sb_id); +u32 qed_rdma_query_cau_timer_res(void *p_hwfn); +void qed_rdma_cnq_prod_update(void *rdma_cxt, u8 cnq_index, u16 prod); +void qed_rdma_resc_free(struct qed_hwfn *p_hwfn); +void qed_async_roce_event(struct qed_hwfn *p_hwfn, + struct event_ring_entry *p_eqe); +int qed_rdma_destroy_qp(void *rdma_cxt, struct qed_rdma_qp *qp); +int qed_rdma_modify_qp(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *params); +int qed_rdma_query_qp(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *out_params); + +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt); +#else +void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) {} +#endif +#endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_selftest.c b/drivers/net/ethernet/qlogic/qed/qed_selftest.c index a342bfe4280d..9b7678f26909 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_selftest.c +++ b/drivers/net/ethernet/qlogic/qed/qed_selftest.c @@ -2,6 +2,7 @@ #include "qed_dev_api.h" #include "qed_mcp.h" #include "qed_sp.h" +#include "qed_selftest.h" int qed_selftest_memory(struct qed_dev *cdev) { diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp.h b/drivers/net/ethernet/qlogic/qed/qed_sp.h index a548504c3420..652c90819758 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp.h +++ b/drivers/net/ethernet/qlogic/qed/qed_sp.h @@ -61,6 +61,10 @@ union ramrod_data { struct vport_start_ramrod_data vport_start; struct vport_stop_ramrod_data vport_stop; struct vport_update_ramrod_data vport_update; + struct core_rx_start_ramrod_data core_rx_queue_start; + struct core_rx_stop_ramrod_data core_rx_queue_stop; + struct core_tx_start_ramrod_data core_tx_queue_start; + struct core_tx_stop_ramrod_data core_tx_queue_stop; struct vport_filter_update_ramrod_data vport_filter_update; struct rdma_init_func_ramrod_data rdma_init_func; @@ -81,6 +85,7 @@ union ramrod_data { struct rdma_srq_create_ramrod_data rdma_create_srq; struct rdma_srq_destroy_ramrod_data rdma_destroy_srq; struct rdma_srq_modify_ramrod_data rdma_modify_srq; + struct roce_init_func_ramrod_data roce_init_func; struct iscsi_slow_path_hdr iscsi_empty; struct iscsi_init_ramrod_params iscsi_init; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c index a52f3fc051f5..2888eb0628f8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sp_commands.c @@ -25,9 +25,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent, - u8 cmd, - u8 protocol, - struct qed_sp_init_data *p_data) + u8 cmd, u8 protocol, struct qed_sp_init_data *p_data) { u32 opaque_cid = p_data->opaque_fid << 16 | p_data->cid; struct qed_spq_entry *p_ent = NULL; @@ -38,7 +36,7 @@ int qed_sp_init_request(struct qed_hwfn *p_hwfn, rc = qed_spq_get_entry(p_hwfn, pp_ent); - if (rc != 0) + if (rc) return rc; p_ent = *pp_ent; @@ -321,8 +319,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, rc = qed_sp_init_request(p_hwfn, &p_ent, COMMON_RAMROD_PF_START, - PROTOCOLID_COMMON, - &init_data); + PROTOCOLID_COMMON, &init_data); if (rc) return rc; @@ -356,8 +353,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, DMA_REGPAIR_LE(p_ramrod->consolid_q_pbl_addr, p_hwfn->p_consq->chain.pbl.p_phys_table); - qed_tunn_set_pf_start_params(p_hwfn, p_tunn, - &p_ramrod->tunnel_config); + qed_tunn_set_pf_start_params(p_hwfn, p_tunn, &p_ramrod->tunnel_config); if (IS_MF_SI(p_hwfn)) p_ramrod->allow_npar_tx_switching = allow_npar_tx_switch; @@ -389,8 +385,7 @@ int qed_sp_pf_start(struct qed_hwfn *p_hwfn, DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Setting event_ring_sb [id %04x index %02x], outer_tag [%d]\n", - sb, sb_index, - p_ramrod->outer_tag); + sb, sb_index, p_ramrod->outer_tag); rc = qed_spq_post(p_hwfn, p_ent, NULL); diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index d73456eab1d7..caff41544898 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -28,6 +28,9 @@ #include "qed_reg_addr.h" #include "qed_sp.h" #include "qed_sriov.h" +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +#include "qed_roce.h" +#endif /*************************************************************************** * Structures & Definitions @@ -41,8 +44,7 @@ ***************************************************************************/ static void qed_spq_blocking_cb(struct qed_hwfn *p_hwfn, void *cookie, - union event_ring_data *data, - u8 fw_return_code) + union event_ring_data *data, u8 fw_return_code) { struct qed_spq_comp_done *comp_done; @@ -109,9 +111,8 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn, /*************************************************************************** * SPQ entries inner API ***************************************************************************/ -static int -qed_spq_fill_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent) +static int qed_spq_fill_entry(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent) { p_ent->flags = 0; @@ -189,8 +190,7 @@ static void qed_spq_hw_initialize(struct qed_hwfn *p_hwfn, } static int qed_spq_hw_post(struct qed_hwfn *p_hwfn, - struct qed_spq *p_spq, - struct qed_spq_entry *p_ent) + struct qed_spq *p_spq, struct qed_spq_entry *p_ent) { struct qed_chain *p_chain = &p_hwfn->p_spq->chain; u16 echo = qed_chain_get_prod_idx(p_chain); @@ -240,6 +240,11 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn, struct event_ring_entry *p_eqe) { switch (p_eqe->protocol_id) { +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) + case PROTOCOLID_ROCE: + qed_async_roce_event(p_hwfn, p_eqe); + return 0; +#endif case PROTOCOLID_COMMON: return qed_sriov_eqe_event(p_hwfn, p_eqe->opcode, @@ -255,8 +260,7 @@ qed_async_event_completion(struct qed_hwfn *p_hwfn, /*************************************************************************** * EQ API ***************************************************************************/ -void qed_eq_prod_update(struct qed_hwfn *p_hwfn, - u16 prod) +void qed_eq_prod_update(struct qed_hwfn *p_hwfn, u16 prod) { u32 addr = GTT_BAR0_MAP_REG_USDM_RAM + USTORM_EQE_CONS_OFFSET(p_hwfn->rel_pf_id); @@ -267,9 +271,7 @@ void qed_eq_prod_update(struct qed_hwfn *p_hwfn, mmiowb(); } -int qed_eq_completion(struct qed_hwfn *p_hwfn, - void *cookie) - +int qed_eq_completion(struct qed_hwfn *p_hwfn, void *cookie) { struct qed_eq *p_eq = cookie; struct qed_chain *p_chain = &p_eq->chain; @@ -323,17 +325,14 @@ int qed_eq_completion(struct qed_hwfn *p_hwfn, return rc; } -struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, - u16 num_elem) +struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, u16 num_elem) { struct qed_eq *p_eq; /* Allocate EQ struct */ p_eq = kzalloc(sizeof(*p_eq), GFP_KERNEL); - if (!p_eq) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_eq'\n"); + if (!p_eq) return NULL; - } /* Allocate and initialize EQ chain*/ if (qed_chain_alloc(p_hwfn->cdev, @@ -342,17 +341,12 @@ struct qed_eq *qed_eq_alloc(struct qed_hwfn *p_hwfn, QED_CHAIN_CNT_TYPE_U16, num_elem, sizeof(union event_ring_element), - &p_eq->chain)) { - DP_NOTICE(p_hwfn, "Failed to allocate eq chain\n"); + &p_eq->chain)) goto eq_allocate_fail; - } /* register EQ completion on the SP SB */ - qed_int_register_cb(p_hwfn, - qed_eq_completion, - p_eq, - &p_eq->eq_sb_index, - &p_eq->p_fw_cons); + qed_int_register_cb(p_hwfn, qed_eq_completion, + p_eq, &p_eq->eq_sb_index, &p_eq->p_fw_cons); return p_eq; @@ -361,14 +355,12 @@ eq_allocate_fail: return NULL; } -void qed_eq_setup(struct qed_hwfn *p_hwfn, - struct qed_eq *p_eq) +void qed_eq_setup(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq) { qed_chain_reset(&p_eq->chain); } -void qed_eq_free(struct qed_hwfn *p_hwfn, - struct qed_eq *p_eq) +void qed_eq_free(struct qed_hwfn *p_hwfn, struct qed_eq *p_eq) { if (!p_eq) return; @@ -379,10 +371,9 @@ void qed_eq_free(struct qed_hwfn *p_hwfn, /*************************************************************************** * CQE API - manipulate EQ functionality ***************************************************************************/ -static int qed_cqe_completion( - struct qed_hwfn *p_hwfn, - struct eth_slow_path_rx_cqe *cqe, - enum protocol_type protocol) +static int qed_cqe_completion(struct qed_hwfn *p_hwfn, + struct eth_slow_path_rx_cqe *cqe, + enum protocol_type protocol) { if (IS_VF(p_hwfn->cdev)) return 0; @@ -463,12 +454,9 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn) u32 capacity; /* SPQ struct */ - p_spq = - kzalloc(sizeof(struct qed_spq), GFP_KERNEL); - if (!p_spq) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_spq'\n"); + p_spq = kzalloc(sizeof(struct qed_spq), GFP_KERNEL); + if (!p_spq) return -ENOMEM; - } /* SPQ ring */ if (qed_chain_alloc(p_hwfn->cdev, @@ -477,18 +465,14 @@ int qed_spq_alloc(struct qed_hwfn *p_hwfn) QED_CHAIN_CNT_TYPE_U16, 0, /* N/A when the mode is SINGLE */ sizeof(struct slow_path_element), - &p_spq->chain)) { - DP_NOTICE(p_hwfn, "Failed to allocate spq chain\n"); + &p_spq->chain)) goto spq_allocate_fail; - } /* allocate and fill the SPQ elements (incl. ramrod data list) */ capacity = qed_chain_get_capacity(&p_spq->chain); p_virt = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, - capacity * - sizeof(struct qed_spq_entry), + capacity * sizeof(struct qed_spq_entry), &p_phys, GFP_KERNEL); - if (!p_virt) goto spq_allocate_fail; @@ -525,9 +509,7 @@ void qed_spq_free(struct qed_hwfn *p_hwfn) kfree(p_spq); } -int -qed_spq_get_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry **pp_ent) +int qed_spq_get_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry **pp_ent) { struct qed_spq *p_spq = p_hwfn->p_spq; struct qed_spq_entry *p_ent = NULL; @@ -538,14 +520,15 @@ qed_spq_get_entry(struct qed_hwfn *p_hwfn, if (list_empty(&p_spq->free_pool)) { p_ent = kzalloc(sizeof(*p_ent), GFP_ATOMIC); if (!p_ent) { + DP_NOTICE(p_hwfn, + "Failed to allocate an SPQ entry for a pending ramrod\n"); rc = -ENOMEM; goto out_unlock; } p_ent->queue = &p_spq->unlimited_pending; } else { p_ent = list_first_entry(&p_spq->free_pool, - struct qed_spq_entry, - list); + struct qed_spq_entry, list); list_del(&p_ent->list); p_ent->queue = &p_spq->pending; } @@ -564,8 +547,7 @@ static void __qed_spq_return_entry(struct qed_hwfn *p_hwfn, list_add_tail(&p_ent->list, &p_hwfn->p_spq->free_pool); } -void qed_spq_return_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent) +void qed_spq_return_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_ent) { spin_lock_bh(&p_hwfn->p_spq->lock); __qed_spq_return_entry(p_hwfn, p_ent); @@ -586,10 +568,9 @@ void qed_spq_return_entry(struct qed_hwfn *p_hwfn, * * @return int */ -static int -qed_spq_add_entry(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent, - enum spq_priority priority) +static int qed_spq_add_entry(struct qed_hwfn *p_hwfn, + struct qed_spq_entry *p_ent, + enum spq_priority priority) { struct qed_spq *p_spq = p_hwfn->p_spq; @@ -604,8 +585,7 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn, struct qed_spq_entry *p_en2; p_en2 = list_first_entry(&p_spq->free_pool, - struct qed_spq_entry, - list); + struct qed_spq_entry, list); list_del(&p_en2->list); /* Copy the ring element physical pointer to the new @@ -655,8 +635,7 @@ u32 qed_spq_get_cid(struct qed_hwfn *p_hwfn) * Posting new Ramrods ***************************************************************************/ static int qed_spq_post_list(struct qed_hwfn *p_hwfn, - struct list_head *head, - u32 keep_reserve) + struct list_head *head, u32 keep_reserve) { struct qed_spq *p_spq = p_hwfn->p_spq; int rc; @@ -690,8 +669,7 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) break; p_ent = list_first_entry(&p_spq->unlimited_pending, - struct qed_spq_entry, - list); + struct qed_spq_entry, list); if (!p_ent) return -EINVAL; @@ -705,8 +683,7 @@ static int qed_spq_pend_post(struct qed_hwfn *p_hwfn) } int qed_spq_post(struct qed_hwfn *p_hwfn, - struct qed_spq_entry *p_ent, - u8 *fw_return_code) + struct qed_spq_entry *p_ent, u8 *fw_return_code) { int rc = 0; struct qed_spq *p_spq = p_hwfn ? p_hwfn->p_spq : NULL; @@ -803,8 +780,7 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, return -EINVAL; spin_lock_bh(&p_spq->lock); - list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, - list) { + list_for_each_entry_safe(p_ent, tmp, &p_spq->completion_pending, list) { if (p_ent->elem.hdr.echo == echo) { u16 pos = le16_to_cpu(echo) % SPQ_RING_SIZE; @@ -846,15 +822,22 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, if (!found) { DP_NOTICE(p_hwfn, - "Failed to find an entry this EQE completes\n"); + "Failed to find an entry this EQE [echo %04x] completes\n", + le16_to_cpu(echo)); return -EEXIST; } - DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Complete: func %p cookie %p)\n", + DP_VERBOSE(p_hwfn, QED_MSG_SPQ, + "Complete EQE [echo %04x]: func %p cookie %p)\n", + le16_to_cpu(echo), p_ent->comp_cb.function, p_ent->comp_cb.cookie); if (found->comp_cb.function) found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data, fw_return_code); + else + DP_VERBOSE(p_hwfn, + QED_MSG_SPQ, + "Got a completion without a callback function\n"); if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || (found->queue == &p_spq->unlimited_pending)) @@ -878,10 +861,8 @@ struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn) /* Allocate ConsQ struct */ p_consq = kzalloc(sizeof(*p_consq), GFP_KERNEL); - if (!p_consq) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_consq'\n"); + if (!p_consq) return NULL; - } /* Allocate and initialize EQ chain*/ if (qed_chain_alloc(p_hwfn->cdev, @@ -889,10 +870,8 @@ struct qed_consq *qed_consq_alloc(struct qed_hwfn *p_hwfn) QED_CHAIN_MODE_PBL, QED_CHAIN_CNT_TYPE_U16, QED_CHAIN_PAGE_SIZE / 0x80, - 0x80, &p_consq->chain)) { - DP_NOTICE(p_hwfn, "Failed to allocate consq chain"); + 0x80, &p_consq->chain)) goto consq_allocate_fail; - } return p_consq; @@ -901,14 +880,12 @@ consq_allocate_fail: return NULL; } -void qed_consq_setup(struct qed_hwfn *p_hwfn, - struct qed_consq *p_consq) +void qed_consq_setup(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq) { qed_chain_reset(&p_consq->chain); } -void qed_consq_free(struct qed_hwfn *p_hwfn, - struct qed_consq *p_consq) +void qed_consq_free(struct qed_hwfn *p_hwfn, struct qed_consq *p_consq) { if (!p_consq) return; diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.c b/drivers/net/ethernet/qlogic/qed/qed_sriov.c index 15399da268d9..d2d6621fe0e5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_sriov.c +++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.c @@ -60,7 +60,8 @@ static int qed_sp_vf_start(struct qed_hwfn *p_hwfn, struct qed_vf_info *p_vf) } fp_minor = p_vf->acquire.vfdev_info.eth_fp_hsi_minor; - if (fp_minor > ETH_HSI_VER_MINOR) { + if (fp_minor > ETH_HSI_VER_MINOR && + fp_minor != ETH_HSI_VER_NO_PKT_LEN_TUNN) { DP_VERBOSE(p_hwfn, QED_MSG_IOV, "VF [%d] - Requested fp hsi %02x.%02x which is slightly newer than PF's %02x.%02x; Configuring PFs version\n", @@ -107,8 +108,8 @@ static int qed_sp_vf_stop(struct qed_hwfn *p_hwfn, return qed_spq_post(p_hwfn, p_ent, NULL); } -bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, - int rel_vf_id, bool b_enabled_only) +static bool qed_iov_is_valid_vfid(struct qed_hwfn *p_hwfn, + int rel_vf_id, bool b_enabled_only) { if (!p_hwfn->pf_iov_info) { DP_NOTICE(p_hwfn->cdev, "No iov info\n"); @@ -185,8 +186,8 @@ static bool qed_iov_validate_sb(struct qed_hwfn *p_hwfn, return false; } -int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn, - int vfid, struct qed_ptt *p_ptt) +static int qed_iov_post_vf_bulletin(struct qed_hwfn *p_hwfn, + int vfid, struct qed_ptt *p_ptt) { struct qed_bulletin_content *p_bulletin; int crc_size = sizeof(p_bulletin->crc); @@ -454,10 +455,8 @@ int qed_iov_alloc(struct qed_hwfn *p_hwfn) } p_sriov = kzalloc(sizeof(*p_sriov), GFP_KERNEL); - if (!p_sriov) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n"); + if (!p_sriov) return -ENOMEM; - } p_hwfn->pf_iov_info = p_sriov; @@ -506,10 +505,9 @@ int qed_iov_hw_info(struct qed_hwfn *p_hwfn) /* Allocate a new struct for IOV information */ cdev->p_iov_info = kzalloc(sizeof(*cdev->p_iov_info), GFP_KERNEL); - if (!cdev->p_iov_info) { - DP_NOTICE(p_hwfn, "Can't support IOV due to lack of memory\n"); + if (!cdev->p_iov_info) return -ENOMEM; - } + cdev->p_iov_info->pos = pos; rc = qed_iov_pci_cfg_info(cdev); @@ -575,7 +573,7 @@ static void qed_iov_set_vf_to_disable(struct qed_dev *cdev, } } -void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable) +static void qed_iov_set_vfs_to_disable(struct qed_dev *cdev, u8 to_disable) { u16 i; @@ -699,7 +697,7 @@ static void qed_iov_config_perm_table(struct qed_hwfn *p_hwfn, &qzone_id); reg_addr = PSWHST_REG_ZONE_PERMISSION_TABLE + qzone_id * 4; - val = enable ? (vf->abs_vf_id | (1 << 8)) : 0; + val = enable ? (vf->abs_vf_id | BIT(8)) : 0; qed_wr(p_hwfn, p_ptt, reg_addr, val); } } @@ -1090,13 +1088,13 @@ static u16 qed_iov_prep_vp_update_resp_tlvs(struct qed_hwfn *p_hwfn, /* Prepare response for all extended tlvs if they are found by PF */ for (i = 0; i < QED_IOV_VP_UPDATE_MAX; i++) { - if (!(tlvs_mask & (1 << i))) + if (!(tlvs_mask & BIT(i))) continue; resp = qed_add_tlv(p_hwfn, &p_mbx->offset, qed_iov_vport_to_tlv(p_hwfn, i), size); - if (tlvs_accepted & (1 << i)) + if (tlvs_accepted & BIT(i)) resp->hdr.status = status; else resp->hdr.status = PFVF_STATUS_NOT_SUPPORTED; @@ -1132,9 +1130,10 @@ static void qed_iov_prepare_resp(struct qed_hwfn *p_hwfn, qed_iov_send_response(p_hwfn, p_ptt, vf_info, length, status); } -struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, - u16 relative_vf_id, - bool b_enabled_only) +static struct +qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, + u16 relative_vf_id, + bool b_enabled_only) { struct qed_vf_info *vf = NULL; @@ -1145,7 +1144,7 @@ struct qed_public_vf_info *qed_iov_get_public_vf_info(struct qed_hwfn *p_hwfn, return &vf->p_vf_info; } -void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid) +static void qed_iov_clean_vf(struct qed_hwfn *p_hwfn, u8 vfid) { struct qed_public_vf_info *vf_info; @@ -1241,6 +1240,16 @@ static u8 qed_iov_vf_mbx_acquire_resc(struct qed_hwfn *p_hwfn, p_req->num_vlan_filters, p_resp->num_vlan_filters, p_req->num_mc_filters, p_resp->num_mc_filters); + + /* Some legacy OSes are incapable of correctly handling this + * failure. + */ + if ((p_vf->acquire.vfdev_info.eth_fp_hsi_minor == + ETH_HSI_VER_NO_PKT_LEN_TUNN) && + (p_vf->acquire.vfdev_info.os_type == + VFPF_ACQUIRE_OS_WINDOWS)) + return PFVF_STATUS_SUCCESS; + return PFVF_STATUS_NO_RESOURCE; } @@ -1280,22 +1289,42 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, memset(resp, 0, sizeof(*resp)); + /* Write the PF version so that VF would know which version + * is supported - might be later overriden. This guarantees that + * VF could recognize legacy PF based on lack of versions in reply. + */ + pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR; + pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR; + + if (vf->state != VF_FREE && vf->state != VF_STOPPED) { + DP_VERBOSE(p_hwfn, + QED_MSG_IOV, + "VF[%d] sent ACQUIRE but is already in state %d - fail request\n", + vf->abs_vf_id, vf->state); + goto out; + } + /* Validate FW compatibility */ if (req->vfdev_info.eth_fp_hsi_major != ETH_HSI_VER_MAJOR) { - DP_INFO(p_hwfn, - "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n", - vf->abs_vf_id, - req->vfdev_info.eth_fp_hsi_major, - req->vfdev_info.eth_fp_hsi_minor, - ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR); + if (req->vfdev_info.capabilities & + VFPF_ACQUIRE_CAP_PRE_FP_HSI) { + struct vf_pf_vfdev_info *p_vfdev = &req->vfdev_info; - /* Write the PF version so that VF would know which version - * is supported. - */ - pfdev_info->major_fp_hsi = ETH_HSI_VER_MAJOR; - pfdev_info->minor_fp_hsi = ETH_HSI_VER_MINOR; + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF[%d] is pre-fastpath HSI\n", + vf->abs_vf_id); + p_vfdev->eth_fp_hsi_major = ETH_HSI_VER_MAJOR; + p_vfdev->eth_fp_hsi_minor = ETH_HSI_VER_NO_PKT_LEN_TUNN; + } else { + DP_INFO(p_hwfn, + "VF[%d] needs fastpath HSI %02x.%02x, which is incompatible with loaded FW's faspath HSI %02x.%02x\n", + vf->abs_vf_id, + req->vfdev_info.eth_fp_hsi_major, + req->vfdev_info.eth_fp_hsi_minor, + ETH_HSI_VER_MAJOR, ETH_HSI_VER_MINOR); - goto out; + goto out; + } } /* On 100g PFs, prevent old VFs from loading */ @@ -1334,8 +1363,11 @@ static void qed_iov_vf_mbx_acquire(struct qed_hwfn *p_hwfn, pfdev_info->fw_minor = FW_MINOR_VERSION; pfdev_info->fw_rev = FW_REVISION_VERSION; pfdev_info->fw_eng = FW_ENGINEERING_VERSION; - pfdev_info->minor_fp_hsi = min_t(u8, - ETH_HSI_VER_MINOR, + + /* Incorrect when legacy, but doesn't matter as legacy isn't reading + * this field. + */ + pfdev_info->minor_fp_hsi = min_t(u8, ETH_HSI_VER_MINOR, req->vfdev_info.eth_fp_hsi_minor); pfdev_info->os_type = VFPF_ACQUIRE_OS_LINUX; qed_mcp_get_mfw_ver(p_hwfn, p_ptt, &pfdev_info->mfw_ver, NULL); @@ -1438,14 +1470,11 @@ static int qed_iov_reconfigure_unicast_vlan(struct qed_hwfn *p_hwfn, filter.type = QED_FILTER_VLAN; filter.vlan = p_vf->shadow_config.vlans[i].vid; - DP_VERBOSE(p_hwfn, - QED_MSG_IOV, + DP_VERBOSE(p_hwfn, QED_MSG_IOV, "Reconfiguring VLAN [0x%04x] for VF [%04x]\n", filter.vlan, p_vf->relative_vf_id); - rc = qed_sp_eth_filter_ucast(p_hwfn, - p_vf->opaque_fid, - &filter, - QED_SPQ_MODE_CB, NULL); + rc = qed_sp_eth_filter_ucast(p_hwfn, p_vf->opaque_fid, + &filter, QED_SPQ_MODE_CB, NULL); if (rc) { DP_NOTICE(p_hwfn, "Failed to configure VLAN [%04x] to VF [%04x]\n", @@ -1463,7 +1492,7 @@ qed_iov_reconfigure_unicast_shadow(struct qed_hwfn *p_hwfn, { int rc = 0; - if ((events & (1 << VLAN_ADDR_FORCED)) && + if ((events & BIT(VLAN_ADDR_FORCED)) && !(p_vf->configured_features & (1 << VLAN_ADDR_FORCED))) rc = qed_iov_reconfigure_unicast_vlan(p_hwfn, p_vf); @@ -1479,7 +1508,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, if (!p_vf->vport_instance) return -EINVAL; - if (events & (1 << MAC_ADDR_FORCED)) { + if (events & BIT(MAC_ADDR_FORCED)) { /* Since there's no way [currently] of removing the MAC, * we can always assume this means we need to force it. */ @@ -1502,7 +1531,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, p_vf->configured_features |= 1 << MAC_ADDR_FORCED; } - if (events & (1 << VLAN_ADDR_FORCED)) { + if (events & BIT(VLAN_ADDR_FORCED)) { struct qed_sp_vport_update_params vport_update; u8 removal; int i; @@ -1572,7 +1601,7 @@ static int qed_iov_configure_vport_forced(struct qed_hwfn *p_hwfn, if (filter.vlan) p_vf->configured_features |= 1 << VLAN_ADDR_FORCED; else - p_vf->configured_features &= ~(1 << VLAN_ADDR_FORCED); + p_vf->configured_features &= ~BIT(VLAN_ADDR_FORCED); } /* If forced features are terminated, we need to configure the shadow @@ -1619,8 +1648,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, qed_int_cau_conf_sb(p_hwfn, p_ptt, start->sb_addr[sb_id], - vf->igu_sbs[sb_id], - vf->abs_vf_id, 1); + vf->igu_sbs[sb_id], vf->abs_vf_id, 1); } qed_iov_enable_vf_traffic(p_hwfn, p_ptt, vf); @@ -1632,7 +1660,7 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, * vfs that would still be fine, since they passed '0' as padding]. */ p_bitmap = &vf_info->bulletin.p_virt->valid_bitmap; - if (!(*p_bitmap & (1 << VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) { + if (!(*p_bitmap & BIT(VFPF_BULLETIN_UNTAGGED_DEFAULT_FORCED))) { u8 vf_req = start->only_untagged; vf_info->bulletin.p_virt->default_only_untagged = vf_req; @@ -1650,9 +1678,10 @@ static void qed_iov_vf_mbx_start_vport(struct qed_hwfn *p_hwfn, params.vport_id = vf->vport_id; params.max_buffers_per_cqe = start->max_buffers_per_cqe; params.mtu = vf->mtu; + params.check_mac = true; rc = qed_sp_eth_vport_start(p_hwfn, ¶ms); - if (rc != 0) { + if (rc) { DP_ERR(p_hwfn, "qed_iov_vf_mbx_start_vport returned error %d\n", rc); status = PFVF_STATUS_FAILURE; @@ -1679,7 +1708,7 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn, vf->spoof_chk = false; rc = qed_sp_vport_stop(p_hwfn, vf->opaque_fid, vf->vport_id); - if (rc != 0) { + if (rc) { DP_ERR(p_hwfn, "qed_iov_vf_mbx_stop_vport returned error %d\n", rc); status = PFVF_STATUS_FAILURE; @@ -1695,21 +1724,32 @@ static void qed_iov_vf_mbx_stop_vport(struct qed_hwfn *p_hwfn, static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - struct qed_vf_info *vf, u8 status) + struct qed_vf_info *vf, + u8 status, bool b_legacy) { struct qed_iov_vf_mbx *mbx = &vf->vf_mbx; struct pfvf_start_queue_resp_tlv *p_tlv; struct vfpf_start_rxq_tlv *req; + u16 length; mbx->offset = (u8 *)mbx->reply_virt; + /* Taking a bigger struct instead of adding a TLV to list was a + * mistake, but one which we're now stuck with, as some older + * clients assume the size of the previous response. + */ + if (!b_legacy) + length = sizeof(*p_tlv); + else + length = sizeof(struct pfvf_def_resp_tlv); + p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_RXQ, - sizeof(*p_tlv)); + length); qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv)); /* Update the TLV with the response */ - if (status == PFVF_STATUS_SUCCESS) { + if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) { req = &mbx->req_virt->start_rxq; p_tlv->offset = PXP_VF_BAR0_START_MSDM_ZONE_B + offsetof(struct mstorm_vf_zone, @@ -1717,7 +1757,7 @@ static void qed_iov_vf_mbx_start_rxq_resp(struct qed_hwfn *p_hwfn, sizeof(struct eth_rx_prod_data) * req->rx_qid; } - qed_iov_send_response(p_hwfn, p_ptt, vf, sizeof(*p_tlv), status); + qed_iov_send_response(p_hwfn, p_ptt, vf, length, status); } static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, @@ -1728,6 +1768,7 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, struct qed_iov_vf_mbx *mbx = &vf->vf_mbx; u8 status = PFVF_STATUS_NO_RESOURCE; struct vfpf_start_rxq_tlv *req; + bool b_legacy_vf = false; int rc; memset(¶ms, 0, sizeof(params)); @@ -1743,13 +1784,27 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, params.sb = req->hw_sb; params.sb_idx = req->sb_index; + /* Legacy VFs have their Producers in a different location, which they + * calculate on their own and clean the producer prior to this. + */ + if (vf->acquire.vfdev_info.eth_fp_hsi_minor == + ETH_HSI_VER_NO_PKT_LEN_TUNN) { + b_legacy_vf = true; + } else { + REG_WR(p_hwfn, + GTT_BAR0_MAP_REG_MSDM_RAM + + MSTORM_ETH_VF_PRODS_OFFSET(vf->abs_vf_id, req->rx_qid), + 0); + } + rc = qed_sp_eth_rxq_start_ramrod(p_hwfn, vf->opaque_fid, vf->vf_queues[req->rx_qid].fw_cid, ¶ms, vf->abs_vf_id + 0x10, req->bd_max_bytes, req->rxq_addr, - req->cqe_pbl_addr, req->cqe_pbl_size); + req->cqe_pbl_addr, req->cqe_pbl_size, + b_legacy_vf); if (rc) { status = PFVF_STATUS_FAILURE; @@ -1760,7 +1815,7 @@ static void qed_iov_vf_mbx_start_rxq(struct qed_hwfn *p_hwfn, } out: - qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status); + qed_iov_vf_mbx_start_rxq_resp(p_hwfn, p_ptt, vf, status, b_legacy_vf); } static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn, @@ -1769,23 +1824,38 @@ static void qed_iov_vf_mbx_start_txq_resp(struct qed_hwfn *p_hwfn, { struct qed_iov_vf_mbx *mbx = &p_vf->vf_mbx; struct pfvf_start_queue_resp_tlv *p_tlv; + bool b_legacy = false; + u16 length; mbx->offset = (u8 *)mbx->reply_virt; + /* Taking a bigger struct instead of adding a TLV to list was a + * mistake, but one which we're now stuck with, as some older + * clients assume the size of the previous response. + */ + if (p_vf->acquire.vfdev_info.eth_fp_hsi_minor == + ETH_HSI_VER_NO_PKT_LEN_TUNN) + b_legacy = true; + + if (!b_legacy) + length = sizeof(*p_tlv); + else + length = sizeof(struct pfvf_def_resp_tlv); + p_tlv = qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_START_TXQ, - sizeof(*p_tlv)); + length); qed_add_tlv(p_hwfn, &mbx->offset, CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv)); /* Update the TLV with the response */ - if (status == PFVF_STATUS_SUCCESS) { + if ((status == PFVF_STATUS_SUCCESS) && !b_legacy) { u16 qid = mbx->req_virt->start_txq.tx_qid; - p_tlv->offset = qed_db_addr(p_vf->vf_queues[qid].fw_cid, - DQ_DEMS_LEGACY); + p_tlv->offset = qed_db_addr_vf(p_vf->vf_queues[qid].fw_cid, + DQ_DEMS_LEGACY); } - qed_iov_send_response(p_hwfn, p_ptt, p_vf, sizeof(*p_tlv), status); + qed_iov_send_response(p_hwfn, p_ptt, p_vf, length, status); } static void qed_iov_vf_mbx_start_txq(struct qed_hwfn *p_hwfn, @@ -2045,7 +2115,7 @@ qed_iov_vp_update_vlan_param(struct qed_hwfn *p_hwfn, p_vf->shadow_config.inner_vlan_removal = p_vlan_tlv->remove_vlan; /* Ignore the VF request if we're forcing a vlan */ - if (!(p_vf->configured_features & (1 << VLAN_ADDR_FORCED))) { + if (!(p_vf->configured_features & BIT(VLAN_ADDR_FORCED))) { p_data->update_inner_vlan_removal_flg = 1; p_data->inner_vlan_removal_flg = p_vlan_tlv->remove_vlan; } @@ -2340,7 +2410,7 @@ static int qed_iov_vf_update_vlan_shadow(struct qed_hwfn *p_hwfn, /* In forced mode, we're willing to remove entries - but we don't add * new ones. */ - if (p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED)) + if (p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED)) return 0; if (p_params->opcode == QED_FILTER_ADD || @@ -2374,7 +2444,7 @@ static int qed_iov_vf_update_mac_shadow(struct qed_hwfn *p_hwfn, int i; /* If we're in forced-mode, we don't allow any change */ - if (p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED)) + if (p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED)) return 0; /* First remove entries and then add new ones */ @@ -2441,8 +2511,8 @@ qed_iov_vf_update_unicast_shadow(struct qed_hwfn *p_hwfn, return rc; } -int qed_iov_chk_ucast(struct qed_hwfn *hwfn, - int vfid, struct qed_filter_ucast *params) +static int qed_iov_chk_ucast(struct qed_hwfn *hwfn, + int vfid, struct qed_filter_ucast *params) { struct qed_public_vf_info *vf; @@ -2509,7 +2579,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn, } /* Determine if the unicast filtering is acceptible by PF */ - if ((p_bulletin->valid_bitmap & (1 << VLAN_ADDR_FORCED)) && + if ((p_bulletin->valid_bitmap & BIT(VLAN_ADDR_FORCED)) && (params.type == QED_FILTER_VLAN || params.type == QED_FILTER_MAC_VLAN)) { /* Once VLAN is forced or PVID is set, do not allow @@ -2521,7 +2591,7 @@ static void qed_iov_vf_mbx_ucast_filter(struct qed_hwfn *p_hwfn, goto out; } - if ((p_bulletin->valid_bitmap & (1 << MAC_ADDR_FORCED)) && + if ((p_bulletin->valid_bitmap & BIT(MAC_ADDR_FORCED)) && (params.type == QED_FILTER_MAC || params.type == QED_FILTER_MAC_VLAN)) { if (!ether_addr_equal(p_bulletin->mac, params.mac) || @@ -2749,7 +2819,7 @@ cleanup: /* Mark VF for ack and clean pending state */ if (p_vf->state == VF_RESET) p_vf->state = VF_STOPPED; - ack_vfs[vfid / 32] |= (1 << (vfid % 32)); + ack_vfs[vfid / 32] |= BIT((vfid % 32)); p_hwfn->pf_iov_info->pending_flr[rel_vf_id / 64] &= ~(1ULL << (rel_vf_id % 64)); p_hwfn->pf_iov_info->pending_events[rel_vf_id / 64] &= @@ -2759,7 +2829,8 @@ cleanup: return rc; } -int qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) +static int +qed_iov_vf_flr_cleanup(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt) { u32 ack_vfs[VF_MAX_STATIC / 32]; int rc = 0; @@ -2805,7 +2876,7 @@ int qed_iov_mark_vf_flr(struct qed_hwfn *p_hwfn, u32 *p_disabled_vfs) continue; vfid = p_vf->abs_vf_id; - if ((1 << (vfid % 32)) & p_disabled_vfs[vfid / 32]) { + if (BIT((vfid % 32)) & p_disabled_vfs[vfid / 32]) { u64 *p_flr = p_hwfn->pf_iov_info->pending_flr; u16 rel_vf_id = p_vf->relative_vf_id; @@ -2946,7 +3017,7 @@ static void qed_iov_process_mbx_req(struct qed_hwfn *p_hwfn, } } -void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid) +static void qed_iov_pf_add_pending_events(struct qed_hwfn *p_hwfn, u8 vfid) { u64 add_bit = 1ULL << (vfid % 64); @@ -3064,14 +3135,13 @@ static void qed_iov_bulletin_set_forced_mac(struct qed_hwfn *p_hwfn, vf_info->bulletin.p_virt->valid_bitmap |= feature; /* Forced MAC will disable MAC_ADDR */ - vf_info->bulletin.p_virt->valid_bitmap &= - ~(1 << VFPF_BULLETIN_MAC_ADDR); + vf_info->bulletin.p_virt->valid_bitmap &= ~BIT(VFPF_BULLETIN_MAC_ADDR); qed_iov_configure_vport_forced(p_hwfn, vf_info, feature); } -void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, - u16 pvid, int vfid) +static void qed_iov_bulletin_set_forced_vlan(struct qed_hwfn *p_hwfn, + u16 pvid, int vfid) { struct qed_vf_info *vf_info; u64 feature; @@ -3104,7 +3174,7 @@ static bool qed_iov_vf_has_vport_instance(struct qed_hwfn *p_hwfn, int vfid) return !!p_vf_info->vport_instance; } -bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid) +static bool qed_iov_is_vf_stopped(struct qed_hwfn *p_hwfn, int vfid) { struct qed_vf_info *p_vf_info; @@ -3126,7 +3196,7 @@ static bool qed_iov_spoofchk_get(struct qed_hwfn *p_hwfn, int vfid) return vf_info->spoof_chk; } -int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val) +static int qed_iov_spoofchk_set(struct qed_hwfn *p_hwfn, int vfid, bool val) { struct qed_vf_info *vf; int rc = -EINVAL; @@ -3163,13 +3233,14 @@ static u8 *qed_iov_bulletin_get_forced_mac(struct qed_hwfn *p_hwfn, if (!p_vf || !p_vf->bulletin.p_virt) return NULL; - if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << MAC_ADDR_FORCED))) + if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(MAC_ADDR_FORCED))) return NULL; return p_vf->bulletin.p_virt->mac; } -u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) +static u16 +qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) { struct qed_vf_info *p_vf; @@ -3177,7 +3248,7 @@ u16 qed_iov_bulletin_get_forced_vlan(struct qed_hwfn *p_hwfn, u16 rel_vf_id) if (!p_vf || !p_vf->bulletin.p_virt) return 0; - if (!(p_vf->bulletin.p_virt->valid_bitmap & (1 << VLAN_ADDR_FORCED))) + if (!(p_vf->bulletin.p_virt->valid_bitmap & BIT(VLAN_ADDR_FORCED))) return 0; return p_vf->bulletin.p_virt->pvid; @@ -3201,7 +3272,8 @@ static int qed_iov_configure_tx_rate(struct qed_hwfn *p_hwfn, return qed_init_vport_rl(p_hwfn, p_ptt, abs_vp_id, (u32)val); } -int qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) +static int +qed_iov_configure_min_tx_rate(struct qed_dev *cdev, int vfid, u32 rate) { struct qed_vf_info *vf; u8 vport_id; @@ -3760,7 +3832,8 @@ static void qed_handle_bulletin_post(struct qed_hwfn *hwfn) qed_ptt_release(hwfn, ptt); } -void qed_iov_pf_task(struct work_struct *work) +static void qed_iov_pf_task(struct work_struct *work) + { struct qed_hwfn *hwfn = container_of(work, struct qed_hwfn, iov_task.work); diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c index 9b780b31b15c..abf5bf11f865 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.c +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c @@ -46,6 +46,17 @@ static void *qed_vf_pf_prep(struct qed_hwfn *p_hwfn, u16 type, u16 length) return p_tlv; } +static void qed_vf_pf_req_end(struct qed_hwfn *p_hwfn, int req_status) +{ + union pfvf_tlvs *resp = p_hwfn->vf_iov_info->pf2vf_reply; + + DP_VERBOSE(p_hwfn, QED_MSG_IOV, + "VF request status = 0x%x, PF reply status = 0x%x\n", + req_status, resp->default_resp.hdr.status); + + mutex_unlock(&(p_hwfn->vf_iov_info->mutex)); +} + static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) { union vfpf_tlvs *p_req = p_hwfn->vf_iov_info->vf2pf_request; @@ -103,16 +114,12 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size) "VF <-- PF Timeout [Type %d]\n", p_req->first_tlv.tl.type); rc = -EBUSY; - goto exit; } else { DP_VERBOSE(p_hwfn, QED_MSG_IOV, "PF response: %d [Type %d]\n", *done, p_req->first_tlv.tl.type); } -exit: - mutex_unlock(&(p_hwfn->vf_iov_info->mutex)); - return rc; } @@ -191,6 +198,9 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) DP_VERBOSE(p_hwfn, QED_MSG_IOV, "attempting to acquire resources\n"); + /* Clear response buffer, as this might be a re-send */ + memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs)); + /* send acquire request */ rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) @@ -205,9 +215,12 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) /* PF agrees to allocate our resources */ if (!(resp->pfdev_info.capabilities & PFVF_ACQUIRE_CAP_POST_FW_OVERRIDE)) { - DP_INFO(p_hwfn, - "PF is using old incompatible driver; Either downgrade driver or request provider to update hypervisor version\n"); - return -EINVAL; + /* It's possible legacy PF mistakenly accepted; + * but we don't care - simply mark it as + * legacy and continue. + */ + req->vfdev_info.capabilities |= + VFPF_ACQUIRE_CAP_PRE_FP_HSI; } DP_VERBOSE(p_hwfn, QED_MSG_IOV, "resources acquired\n"); resources_acquired = true; @@ -215,27 +228,55 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) attempts < VF_ACQUIRE_THRESH) { qed_vf_pf_acquire_reduce_resc(p_hwfn, p_resc, &resp->resc); + } else if (resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) { + if (pfdev_info->major_fp_hsi && + (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) { + DP_NOTICE(p_hwfn, + "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n", + pfdev_info->major_fp_hsi, + pfdev_info->minor_fp_hsi, + ETH_HSI_VER_MAJOR, + ETH_HSI_VER_MINOR, + pfdev_info->major_fp_hsi); + rc = -EINVAL; + goto exit; + } - /* Clear response buffer */ - memset(p_iov->pf2vf_reply, 0, sizeof(union pfvf_tlvs)); - } else if ((resp->hdr.status == PFVF_STATUS_NOT_SUPPORTED) && - pfdev_info->major_fp_hsi && - (pfdev_info->major_fp_hsi != ETH_HSI_VER_MAJOR)) { - DP_NOTICE(p_hwfn, - "PF uses an incompatible fastpath HSI %02x.%02x [VF requires %02x.%02x]. Please change to a VF driver using %02x.xx.\n", - pfdev_info->major_fp_hsi, - pfdev_info->minor_fp_hsi, - ETH_HSI_VER_MAJOR, - ETH_HSI_VER_MINOR, pfdev_info->major_fp_hsi); - return -EINVAL; + if (!pfdev_info->major_fp_hsi) { + if (req->vfdev_info.capabilities & + VFPF_ACQUIRE_CAP_PRE_FP_HSI) { + DP_NOTICE(p_hwfn, + "PF uses very old drivers. Please change to a VF driver using no later than 8.8.x.x.\n"); + rc = -EINVAL; + goto exit; + } else { + DP_INFO(p_hwfn, + "PF is old - try re-acquire to see if it supports FW-version override\n"); + req->vfdev_info.capabilities |= + VFPF_ACQUIRE_CAP_PRE_FP_HSI; + continue; + } + } + + /* If PF/VF are using same Major, PF must have had + * it's reasons. Simply fail. + */ + DP_NOTICE(p_hwfn, "PF rejected acquisition by VF\n"); + rc = -EINVAL; + goto exit; } else { DP_ERR(p_hwfn, "PF returned error %d to VF acquisition request\n", resp->hdr.status); - return -EAGAIN; + rc = -EAGAIN; + goto exit; } } + /* Mark the PF as legacy, if needed */ + if (req->vfdev_info.capabilities & VFPF_ACQUIRE_CAP_PRE_FP_HSI) + p_iov->b_pre_fp_hsi = true; + /* Update bulletin board size with response from PF */ p_iov->bulletin.size = resp->bulletin_size; @@ -253,14 +294,18 @@ static int qed_vf_pf_acquire(struct qed_hwfn *p_hwfn) } } - if (ETH_HSI_VER_MINOR && + if (!p_iov->b_pre_fp_hsi && + ETH_HSI_VER_MINOR && (resp->pfdev_info.minor_fp_hsi < ETH_HSI_VER_MINOR)) { DP_INFO(p_hwfn, "PF is using older fastpath HSI; %02x.%02x is configured\n", ETH_HSI_VER_MAJOR, resp->pfdev_info.minor_fp_hsi); } - return 0; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; } int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) @@ -286,31 +331,23 @@ int qed_vf_hw_prepare(struct qed_hwfn *p_hwfn) /* Allocate vf sriov info */ p_iov = kzalloc(sizeof(*p_iov), GFP_KERNEL); - if (!p_iov) { - DP_NOTICE(p_hwfn, "Failed to allocate `struct qed_sriov'\n"); + if (!p_iov) return -ENOMEM; - } /* Allocate vf2pf msg */ p_iov->vf2pf_request = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(union vfpf_tlvs), &p_iov->vf2pf_request_phys, GFP_KERNEL); - if (!p_iov->vf2pf_request) { - DP_NOTICE(p_hwfn, - "Failed to allocate `vf2pf_request' DMA memory\n"); + if (!p_iov->vf2pf_request) goto free_p_iov; - } p_iov->pf2vf_reply = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(union pfvf_tlvs), &p_iov->pf2vf_reply_phys, GFP_KERNEL); - if (!p_iov->pf2vf_reply) { - DP_NOTICE(p_hwfn, - "Failed to allocate `pf2vf_reply' DMA memory\n"); + if (!p_iov->pf2vf_reply) goto free_vf2pf_request; - } DP_VERBOSE(p_hwfn, QED_MSG_IOV, @@ -347,6 +384,9 @@ free_p_iov: return -ENOMEM; } +#define TSTORM_QZONE_START PXP_VF_BAR0_START_SDM_ZONE_A +#define MSTORM_QZONE_START(dev) (TSTORM_QZONE_START + \ + (TSTORM_QZONE_SIZE * NUM_OF_L2_QUEUES(dev))) int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, u8 rx_qid, @@ -374,6 +414,21 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, req->bd_max_bytes = bd_max_bytes; req->stat_id = -1; + /* If PF is legacy, we'll need to calculate producers ourselves + * as well as clean them. + */ + if (pp_prod && p_iov->b_pre_fp_hsi) { + u8 hw_qid = p_iov->acquire_resp.resc.hw_qid[rx_qid]; + u32 init_prod_val = 0; + + *pp_prod = (u8 __iomem *)p_hwfn->regview + + MSTORM_QZONE_START(p_hwfn->cdev) + + hw_qid * MSTORM_QZONE_SIZE; + + /* Init the rcq, rx bd and rx sge (if valid) producers to 0 */ + __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32), + (u32 *)(&init_prod_val)); + } /* add list termination tlv */ qed_add_tlv(p_hwfn, &p_iov->offset, CHANNEL_TLV_LIST_END, sizeof(struct channel_list_end_tlv)); @@ -381,13 +436,15 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, resp = &p_iov->pf2vf_reply->queue_start; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } /* Learn the address of the producer from the response */ - if (pp_prod) { + if (pp_prod && !p_iov->b_pre_fp_hsi) { u32 init_prod_val = 0; *pp_prod = (u8 __iomem *)p_hwfn->regview + resp->offset; @@ -399,6 +456,8 @@ int qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn, __internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32), (u32 *)&init_prod_val); } +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -424,10 +483,15 @@ int qed_vf_pf_rxq_stop(struct qed_hwfn *p_hwfn, u16 rx_qid, bool cqe_completion) resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -470,13 +534,27 @@ int qed_vf_pf_txq_start(struct qed_hwfn *p_hwfn, } if (pp_doorbell) { - *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + resp->offset; + /* Modern PFs provide the actual offsets, while legacy + * provided only the queue id. + */ + if (!p_iov->b_pre_fp_hsi) { + *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + + resp->offset; + } else { + u8 cid = p_iov->acquire_resp.resc.cid[tx_queue_id]; + u32 db_addr; + + db_addr = qed_db_addr_vf(cid, DQ_DEMS_LEGACY); + *pp_doorbell = (u8 __iomem *)p_hwfn->doorbells + + db_addr; + } DP_VERBOSE(p_hwfn, QED_MSG_IOV, "Txq[0x%02x]: doorbell at %p [offset 0x%08x]\n", tx_queue_id, *pp_doorbell, resp->offset); } exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -501,10 +579,15 @@ int qed_vf_pf_txq_stop(struct qed_hwfn *p_hwfn, u16 tx_qid) resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -543,10 +626,15 @@ int qed_vf_pf_vport_start(struct qed_hwfn *p_hwfn, resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -567,10 +655,15 @@ int qed_vf_pf_vport_stop(struct qed_hwfn *p_hwfn) rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } + +exit: + qed_vf_pf_req_end(p_hwfn, rc); return rc; } @@ -770,13 +863,18 @@ int qed_vf_pf_vport_update(struct qed_hwfn *p_hwfn, rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, resp_size); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } qed_vf_handle_vp_update_tlvs_resp(p_hwfn, p_params); +exit: + qed_vf_pf_req_end(p_hwfn, rc); + return rc; } @@ -797,14 +895,19 @@ int qed_vf_pf_reset(struct qed_hwfn *p_hwfn) resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EAGAIN; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EAGAIN; + goto exit; + } p_hwfn->b_int_enabled = 0; - return 0; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; } int qed_vf_pf_release(struct qed_hwfn *p_hwfn) @@ -828,6 +931,8 @@ int qed_vf_pf_release(struct qed_hwfn *p_hwfn) if (!rc && resp->hdr.status != PFVF_STATUS_SUCCESS) rc = -EAGAIN; + qed_vf_pf_req_end(p_hwfn, rc); + p_hwfn->b_int_enabled = 0; if (p_iov->vf2pf_request) @@ -896,12 +1001,17 @@ int qed_vf_pf_filter_ucast(struct qed_hwfn *p_hwfn, resp = &p_iov->pf2vf_reply->default_resp; rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EAGAIN; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EAGAIN; + goto exit; + } - return 0; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; } int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn) @@ -920,12 +1030,17 @@ int qed_vf_pf_int_cleanup(struct qed_hwfn *p_hwfn) rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); if (rc) - return rc; + goto exit; - if (resp->hdr.status != PFVF_STATUS_SUCCESS) - return -EINVAL; + if (resp->hdr.status != PFVF_STATUS_SUCCESS) { + rc = -EINVAL; + goto exit; + } - return 0; +exit: + qed_vf_pf_req_end(p_hwfn, rc); + + return rc; } u16 qed_vf_get_igu_sb_id(struct qed_hwfn *p_hwfn, u16 sb_id) @@ -1071,8 +1186,8 @@ bool qed_vf_check_mac(struct qed_hwfn *p_hwfn, u8 *mac) return false; } -bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn, - u8 *dst_mac, u8 *p_is_forced) +static bool qed_vf_bulletin_get_forced_mac(struct qed_hwfn *hwfn, + u8 *dst_mac, u8 *p_is_forced) { struct qed_bulletin_content *bulletin; diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.h b/drivers/net/ethernet/qlogic/qed/qed_vf.h index b23ce58e932f..35db7a28aa13 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_vf.h +++ b/drivers/net/ethernet/qlogic/qed/qed_vf.h @@ -86,7 +86,7 @@ struct vfpf_acquire_tlv { struct vfpf_first_tlv first_tlv; struct vf_pf_vfdev_info { -#define VFPF_ACQUIRE_CAP_OBSOLETE (1 << 0) +#define VFPF_ACQUIRE_CAP_PRE_FP_HSI (1 << 0) /* VF pre-FP hsi version */ #define VFPF_ACQUIRE_CAP_100G (1 << 1) /* VF can support 100g */ u64 capabilities; u8 fw_major; @@ -551,6 +551,11 @@ struct qed_vf_iov { /* we set aside a copy of the acquire response */ struct pfvf_acquire_resp_tlv acquire_resp; + + /* In case PF originates prior to the fp-hsi version comparison, + * this has to be propagated as it affects the fastpath. + */ + bool b_pre_fp_hsi; }; #ifdef CONFIG_QED_SRIOV diff --git a/drivers/net/ethernet/qlogic/qede/Makefile b/drivers/net/ethernet/qlogic/qede/Makefile index 74a49850d74d..28dc58919c85 100644 --- a/drivers/net/ethernet/qlogic/qede/Makefile +++ b/drivers/net/ethernet/qlogic/qede/Makefile @@ -2,3 +2,4 @@ obj-$(CONFIG_QEDE) := qede.o qede-y := qede_main.o qede_ethtool.o qede-$(CONFIG_DCB) += qede_dcbnl.o +qede-$(CONFIG_INFINIBAND_QEDR) += qede_roce.o diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h index 02b06d4e40ae..28c0e9f42c9e 100644 --- a/drivers/net/ethernet/qlogic/qede/qede.h +++ b/drivers/net/ethernet/qlogic/qede/qede.h @@ -25,7 +25,7 @@ #define QEDE_MAJOR_VERSION 8 #define QEDE_MINOR_VERSION 10 -#define QEDE_REVISION_VERSION 1 +#define QEDE_REVISION_VERSION 9 #define QEDE_ENGINEERING_VERSION 20 #define DRV_MODULE_VERSION __stringify(QEDE_MAJOR_VERSION) "." \ __stringify(QEDE_MINOR_VERSION) "." \ @@ -36,6 +36,8 @@ struct qede_stats { u64 no_buff_discards; + u64 packet_too_big_discard; + u64 ttl0_discard; u64 rx_ucast_bytes; u64 rx_mcast_bytes; u64 rx_bcast_bytes; @@ -104,6 +106,13 @@ struct qede_vlan { bool configured; }; +struct qede_rdma_dev { + struct qedr_dev *qedr_dev; + struct list_head entry; + struct list_head roce_event_list; + struct workqueue_struct *roce_wq; +}; + struct qede_dev { struct qed_dev *cdev; struct net_device *ndev; @@ -124,16 +133,22 @@ struct qede_dev { (edev)->dev_info.num_tc) struct qede_fastpath *fp_array; - u16 req_rss; - u16 num_rss; + u8 req_num_tx; + u8 fp_num_tx; + u8 req_num_rx; + u8 fp_num_rx; + u16 req_queues; + u16 num_queues; u8 num_tc; -#define QEDE_RSS_CNT(edev) ((edev)->num_rss) -#define QEDE_TSS_CNT(edev) ((edev)->num_rss * \ - (edev)->num_tc) -#define QEDE_TSS_IDX(edev, txqidx) ((txqidx) % (edev)->num_rss) -#define QEDE_TC_IDX(edev, txqidx) ((txqidx) / (edev)->num_rss) +#define QEDE_QUEUE_CNT(edev) ((edev)->num_queues) +#define QEDE_RSS_COUNT(edev) ((edev)->num_queues - (edev)->fp_num_tx) +#define QEDE_TSS_COUNT(edev) (((edev)->num_queues - (edev)->fp_num_rx) * \ + (edev)->num_tc) +#define QEDE_TX_IDX(edev, txqidx) ((edev)->fp_num_rx + (txqidx) % \ + QEDE_TSS_COUNT(edev)) +#define QEDE_TC_IDX(edev, txqidx) ((txqidx) / QEDE_TSS_COUNT(edev)) #define QEDE_TX_QUEUE(edev, txqidx) \ - (&(edev)->fp_array[QEDE_TSS_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX( \ + (&(edev)->fp_array[QEDE_TX_IDX((edev), (txqidx))].txqs[QEDE_TC_IDX(\ (edev), (txqidx))]) struct qed_int_info int_info; @@ -177,6 +192,8 @@ struct qede_dev { unsigned long sp_flags; u16 vxlan_dst_port; u16 geneve_dst_port; + + struct qede_rdma_dev rdma_info; }; enum QEDE_STATE { @@ -235,6 +252,7 @@ struct qede_rx_queue { u16 num_rx_buffers; u16 rxq_id; + u64 rcv_pkts; u64 rx_hw_errors; u64 rx_alloc_errors; u64 rx_ip_frags; @@ -263,6 +281,10 @@ struct qede_tx_queue { union db_prod tx_db; u16 num_tx_buffers; + u64 xmit_pkts; + u64 stopped_cnt; + + bool is_legacy; }; #define BD_UNMAP_ADDR(bd) HILO_U64(le32_to_cpu((bd)->addr.hi), \ @@ -277,7 +299,11 @@ struct qede_tx_queue { struct qede_fastpath { struct qede_dev *edev; - u8 rss_id; +#define QEDE_FASTPATH_TX BIT(0) +#define QEDE_FASTPATH_RX BIT(1) +#define QEDE_FASTPATH_COMBINED (QEDE_FASTPATH_TX | QEDE_FASTPATH_RX) + u8 type; + u8 id; struct napi_struct napi; struct qed_sb_info *sb_info; struct qede_rx_queue *rxq; @@ -337,6 +363,6 @@ void qede_recycle_rx_bd_ring(struct qede_rx_queue *rxq, struct qede_dev *edev, #define QEDE_MIN_PKT_LEN 64 #define QEDE_RX_HDR_SIZE 256 -#define for_each_rss(i) for (i = 0; i < edev->num_rss; i++) +#define for_each_queue(i) for (i = 0; i < edev->num_queues; i++) #endif /* _QEDE_H_ */ diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index f8492cac9290..25a9b293ee8f 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -35,6 +35,7 @@ static const struct { u64 offset; char string[ETH_GSTRING_LEN]; } qede_rqstats_arr[] = { + QEDE_RQSTAT(rcv_pkts), QEDE_RQSTAT(rx_hw_errors), QEDE_RQSTAT(rx_alloc_errors), QEDE_RQSTAT(rx_ip_frags), @@ -44,6 +45,24 @@ static const struct { #define QEDE_RQSTATS_DATA(dev, sindex, rqindex) \ (*((u64 *)(((char *)(dev->fp_array[(rqindex)].rxq)) +\ qede_rqstats_arr[(sindex)].offset))) +#define QEDE_TQSTAT_OFFSET(stat_name) \ + (offsetof(struct qede_tx_queue, stat_name)) +#define QEDE_TQSTAT_STRING(stat_name) (#stat_name) +#define QEDE_TQSTAT(stat_name) \ + {QEDE_TQSTAT_OFFSET(stat_name), QEDE_TQSTAT_STRING(stat_name)} +#define QEDE_NUM_TQSTATS ARRAY_SIZE(qede_tqstats_arr) +static const struct { + u64 offset; + char string[ETH_GSTRING_LEN]; +} qede_tqstats_arr[] = { + QEDE_TQSTAT(xmit_pkts), + QEDE_TQSTAT(stopped_cnt), +}; + +#define QEDE_TQSTATS_DATA(dev, sindex, tssid, tcid) \ + (*((u64 *)(((void *)(&dev->fp_array[tssid].txqs[tcid])) +\ + qede_tqstats_arr[(sindex)].offset))) + static const struct { u64 offset; char string[ETH_GSTRING_LEN]; @@ -107,6 +126,8 @@ static const struct { QEDE_PF_STAT(mftag_filter_discards), QEDE_PF_STAT(mac_filter_discards), QEDE_STAT(tx_err_drop_pkts), + QEDE_STAT(ttl0_discard), + QEDE_STAT(packet_too_big_discard), QEDE_STAT(coalesced_pkts), QEDE_STAT(coalesced_events), @@ -151,17 +172,29 @@ static void qede_get_strings_stats(struct qede_dev *edev, u8 *buf) { int i, j, k; + for (i = 0, k = 0; i < QEDE_QUEUE_CNT(edev); i++) { + int tc; + + for (j = 0; j < QEDE_NUM_RQSTATS; j++) + sprintf(buf + (k + j) * ETH_GSTRING_LEN, + "%d: %s", i, qede_rqstats_arr[j].string); + k += QEDE_NUM_RQSTATS; + for (tc = 0; tc < edev->num_tc; tc++) { + for (j = 0; j < QEDE_NUM_TQSTATS; j++) + sprintf(buf + (k + j) * ETH_GSTRING_LEN, + "%d.%d: %s", i, tc, + qede_tqstats_arr[j].string); + k += QEDE_NUM_TQSTATS; + } + } + for (i = 0, j = 0; i < QEDE_NUM_STATS; i++) { if (IS_VF(edev) && qede_stats_arr[i].pf_only) continue; - strcpy(buf + j * ETH_GSTRING_LEN, + strcpy(buf + (k + j) * ETH_GSTRING_LEN, qede_stats_arr[i].string); j++; } - - for (k = 0; k < QEDE_NUM_RQSTATS; k++, j++) - strcpy(buf + j * ETH_GSTRING_LEN, - qede_rqstats_arr[k].string); } static void qede_get_strings(struct net_device *dev, u32 stringset, u8 *buf) @@ -197,19 +230,30 @@ static void qede_get_ethtool_stats(struct net_device *dev, mutex_lock(&edev->qede_lock); + for (qid = 0; qid < QEDE_QUEUE_CNT(edev); qid++) { + int tc; + + if (edev->fp_array[qid].type & QEDE_FASTPATH_RX) { + for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) + buf[cnt++] = QEDE_RQSTATS_DATA(edev, sidx, qid); + } + + if (edev->fp_array[qid].type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + for (sidx = 0; sidx < QEDE_NUM_TQSTATS; sidx++) + buf[cnt++] = QEDE_TQSTATS_DATA(edev, + sidx, + qid, tc); + } + } + } + for (sidx = 0; sidx < QEDE_NUM_STATS; sidx++) { if (IS_VF(edev) && qede_stats_arr[sidx].pf_only) continue; buf[cnt++] = QEDE_STATS_DATA(edev, sidx); } - for (sidx = 0; sidx < QEDE_NUM_RQSTATS; sidx++) { - buf[cnt] = 0; - for (qid = 0; qid < edev->num_rss; qid++) - buf[cnt] += QEDE_RQSTATS_DATA(edev, sidx, qid); - cnt++; - } - mutex_unlock(&edev->qede_lock); } @@ -227,7 +271,8 @@ static int qede_get_sset_count(struct net_device *dev, int stringset) if (qede_stats_arr[i].pf_only) num_stats--; } - return num_stats + QEDE_NUM_RQSTATS; + return num_stats + QEDE_RSS_COUNT(edev) * QEDE_NUM_RQSTATS + + QEDE_TSS_COUNT(edev) * QEDE_NUM_TQSTATS * edev->num_tc; case ETH_SS_PRIV_FLAGS: return QEDE_PRI_FLAG_LEN; case ETH_SS_TEST: @@ -249,78 +294,150 @@ static u32 qede_get_priv_flags(struct net_device *dev) return (!!(edev->dev_info.common.num_hwfns > 1)) << QEDE_PRI_FLAG_CMT; } -static int qede_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +struct qede_link_mode_mapping { + u32 qed_link_mode; + u32 ethtool_link_mode; +}; + +static const struct qede_link_mode_mapping qed_lm_map[] = { + {QED_LM_FIBRE_BIT, ETHTOOL_LINK_MODE_FIBRE_BIT}, + {QED_LM_Autoneg_BIT, ETHTOOL_LINK_MODE_Autoneg_BIT}, + {QED_LM_Asym_Pause_BIT, ETHTOOL_LINK_MODE_Asym_Pause_BIT}, + {QED_LM_Pause_BIT, ETHTOOL_LINK_MODE_Pause_BIT}, + {QED_LM_1000baseT_Half_BIT, ETHTOOL_LINK_MODE_1000baseT_Half_BIT}, + {QED_LM_1000baseT_Full_BIT, ETHTOOL_LINK_MODE_1000baseT_Full_BIT}, + {QED_LM_10000baseKR_Full_BIT, ETHTOOL_LINK_MODE_10000baseKR_Full_BIT}, + {QED_LM_25000baseKR_Full_BIT, ETHTOOL_LINK_MODE_25000baseKR_Full_BIT}, + {QED_LM_40000baseLR4_Full_BIT, ETHTOOL_LINK_MODE_40000baseLR4_Full_BIT}, + {QED_LM_50000baseKR2_Full_BIT, ETHTOOL_LINK_MODE_50000baseKR2_Full_BIT}, + {QED_LM_100000baseKR4_Full_BIT, + ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT}, +}; + +#define QEDE_DRV_TO_ETHTOOL_CAPS(caps, lk_ksettings, name) \ +{ \ + int i; \ + \ + for (i = 0; i < QED_LM_COUNT; i++) { \ + if ((caps) & (qed_lm_map[i].qed_link_mode)) \ + __set_bit(qed_lm_map[i].ethtool_link_mode,\ + lk_ksettings->link_modes.name); \ + } \ +} + +#define QEDE_ETHTOOL_TO_DRV_CAPS(caps, lk_ksettings, name) \ +{ \ + int i; \ + \ + for (i = 0; i < QED_LM_COUNT; i++) { \ + if (test_bit(qed_lm_map[i].ethtool_link_mode, \ + lk_ksettings->link_modes.name)) \ + caps |= qed_lm_map[i].qed_link_mode; \ + } \ +} + +static int qede_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd) { + struct ethtool_link_settings *base = &cmd->base; struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; memset(¤t_link, 0, sizeof(current_link)); edev->ops->common->get_link(edev->cdev, ¤t_link); - cmd->supported = current_link.supported_caps; - cmd->advertising = current_link.advertised_caps; + ethtool_link_ksettings_zero_link_mode(cmd, supported); + QEDE_DRV_TO_ETHTOOL_CAPS(current_link.supported_caps, cmd, supported) + + ethtool_link_ksettings_zero_link_mode(cmd, advertising); + QEDE_DRV_TO_ETHTOOL_CAPS(current_link.advertised_caps, cmd, advertising) + + ethtool_link_ksettings_zero_link_mode(cmd, lp_advertising); + QEDE_DRV_TO_ETHTOOL_CAPS(current_link.lp_caps, cmd, lp_advertising) + if ((edev->state == QEDE_STATE_OPEN) && (current_link.link_up)) { - ethtool_cmd_speed_set(cmd, current_link.speed); - cmd->duplex = current_link.duplex; + base->speed = current_link.speed; + base->duplex = current_link.duplex; } else { - cmd->duplex = DUPLEX_UNKNOWN; - ethtool_cmd_speed_set(cmd, SPEED_UNKNOWN); + base->speed = SPEED_UNKNOWN; + base->duplex = DUPLEX_UNKNOWN; } - cmd->port = current_link.port; - cmd->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE : - AUTONEG_DISABLE; - cmd->lp_advertising = current_link.lp_caps; + base->port = current_link.port; + base->autoneg = (current_link.autoneg) ? AUTONEG_ENABLE : + AUTONEG_DISABLE; return 0; } -static int qede_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int qede_set_link_ksettings(struct net_device *dev, + const struct ethtool_link_ksettings *cmd) { + const struct ethtool_link_settings *base = &cmd->base; struct qede_dev *edev = netdev_priv(dev); struct qed_link_output current_link; struct qed_link_params params; - u32 speed; if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { - DP_INFO(edev, - "Link settings are not allowed to be changed\n"); + DP_INFO(edev, "Link settings are not allowed to be changed\n"); return -EOPNOTSUPP; } - memset(¤t_link, 0, sizeof(current_link)); memset(¶ms, 0, sizeof(params)); edev->ops->common->get_link(edev->cdev, ¤t_link); - speed = ethtool_cmd_speed(cmd); params.override_flags |= QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS; params.override_flags |= QED_LINK_OVERRIDE_SPEED_AUTONEG; - if (cmd->autoneg == AUTONEG_ENABLE) { + if (base->autoneg == AUTONEG_ENABLE) { params.autoneg = true; params.forced_speed = 0; - params.adv_speeds = cmd->advertising; - } else { /* forced speed */ + QEDE_ETHTOOL_TO_DRV_CAPS(params.adv_speeds, cmd, advertising) + } else { /* forced speed */ params.override_flags |= QED_LINK_OVERRIDE_SPEED_FORCED_SPEED; params.autoneg = false; - params.forced_speed = speed; - switch (speed) { + params.forced_speed = base->speed; + switch (base->speed) { case SPEED_10000: if (!(current_link.supported_caps & - SUPPORTED_10000baseKR_Full)) { + QED_LM_10000baseKR_Full_BIT)) { DP_INFO(edev, "10G speed not supported\n"); return -EINVAL; } - params.adv_speeds = SUPPORTED_10000baseKR_Full; + params.adv_speeds = QED_LM_10000baseKR_Full_BIT; + break; + case SPEED_25000: + if (!(current_link.supported_caps & + QED_LM_25000baseKR_Full_BIT)) { + DP_INFO(edev, "25G speed not supported\n"); + return -EINVAL; + } + params.adv_speeds = QED_LM_25000baseKR_Full_BIT; break; case SPEED_40000: if (!(current_link.supported_caps & - SUPPORTED_40000baseLR4_Full)) { + QED_LM_40000baseLR4_Full_BIT)) { DP_INFO(edev, "40G speed not supported\n"); return -EINVAL; } - params.adv_speeds = SUPPORTED_40000baseLR4_Full; + params.adv_speeds = QED_LM_40000baseLR4_Full_BIT; + break; + case SPEED_50000: + if (!(current_link.supported_caps & + QED_LM_50000baseKR2_Full_BIT)) { + DP_INFO(edev, "50G speed not supported\n"); + return -EINVAL; + } + params.adv_speeds = QED_LM_50000baseKR2_Full_BIT; + break; + case SPEED_100000: + if (!(current_link.supported_caps & + QED_LM_100000baseKR4_Full_BIT)) { + DP_INFO(edev, "100G speed not supported\n"); + return -EINVAL; + } + params.adv_speeds = QED_LM_100000baseKR4_Full_BIT; break; default: - DP_INFO(edev, "Unsupported speed %u\n", speed); + DP_INFO(edev, "Unsupported speed %u\n", base->speed); return -EINVAL; } } @@ -368,8 +485,7 @@ static u32 qede_get_msglevel(struct net_device *ndev) { struct qede_dev *edev = netdev_priv(ndev); - return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) | - edev->dp_module; + return ((u32)edev->dp_level << QED_LOG_LEVEL_SHIFT) | edev->dp_module; } static void qede_set_msglevel(struct net_device *ndev, u32 level) @@ -393,8 +509,7 @@ static int qede_nway_reset(struct net_device *dev) struct qed_link_params link_params; if (!edev->ops || !edev->ops->common->can_link_change(edev->cdev)) { - DP_INFO(edev, - "Link settings are not allowed to be changed\n"); + DP_INFO(edev, "Link settings are not allowed to be changed\n"); return -EOPNOTSUPP; } @@ -467,7 +582,7 @@ static int qede_set_coalesce(struct net_device *dev, rxc = (u16)coal->rx_coalesce_usecs; txc = (u16)coal->tx_coalesce_usecs; - for_each_rss(i) { + for_each_queue(i) { sb_id = edev->fp_array[i].sb_info->igu_sb_id; rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc, (u8)i, sb_id); @@ -563,7 +678,7 @@ static int qede_set_pauseparam(struct net_device *dev, memset(¶ms, 0, sizeof(params)); params.override_flags |= QED_LINK_OVERRIDE_PAUSE_CONFIG; if (epause->autoneg) { - if (!(current_link.supported_caps & SUPPORTED_Autoneg)) { + if (!(current_link.supported_caps & QED_LM_Autoneg_BIT)) { DP_INFO(edev, "autoneg not supported\n"); return -EINVAL; } @@ -580,6 +695,28 @@ static int qede_set_pauseparam(struct net_device *dev, return 0; } +static void qede_get_regs(struct net_device *ndev, + struct ethtool_regs *regs, void *buffer) +{ + struct qede_dev *edev = netdev_priv(ndev); + + regs->version = 0; + memset(buffer, 0, regs->len); + + if (edev->ops && edev->ops->common) + edev->ops->common->dbg_all_data(edev->cdev, buffer); +} + +static int qede_get_regs_len(struct net_device *ndev) +{ + struct qede_dev *edev = netdev_priv(ndev); + + if (edev->ops && edev->ops->common) + return edev->ops->common->dbg_all_data_size(edev->cdev); + else + return -EINVAL; +} + static void qede_update_mtu(struct qede_dev *edev, union qede_reload_args *args) { edev->ndev->mtu = args->mtu; @@ -619,45 +756,70 @@ static void qede_get_channels(struct net_device *dev, struct qede_dev *edev = netdev_priv(dev); channels->max_combined = QEDE_MAX_RSS_CNT(edev); - channels->combined_count = QEDE_RSS_CNT(edev); + channels->combined_count = QEDE_QUEUE_CNT(edev) - edev->fp_num_tx - + edev->fp_num_rx; + channels->tx_count = edev->fp_num_tx; + channels->rx_count = edev->fp_num_rx; } static int qede_set_channels(struct net_device *dev, struct ethtool_channels *channels) { struct qede_dev *edev = netdev_priv(dev); + u32 count; DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "set-channels command parameters: rx = %d, tx = %d, other = %d, combined = %d\n", channels->rx_count, channels->tx_count, channels->other_count, channels->combined_count); - /* We don't support separate rx / tx, nor `other' channels. */ - if (channels->rx_count || channels->tx_count || - channels->other_count || (channels->combined_count == 0) || - (channels->combined_count > QEDE_MAX_RSS_CNT(edev))) { + count = channels->rx_count + channels->tx_count + + channels->combined_count; + + /* We don't support `other' channels */ + if (channels->other_count) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "command parameters not supported\n"); return -EINVAL; } + if (!(channels->combined_count || (channels->rx_count && + channels->tx_count))) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "need to request at least one transmit and one receive channel\n"); + return -EINVAL; + } + + if (count > QEDE_MAX_RSS_CNT(edev)) { + DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), + "requested channels = %d max supported channels = %d\n", + count, QEDE_MAX_RSS_CNT(edev)); + return -EINVAL; + } + /* Check if there was a change in the active parameters */ - if (channels->combined_count == QEDE_RSS_CNT(edev)) { + if ((count == QEDE_QUEUE_CNT(edev)) && + (channels->tx_count == edev->fp_num_tx) && + (channels->rx_count == edev->fp_num_rx)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), "No change in active parameters\n"); return 0; } /* We need the number of queues to be divisible between the hwfns */ - if (channels->combined_count % edev->dev_info.common.num_hwfns) { + if ((count % edev->dev_info.common.num_hwfns) || + (channels->tx_count % edev->dev_info.common.num_hwfns) || + (channels->rx_count % edev->dev_info.common.num_hwfns)) { DP_VERBOSE(edev, (NETIF_MSG_IFUP | NETIF_MSG_IFDOWN), - "Number of channels must be divisable by %04x\n", + "Number of channels must be divisible by %04x\n", edev->dev_info.common.num_hwfns); return -EINVAL; } /* Set number of queues and reload if necessary */ - edev->req_rss = channels->combined_count; + edev->req_queues = count; + edev->req_num_tx = channels->tx_count; + edev->req_num_rx = channels->rx_count; if (netif_running(dev)) qede_reload(edev, NULL, NULL); @@ -727,7 +889,7 @@ static int qede_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *info, switch (info->cmd) { case ETHTOOL_GRXRINGS: - info->data = edev->num_rss; + info->data = QEDE_RSS_COUNT(edev); return 0; case ETHTOOL_GRXFH: return qede_get_rss_flags(edev, info); @@ -930,7 +1092,7 @@ static void qede_netif_start(struct qede_dev *edev) if (!netif_running(edev->ndev)) return; - for_each_rss(i) { + for_each_queue(i) { /* Update and reenable interrupts */ qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_ENABLE, 1); napi_enable(&edev->fp_array[i].napi); @@ -942,7 +1104,7 @@ static void qede_netif_stop(struct qede_dev *edev) { int i; - for_each_rss(i) { + for_each_queue(i) { napi_disable(&edev->fp_array[i].napi); /* Disable interrupts */ qed_sb_ack(edev->fp_array[i].sb_info, IGU_INT_DISABLE, 0); @@ -952,11 +1114,23 @@ static void qede_netif_stop(struct qede_dev *edev) static int qede_selftest_transmit_traffic(struct qede_dev *edev, struct sk_buff *skb) { - struct qede_tx_queue *txq = &edev->fp_array[0].txqs[0]; + struct qede_tx_queue *txq = NULL; struct eth_tx_1st_bd *first_bd; dma_addr_t mapping; int i, idx, val; + for_each_queue(i) { + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + txq = edev->fp_array[i].txqs; + break; + } + } + + if (!txq) { + DP_NOTICE(edev, "Tx path is not available\n"); + return -1; + } + /* Fill the entry in the SW ring and the BDs in the FW ring */ idx = txq->sw_tx_prod & NUM_TX_BDS_MAX; txq->sw_tx_ring[idx].skb = skb; @@ -1020,14 +1194,26 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev, static int qede_selftest_receive_traffic(struct qede_dev *edev) { - struct qede_rx_queue *rxq = edev->fp_array[0].rxq; u16 hw_comp_cons, sw_comp_cons, sw_rx_index, len; struct eth_fast_path_rx_reg_cqe *fp_cqe; + struct qede_rx_queue *rxq = NULL; struct sw_rx_data *sw_rx_data; union eth_rx_cqe *cqe; u8 *data_ptr; int i; + for_each_queue(i) { + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + rxq = edev->fp_array[i].rxq; + break; + } + } + + if (!rxq) { + DP_NOTICE(edev, "Rx path is not available\n"); + return -1; + } + /* The packet is expected to receive on rx-queue 0 even though RSS is * enabled. This is because the queue 0 is configured as the default * queue and that the loopback traffic is not IP. @@ -1228,9 +1414,11 @@ static int qede_get_tunable(struct net_device *dev, } static const struct ethtool_ops qede_ethtool_ops = { - .get_settings = qede_get_settings, - .set_settings = qede_set_settings, + .get_link_ksettings = qede_get_link_ksettings, + .set_link_ksettings = qede_set_link_ksettings, .get_drvinfo = qede_get_drvinfo, + .get_regs_len = qede_get_regs_len, + .get_regs = qede_get_regs, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, .nway_reset = qede_nway_reset, @@ -1260,7 +1448,7 @@ static const struct ethtool_ops qede_ethtool_ops = { }; static const struct ethtool_ops qede_vf_ethtool_ops = { - .get_settings = qede_get_settings, + .get_link_ksettings = qede_get_link_ksettings, .get_drvinfo = qede_get_drvinfo, .get_msglevel = qede_get_msglevel, .set_msglevel = qede_set_msglevel, diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 9544e4c41359..343038ca047d 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -36,7 +36,7 @@ #include #include #include - +#include #include "qede.h" static char version[] = @@ -100,7 +100,8 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, static void qede_link_update(void *dev, struct qed_link_output *link); #ifdef CONFIG_QED_SRIOV -static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) +static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos, + __be16 vlan_proto) { struct qede_dev *edev = netdev_priv(ndev); @@ -109,6 +110,9 @@ static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos) return -EINVAL; } + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + DP_VERBOSE(edev, QED_MSG_IOV, "Setting Vlan 0x%04x to VF [%d]\n", vlan, vf); @@ -189,8 +193,7 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event, struct ethtool_drvinfo drvinfo; struct qede_dev *edev; - /* Currently only support name change */ - if (event != NETDEV_CHANGENAME) + if (event != NETDEV_CHANGENAME && event != NETDEV_CHANGEADDR) goto done; /* Check whether this is a qede device */ @@ -203,11 +206,18 @@ static int qede_netdev_event(struct notifier_block *this, unsigned long event, goto done; edev = netdev_priv(ndev); - /* Notify qed of the name change */ - if (!edev->ops || !edev->ops->common) - goto done; - edev->ops->common->set_id(edev->cdev, edev->ndev->name, - "qede"); + switch (event) { + case NETDEV_CHANGENAME: + /* Notify qed of the name change */ + if (!edev->ops || !edev->ops->common) + goto done; + edev->ops->common->set_id(edev->cdev, edev->ndev->name, "qede"); + break; + case NETDEV_CHANGEADDR: + edev = netdev_priv(ndev); + qede_roce_event_changeaddr(edev); + break; + } done: return NOTIFY_DONE; @@ -222,7 +232,7 @@ int __init qede_init(void) { int ret; - pr_notice("qede_init: %s\n", version); + pr_info("qede_init: %s\n", version); qed_ops = qed_get_eth_ops(); if (!qed_ops) { @@ -253,7 +263,8 @@ int __init qede_init(void) static void __exit qede_cleanup(void) { - pr_notice("qede_cleanup called\n"); + if (debug & QED_LOG_INFO_MASK) + pr_info("qede_cleanup called\n"); unregister_netdevice_notifier(&qede_netdev_notifier); pci_unregister_driver(&qede_pci_driver); @@ -270,8 +281,7 @@ module_exit(qede_cleanup); /* Unmap the data and free skb */ static int qede_free_tx_pkt(struct qede_dev *edev, - struct qede_tx_queue *txq, - int *len) + struct qede_tx_queue *txq, int *len) { u16 idx = txq->sw_tx_cons & NUM_TX_BDS_MAX; struct sk_buff *skb = txq->sw_tx_ring[idx].skb; @@ -329,8 +339,7 @@ static int qede_free_tx_pkt(struct qede_dev *edev, static void qede_free_failed_tx_pkt(struct qede_dev *edev, struct qede_tx_queue *txq, struct eth_tx_1st_bd *first_bd, - int nbd, - bool data_split) + int nbd, bool data_split) { u16 idx = txq->sw_tx_prod & NUM_TX_BDS_MAX; struct sk_buff *skb = txq->sw_tx_ring[idx].skb; @@ -339,8 +348,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, /* Return prod to its position before this skb was handled */ qed_chain_set_prod(&txq->tx_pbl, - le16_to_cpu(txq->tx_db.data.bd_prod), - first_bd); + le16_to_cpu(txq->tx_db.data.bd_prod), first_bd); first_bd = (struct eth_tx_1st_bd *)qed_chain_produce(&txq->tx_pbl); @@ -366,8 +374,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, /* Return again prod to its position before this skb was handled */ qed_chain_set_prod(&txq->tx_pbl, - le16_to_cpu(txq->tx_db.data.bd_prod), - first_bd); + le16_to_cpu(txq->tx_db.data.bd_prod), first_bd); /* Free skb */ dev_kfree_skb_any(skb); @@ -376,8 +383,7 @@ static void qede_free_failed_tx_pkt(struct qede_dev *edev, } static u32 qede_xmit_type(struct qede_dev *edev, - struct sk_buff *skb, - int *ipv6_ext) + struct sk_buff *skb, int *ipv6_ext) { u32 rc = XMIT_L4_CSUM; __be16 l3_proto; @@ -434,15 +440,13 @@ static void qede_set_params_for_ipv6_ext(struct sk_buff *skb, } static int map_frag_to_bd(struct qede_dev *edev, - skb_frag_t *frag, - struct eth_tx_bd *bd) + skb_frag_t *frag, struct eth_tx_bd *bd) { dma_addr_t mapping; /* Map skb non-linear frag data for DMA */ mapping = skb_frag_dma_map(&edev->pdev->dev, frag, 0, - skb_frag_size(frag), - DMA_TO_DEVICE); + skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(&edev->pdev->dev, mapping))) { DP_NOTICE(edev, "Unable to map frag - dropping packet\n"); return -ENOMEM; @@ -504,9 +508,8 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq) } /* Main transmit function */ -static -netdev_tx_t qede_start_xmit(struct sk_buff *skb, - struct net_device *ndev) +static netdev_tx_t qede_start_xmit(struct sk_buff *skb, + struct net_device *ndev) { struct qede_dev *edev = netdev_priv(ndev); struct netdev_queue *netdev_txq; @@ -526,12 +529,11 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, /* Get tx-queue context and netdev index */ txq_index = skb_get_queue_mapping(skb); - WARN_ON(txq_index >= QEDE_TSS_CNT(edev)); + WARN_ON(txq_index >= QEDE_TSS_COUNT(edev)); txq = QEDE_TX_QUEUE(edev, txq_index); netdev_txq = netdev_get_tx_queue(ndev, txq_index); - WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < - (MAX_SKB_FRAGS + 1)); + WARN_ON(qed_chain_get_elem_left(&txq->tx_pbl) < (MAX_SKB_FRAGS + 1)); xmit_type = qede_xmit_type(edev, skb, &ipv6_ext); @@ -606,6 +608,14 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, 1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT; } + /* Legacy FW had flipped behavior in regard to this bit - + * I.e., needed to set to prevent FW from touching encapsulated + * packets when it didn't need to. + */ + if (unlikely(txq->is_legacy)) + first_bd->data.bitfields ^= + 1 << ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT; + /* If the packet is IPv6 with extension header, indicate that * to FW and pass few params, since the device cracker doesn't * support parsing IPv6 with extension header/s. @@ -731,6 +741,7 @@ netdev_tx_t qede_start_xmit(struct sk_buff *skb, qede_update_tx_producer(txq); netif_tx_stop_queue(netdev_txq); + txq->stopped_cnt++; DP_VERBOSE(edev, NETIF_MSG_TX_QUEUED, "Stop queue was called\n"); /* paired memory barrier is in qede_tx_int(), we have to keep @@ -764,8 +775,7 @@ int qede_txq_has_work(struct qede_tx_queue *txq) return hw_bd_cons != qed_chain_get_cons_idx(&txq->tx_pbl); } -static int qede_tx_int(struct qede_dev *edev, - struct qede_tx_queue *txq) +static int qede_tx_int(struct qede_dev *edev, struct qede_tx_queue *txq) { struct netdev_queue *netdev_txq; u16 hw_bd_cons; @@ -791,6 +801,7 @@ static int qede_tx_int(struct qede_dev *edev, bytes_compl += len; pkts_compl++; txq->sw_tx_cons++; + txq->xmit_pkts++; } netdev_tx_completed_queue(netdev_txq, pkts_compl, bytes_compl); @@ -963,8 +974,7 @@ static inline void qede_update_rx_prod(struct qede_dev *edev, static u32 qede_get_rxhash(struct qede_dev *edev, u8 bitfields, - __le32 rss_hash, - enum pkt_hash_types *rxhash_type) + __le32 rss_hash, enum pkt_hash_types *rxhash_type) { enum rss_hash_type htype; @@ -993,12 +1003,10 @@ static void qede_set_skb_csum(struct sk_buff *skb, u8 csum_flag) static inline void qede_skb_receive(struct qede_dev *edev, struct qede_fastpath *fp, - struct sk_buff *skb, - u16 vlan_tag) + struct sk_buff *skb, u16 vlan_tag) { if (vlan_tag) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), - vlan_tag); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); napi_gro_receive(&fp->napi, skb); } @@ -1021,8 +1029,7 @@ static void qede_set_gro_params(struct qede_dev *edev, static int qede_fill_frag_skb(struct qede_dev *edev, struct qede_rx_queue *rxq, - u8 tpa_agg_index, - u16 len_on_bd) + u8 tpa_agg_index, u16 len_on_bd) { struct sw_rx_data *current_bd = &rxq->sw_rx_ring[rxq->sw_rx_cons & NUM_RX_BDS_MAX]; @@ -1209,7 +1216,7 @@ static void qede_gro_receive(struct qede_dev *edev, #endif send_skb: - skb_record_rx_queue(skb, fp->rss_id); + skb_record_rx_queue(skb, fp->rxq->rxq_id); qede_skb_receive(edev, fp, skb, vlan_tag); } @@ -1413,7 +1420,7 @@ static int qede_rx_int(struct qede_fastpath *fp, int budget) if (unlikely(cqe_type == ETH_RX_CQE_TYPE_SLOW_PATH)) { edev->ops->eth_cqe_completion( - edev->cdev, fp->rss_id, + edev->cdev, fp->id, (struct eth_slow_path_rx_cqe *)cqe); goto next_cqe; } @@ -1470,7 +1477,7 @@ alloc_skb: skb = netdev_alloc_skb(edev->ndev, QEDE_RX_HDR_SIZE); if (unlikely(!skb)) { DP_NOTICE(edev, - "Build_skb failed, dropping incoming packet\n"); + "skb allocation failed, dropping incoming packet\n"); qede_recycle_rx_bd_ring(rxq, edev, fp_cqe->bd_num); rxq->rx_alloc_errors++; goto next_cqe; @@ -1578,14 +1585,13 @@ alloc_skb: skb->protocol = eth_type_trans(skb, edev->ndev); rx_hash = qede_get_rxhash(edev, fp_cqe->bitfields, - fp_cqe->rss_hash, - &rxhash_type); + fp_cqe->rss_hash, &rxhash_type); skb_set_hash(skb, rx_hash, rxhash_type); qede_set_skb_csum(skb, csum_flag); - skb_record_rx_queue(skb, fp->rss_id); + skb_record_rx_queue(skb, fp->rxq->rxq_id); qede_skb_receive(edev, fp, skb, le16_to_cpu(fp_cqe->vlan_tag)); next_rx_only: @@ -1604,6 +1610,8 @@ next_cqe: /* don't consume bd rx buffer */ /* Update producers */ qede_update_rx_prod(edev, rxq); + rxq->rcv_pkts += rx_pkt; + return rx_pkt; } @@ -1616,10 +1624,12 @@ static int qede_poll(struct napi_struct *napi, int budget) u8 tc; for (tc = 0; tc < edev->num_tc; tc++) - if (qede_txq_has_work(&fp->txqs[tc])) + if (likely(fp->type & QEDE_FASTPATH_TX) && + qede_txq_has_work(&fp->txqs[tc])) qede_tx_int(edev, &fp->txqs[tc]); - rx_work_done = qede_has_rx_work(fp->rxq) ? + rx_work_done = (likely(fp->type & QEDE_FASTPATH_RX) && + qede_has_rx_work(fp->rxq)) ? qede_rx_int(fp, budget) : 0; if (rx_work_done < budget) { qed_sb_update_sb_idx(fp->sb_info); @@ -1639,8 +1649,10 @@ static int qede_poll(struct napi_struct *napi, int budget) rmb(); /* Fall out from the NAPI loop if needed */ - if (!(qede_has_rx_work(fp->rxq) || - qede_has_tx_work(fp))) { + if (!((likely(fp->type & QEDE_FASTPATH_RX) && + qede_has_rx_work(fp->rxq)) || + (likely(fp->type & QEDE_FASTPATH_TX) && + qede_has_tx_work(fp)))) { napi_complete(napi); /* Update and reenable interrupts */ @@ -1711,6 +1723,8 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->ops->get_vport_stats(edev->cdev, &stats); edev->stats.no_buff_discards = stats.no_buff_discards; + edev->stats.packet_too_big_discard = stats.packet_too_big_discard; + edev->stats.ttl0_discard = stats.ttl0_discard; edev->stats.rx_ucast_bytes = stats.rx_ucast_bytes; edev->stats.rx_mcast_bytes = stats.rx_mcast_bytes; edev->stats.rx_bcast_bytes = stats.rx_bcast_bytes; @@ -1790,9 +1804,9 @@ void qede_fill_by_demand_stats(struct qede_dev *edev) edev->stats.tx_mac_ctrl_frames = stats.tx_mac_ctrl_frames; } -static struct rtnl_link_stats64 *qede_get_stats64( - struct net_device *dev, - struct rtnl_link_stats64 *stats) +static +struct rtnl_link_stats64 *qede_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) { struct qede_dev *edev = netdev_priv(dev); @@ -2106,14 +2120,13 @@ static void qede_vlan_mark_nonconfigured(struct qede_dev *edev) } DP_VERBOSE(edev, NETIF_MSG_IFDOWN, - "marked vlan %d as non-configured\n", - vlan->vid); + "marked vlan %d as non-configured\n", vlan->vid); } edev->accept_any_vlan = false; } -int qede_set_features(struct net_device *dev, netdev_features_t features) +static int qede_set_features(struct net_device *dev, netdev_features_t features) { struct qede_dev *edev = netdev_priv(dev); netdev_features_t changes = features ^ dev->features; @@ -2149,7 +2162,7 @@ static void qede_udp_tunnel_add(struct net_device *dev, edev->vxlan_dst_port = t_port; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Added vxlan port=%d\n", t_port); set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags); @@ -2160,7 +2173,7 @@ static void qede_udp_tunnel_add(struct net_device *dev, edev->geneve_dst_port = t_port; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Added geneve port=%d\n", t_port); set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags); break; @@ -2184,7 +2197,7 @@ static void qede_udp_tunnel_del(struct net_device *dev, edev->vxlan_dst_port = 0; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted vxlan port=%d\n", t_port); set_bit(QEDE_SP_VXLAN_PORT_CONFIG, &edev->sp_flags); @@ -2195,7 +2208,7 @@ static void qede_udp_tunnel_del(struct net_device *dev, edev->geneve_dst_port = 0; - DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d", + DP_VERBOSE(edev, QED_MSG_DEBUG, "Deleted geneve port=%d\n", t_port); set_bit(QEDE_SP_GENEVE_PORT_CONFIG, &edev->sp_flags); break; @@ -2240,15 +2253,13 @@ static const struct net_device_ops qede_netdev_ops = { static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev, struct pci_dev *pdev, struct qed_dev_eth_info *info, - u32 dp_module, - u8 dp_level) + u32 dp_module, u8 dp_level) { struct net_device *ndev; struct qede_dev *edev; ndev = alloc_etherdev_mqs(sizeof(*edev), - info->num_queues, - info->num_queues); + info->num_queues, info->num_queues); if (!ndev) { pr_err("etherdev allocation failed\n"); return NULL; @@ -2264,6 +2275,9 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev, edev->q_num_rx_buffers = NUM_RX_BDS_DEF; edev->q_num_tx_buffers = NUM_TX_BDS_DEF; + DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n", + info->num_queues, info->num_queues); + SET_NETDEV_DEV(ndev, &pdev->dev); memset(&edev->stats, 0, sizeof(edev->stats)); @@ -2352,7 +2366,7 @@ static void qede_free_fp_array(struct qede_dev *edev) struct qede_fastpath *fp; int i; - for_each_rss(i) { + for_each_queue(i) { fp = &edev->fp_array[i]; kfree(fp->sb_info); @@ -2361,22 +2375,33 @@ static void qede_free_fp_array(struct qede_dev *edev) } kfree(edev->fp_array); } - edev->num_rss = 0; + + edev->num_queues = 0; + edev->fp_num_tx = 0; + edev->fp_num_rx = 0; } static int qede_alloc_fp_array(struct qede_dev *edev) { + u8 fp_combined, fp_rx = edev->fp_num_rx; struct qede_fastpath *fp; int i; - edev->fp_array = kcalloc(QEDE_RSS_CNT(edev), + edev->fp_array = kcalloc(QEDE_QUEUE_CNT(edev), sizeof(*edev->fp_array), GFP_KERNEL); if (!edev->fp_array) { DP_NOTICE(edev, "fp array allocation failed\n"); goto err; } - for_each_rss(i) { + fp_combined = QEDE_QUEUE_CNT(edev) - fp_rx - edev->fp_num_tx; + + /* Allocate the FP elements for Rx queues followed by combined and then + * the Tx. This ordering should be maintained so that the respective + * queues (Rx or Tx) will be together in the fastpath array and the + * associated ids will be sequential. + */ + for_each_queue(i) { fp = &edev->fp_array[i]; fp->sb_info = kcalloc(1, sizeof(*fp->sb_info), GFP_KERNEL); @@ -2385,16 +2410,33 @@ static int qede_alloc_fp_array(struct qede_dev *edev) goto err; } - fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL); - if (!fp->rxq) { - DP_NOTICE(edev, "RXQ struct allocation failed\n"); - goto err; + if (fp_rx) { + fp->type = QEDE_FASTPATH_RX; + fp_rx--; + } else if (fp_combined) { + fp->type = QEDE_FASTPATH_COMBINED; + fp_combined--; + } else { + fp->type = QEDE_FASTPATH_TX; } - fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs), GFP_KERNEL); - if (!fp->txqs) { - DP_NOTICE(edev, "TXQ array allocation failed\n"); - goto err; + if (fp->type & QEDE_FASTPATH_TX) { + fp->txqs = kcalloc(edev->num_tc, sizeof(*fp->txqs), + GFP_KERNEL); + if (!fp->txqs) { + DP_NOTICE(edev, + "TXQ array allocation failed\n"); + goto err; + } + } + + if (fp->type & QEDE_FASTPATH_RX) { + fp->rxq = kcalloc(1, sizeof(*fp->rxq), GFP_KERNEL); + if (!fp->rxq) { + DP_NOTICE(edev, + "RXQ struct allocation failed\n"); + goto err; + } } } @@ -2456,7 +2498,7 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, bool is_vf, enum qede_probe_mode mode) { struct qed_probe_params probe_params; - struct qed_slowpath_params params; + struct qed_slowpath_params sp_params; struct qed_dev_eth_info dev_info; struct qede_dev *edev; struct qed_dev *cdev; @@ -2479,14 +2521,14 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, qede_update_pf_params(cdev); /* Start the Slowpath-process */ - memset(¶ms, 0, sizeof(struct qed_slowpath_params)); - params.int_mode = QED_INT_MODE_MSIX; - params.drv_major = QEDE_MAJOR_VERSION; - params.drv_minor = QEDE_MINOR_VERSION; - params.drv_rev = QEDE_REVISION_VERSION; - params.drv_eng = QEDE_ENGINEERING_VERSION; - strlcpy(params.name, "qede LAN", QED_DRV_VER_STR_SIZE); - rc = qed_ops->common->slowpath_start(cdev, ¶ms); + memset(&sp_params, 0, sizeof(sp_params)); + sp_params.int_mode = QED_INT_MODE_MSIX; + sp_params.drv_major = QEDE_MAJOR_VERSION; + sp_params.drv_minor = QEDE_MINOR_VERSION; + sp_params.drv_rev = QEDE_REVISION_VERSION; + sp_params.drv_eng = QEDE_ENGINEERING_VERSION; + strlcpy(sp_params.name, "qede LAN", QED_DRV_VER_STR_SIZE); + rc = qed_ops->common->slowpath_start(cdev, &sp_params); if (rc) { pr_notice("Cannot start slowpath\n"); goto err1; @@ -2509,10 +2551,14 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, qede_init_ndev(edev); + rc = qede_roce_dev_add(edev); + if (rc) + goto err3; + rc = register_netdev(edev->ndev); if (rc) { DP_NOTICE(edev, "Cannot register net-device\n"); - goto err3; + goto err4; } edev->ops->common->set_id(cdev, edev->ndev->name, DRV_MODULE_VERSION); @@ -2532,6 +2578,8 @@ static int __qede_probe(struct pci_dev *pdev, u32 dp_module, u8 dp_level, return 0; +err4: + qede_roce_dev_remove(edev); err3: free_netdev(edev->ndev); err2: @@ -2578,8 +2626,11 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) DP_INFO(edev, "Starting qede_remove\n"); cancel_delayed_work_sync(&edev->sp_task); + unregister_netdev(ndev); + qede_roce_dev_remove(edev); + edev->ops->common->set_power_state(cdev, PCI_D0); pci_set_drvdata(pdev, NULL); @@ -2590,7 +2641,7 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) qed_ops->common->slowpath_stop(cdev); qed_ops->common->remove(cdev); - pr_notice("Ending successfully qede_remove\n"); + dev_info(&pdev->dev, "Ending qede_remove successfully\n"); } static void qede_remove(struct pci_dev *pdev) @@ -2609,8 +2660,8 @@ static int qede_set_num_queues(struct qede_dev *edev) u16 rss_num; /* Setup queues according to possible resources*/ - if (edev->req_rss) - rss_num = edev->req_rss; + if (edev->req_queues) + rss_num = edev->req_queues; else rss_num = netif_get_num_default_rss_queues() * edev->dev_info.common.num_hwfns; @@ -2620,11 +2671,15 @@ static int qede_set_num_queues(struct qede_dev *edev) rc = edev->ops->common->set_fp_int(edev->cdev, rss_num); if (rc > 0) { /* Managed to request interrupts for our queues */ - edev->num_rss = rc; + edev->num_queues = rc; DP_INFO(edev, "Managed %d [of %d] RSS queues\n", - QEDE_RSS_CNT(edev), rss_num); + QEDE_QUEUE_CNT(edev), rss_num); rc = 0; } + + edev->fp_num_tx = edev->req_num_tx; + edev->fp_num_rx = edev->req_num_rx; + return rc; } @@ -2638,16 +2693,14 @@ static void qede_free_mem_sb(struct qede_dev *edev, /* This function allocates fast-path status block memory */ static int qede_alloc_mem_sb(struct qede_dev *edev, - struct qed_sb_info *sb_info, - u16 sb_id) + struct qed_sb_info *sb_info, u16 sb_id) { struct status_block *sb_virt; dma_addr_t sb_phys; int rc; sb_virt = dma_alloc_coherent(&edev->pdev->dev, - sizeof(*sb_virt), - &sb_phys, GFP_KERNEL); + sizeof(*sb_virt), &sb_phys, GFP_KERNEL); if (!sb_virt) { DP_ERR(edev, "Status block allocation failed\n"); return -ENOMEM; @@ -2679,16 +2732,15 @@ static void qede_free_rx_buffers(struct qede_dev *edev, data = rx_buf->data; dma_unmap_page(&edev->pdev->dev, - rx_buf->mapping, - PAGE_SIZE, DMA_FROM_DEVICE); + rx_buf->mapping, PAGE_SIZE, DMA_FROM_DEVICE); rx_buf->data = NULL; __free_page(data); } } -static void qede_free_sge_mem(struct qede_dev *edev, - struct qede_rx_queue *rxq) { +static void qede_free_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) +{ int i; if (edev->gro_disable) @@ -2707,8 +2759,7 @@ static void qede_free_sge_mem(struct qede_dev *edev, } } -static void qede_free_mem_rxq(struct qede_dev *edev, - struct qede_rx_queue *rxq) +static void qede_free_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) { qede_free_sge_mem(edev, rxq); @@ -2730,9 +2781,6 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, struct eth_rx_bd *rx_bd; dma_addr_t mapping; struct page *data; - u16 rx_buf_size; - - rx_buf_size = rxq->rx_buf_size; data = alloc_pages(GFP_ATOMIC, 0); if (unlikely(!data)) { @@ -2767,8 +2815,7 @@ static int qede_alloc_rx_buffer(struct qede_dev *edev, return 0; } -static int qede_alloc_sge_mem(struct qede_dev *edev, - struct qede_rx_queue *rxq) +static int qede_alloc_sge_mem(struct qede_dev *edev, struct qede_rx_queue *rxq) { dma_addr_t mapping; int i; @@ -2815,15 +2862,14 @@ err: } /* This function allocates all memory needed per Rx queue */ -static int qede_alloc_mem_rxq(struct qede_dev *edev, - struct qede_rx_queue *rxq) +static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq) { int i, rc, size; rxq->num_rx_buffers = edev->q_num_rx_buffers; - rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + - edev->ndev->mtu; + rxq->rx_buf_size = NET_IP_ALIGN + ETH_OVERHEAD + edev->ndev->mtu; + if (rxq->rx_buf_size > PAGE_SIZE) rxq->rx_buf_size = PAGE_SIZE; @@ -2877,8 +2923,7 @@ err: return rc; } -static void qede_free_mem_txq(struct qede_dev *edev, - struct qede_tx_queue *txq) +static void qede_free_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) { /* Free the parallel SW ring */ kfree(txq->sw_tx_ring); @@ -2888,8 +2933,7 @@ static void qede_free_mem_txq(struct qede_dev *edev, } /* This function allocates all memory needed per Tx queue */ -static int qede_alloc_mem_txq(struct qede_dev *edev, - struct qede_tx_queue *txq) +static int qede_alloc_mem_txq(struct qede_dev *edev, struct qede_tx_queue *txq) { int size, rc; union eth_tx_bd_types *p_virt; @@ -2921,41 +2965,45 @@ err: } /* This function frees all memory of a single fp */ -static void qede_free_mem_fp(struct qede_dev *edev, - struct qede_fastpath *fp) +static void qede_free_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp) { int tc; qede_free_mem_sb(edev, fp->sb_info); - qede_free_mem_rxq(edev, fp->rxq); + if (fp->type & QEDE_FASTPATH_RX) + qede_free_mem_rxq(edev, fp->rxq); - for (tc = 0; tc < edev->num_tc; tc++) - qede_free_mem_txq(edev, &fp->txqs[tc]); + if (fp->type & QEDE_FASTPATH_TX) + for (tc = 0; tc < edev->num_tc; tc++) + qede_free_mem_txq(edev, &fp->txqs[tc]); } /* This function allocates all memory needed for a single fp (i.e. an entity - * which contains status block, one rx queue and multiple per-TC tx queues. + * which contains status block, one rx queue and/or multiple per-TC tx queues. */ -static int qede_alloc_mem_fp(struct qede_dev *edev, - struct qede_fastpath *fp) +static int qede_alloc_mem_fp(struct qede_dev *edev, struct qede_fastpath *fp) { int rc, tc; - rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->rss_id); + rc = qede_alloc_mem_sb(edev, fp->sb_info, fp->id); if (rc) goto err; - rc = qede_alloc_mem_rxq(edev, fp->rxq); - if (rc) - goto err; - - for (tc = 0; tc < edev->num_tc; tc++) { - rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]); + if (fp->type & QEDE_FASTPATH_RX) { + rc = qede_alloc_mem_rxq(edev, fp->rxq); if (rc) goto err; } + if (fp->type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + rc = qede_alloc_mem_txq(edev, &fp->txqs[tc]); + if (rc) + goto err; + } + } + return 0; err: return rc; @@ -2965,7 +3013,7 @@ static void qede_free_mem_load(struct qede_dev *edev) { int i; - for_each_rss(i) { + for_each_queue(i) { struct qede_fastpath *fp = &edev->fp_array[i]; qede_free_mem_fp(edev, fp); @@ -2975,16 +3023,16 @@ static void qede_free_mem_load(struct qede_dev *edev) /* This function allocates all qede memory at NIC load. */ static int qede_alloc_mem_load(struct qede_dev *edev) { - int rc = 0, rss_id; + int rc = 0, queue_id; - for (rss_id = 0; rss_id < QEDE_RSS_CNT(edev); rss_id++) { - struct qede_fastpath *fp = &edev->fp_array[rss_id]; + for (queue_id = 0; queue_id < QEDE_QUEUE_CNT(edev); queue_id++) { + struct qede_fastpath *fp = &edev->fp_array[queue_id]; rc = qede_alloc_mem_fp(edev, fp); if (rc) { DP_ERR(edev, "Failed to allocate memory for fastpath - rss id = %d\n", - rss_id); + queue_id); qede_free_mem_load(edev); return rc; } @@ -2996,30 +3044,38 @@ static int qede_alloc_mem_load(struct qede_dev *edev) /* This function inits fp content and resets the SB, RXQ and TXQ structures */ static void qede_init_fp(struct qede_dev *edev) { - int rss_id, txq_index, tc; + int queue_id, rxq_index = 0, txq_index = 0, tc; struct qede_fastpath *fp; - for_each_rss(rss_id) { - fp = &edev->fp_array[rss_id]; + for_each_queue(queue_id) { + fp = &edev->fp_array[queue_id]; fp->edev = edev; - fp->rss_id = rss_id; + fp->id = queue_id; memset((void *)&fp->napi, 0, sizeof(fp->napi)); memset((void *)fp->sb_info, 0, sizeof(*fp->sb_info)); - memset((void *)fp->rxq, 0, sizeof(*fp->rxq)); - fp->rxq->rxq_id = rss_id; + if (fp->type & QEDE_FASTPATH_RX) { + memset((void *)fp->rxq, 0, sizeof(*fp->rxq)); + fp->rxq->rxq_id = rxq_index++; + } - memset((void *)fp->txqs, 0, (edev->num_tc * sizeof(*fp->txqs))); - for (tc = 0; tc < edev->num_tc; tc++) { - txq_index = tc * QEDE_RSS_CNT(edev) + rss_id; - fp->txqs[tc].index = txq_index; + if (fp->type & QEDE_FASTPATH_TX) { + memset((void *)fp->txqs, 0, + (edev->num_tc * sizeof(*fp->txqs))); + for (tc = 0; tc < edev->num_tc; tc++) { + fp->txqs[tc].index = txq_index + + tc * QEDE_TSS_COUNT(edev); + if (edev->dev_info.is_legacy) + fp->txqs[tc].is_legacy = true; + } + txq_index++; } snprintf(fp->name, sizeof(fp->name), "%s-fp-%d", - edev->ndev->name, rss_id); + edev->ndev->name, queue_id); } edev->gro_disable = !(edev->ndev->features & NETIF_F_GRO); @@ -3029,12 +3085,13 @@ static int qede_set_real_num_queues(struct qede_dev *edev) { int rc = 0; - rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_CNT(edev)); + rc = netif_set_real_num_tx_queues(edev->ndev, QEDE_TSS_COUNT(edev)); if (rc) { DP_NOTICE(edev, "Failed to set real number of Tx queues\n"); return rc; } - rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_CNT(edev)); + + rc = netif_set_real_num_rx_queues(edev->ndev, QEDE_RSS_COUNT(edev)); if (rc) { DP_NOTICE(edev, "Failed to set real number of Rx queues\n"); return rc; @@ -3047,7 +3104,7 @@ static void qede_napi_disable_remove(struct qede_dev *edev) { int i; - for_each_rss(i) { + for_each_queue(i) { napi_disable(&edev->fp_array[i].napi); netif_napi_del(&edev->fp_array[i].napi); @@ -3059,7 +3116,7 @@ static void qede_napi_add_enable(struct qede_dev *edev) int i; /* Add NAPI objects */ - for_each_rss(i) { + for_each_queue(i) { netif_napi_add(edev->ndev, &edev->fp_array[i].napi, qede_poll, NAPI_POLL_WEIGHT); napi_enable(&edev->fp_array[i].napi); @@ -3088,14 +3145,14 @@ static int qede_req_msix_irqs(struct qede_dev *edev) int i, rc; /* Sanitize number of interrupts == number of prepared RSS queues */ - if (QEDE_RSS_CNT(edev) > edev->int_info.msix_cnt) { + if (QEDE_QUEUE_CNT(edev) > edev->int_info.msix_cnt) { DP_ERR(edev, "Interrupt mismatch: %d RSS queues > %d MSI-x vectors\n", - QEDE_RSS_CNT(edev), edev->int_info.msix_cnt); + QEDE_QUEUE_CNT(edev), edev->int_info.msix_cnt); return -EINVAL; } - for (i = 0; i < QEDE_RSS_CNT(edev); i++) { + for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) { rc = request_irq(edev->int_info.msix[i].vector, qede_msix_fp_int, 0, edev->fp_array[i].name, &edev->fp_array[i]); @@ -3140,18 +3197,17 @@ static int qede_setup_irqs(struct qede_dev *edev) /* qed should learn receive the RSS ids and callbacks */ ops = edev->ops->common; - for (i = 0; i < QEDE_RSS_CNT(edev); i++) + for (i = 0; i < QEDE_QUEUE_CNT(edev); i++) ops->simd_handler_config(edev->cdev, &edev->fp_array[i], i, qede_simd_fp_handler); - edev->int_info.used_cnt = QEDE_RSS_CNT(edev); + edev->int_info.used_cnt = QEDE_QUEUE_CNT(edev); } return 0; } static int qede_drain_txq(struct qede_dev *edev, - struct qede_tx_queue *txq, - bool allow_drain) + struct qede_tx_queue *txq, bool allow_drain) { int rc, cnt = 1000; @@ -3203,45 +3259,53 @@ static int qede_stop_queues(struct qede_dev *edev) } /* Flush Tx queues. If needed, request drain from MCP */ - for_each_rss(i) { + for_each_queue(i) { struct qede_fastpath *fp = &edev->fp_array[i]; - for (tc = 0; tc < edev->num_tc; tc++) { - struct qede_tx_queue *txq = &fp->txqs[tc]; + if (fp->type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + struct qede_tx_queue *txq = &fp->txqs[tc]; - rc = qede_drain_txq(edev, txq, true); - if (rc) - return rc; + rc = qede_drain_txq(edev, txq, true); + if (rc) + return rc; + } } } - /* Stop all Queues in reverse order*/ - for (i = QEDE_RSS_CNT(edev) - 1; i >= 0; i--) { + /* Stop all Queues in reverse order */ + for (i = QEDE_QUEUE_CNT(edev) - 1; i >= 0; i--) { struct qed_stop_rxq_params rx_params; - /* Stop the Tx Queue(s)*/ - for (tc = 0; tc < edev->num_tc; tc++) { - struct qed_stop_txq_params tx_params; + /* Stop the Tx Queue(s) */ + if (edev->fp_array[i].type & QEDE_FASTPATH_TX) { + for (tc = 0; tc < edev->num_tc; tc++) { + struct qed_stop_txq_params tx_params; + u8 val; - tx_params.rss_id = i; - tx_params.tx_queue_id = tc * QEDE_RSS_CNT(edev) + i; - rc = edev->ops->q_tx_stop(cdev, &tx_params); - if (rc) { - DP_ERR(edev, "Failed to stop TXQ #%d\n", - tx_params.tx_queue_id); - return rc; + tx_params.rss_id = i; + val = edev->fp_array[i].txqs[tc].index; + tx_params.tx_queue_id = val; + rc = edev->ops->q_tx_stop(cdev, &tx_params); + if (rc) { + DP_ERR(edev, "Failed to stop TXQ #%d\n", + tx_params.tx_queue_id); + return rc; + } } } - /* Stop the Rx Queue*/ - memset(&rx_params, 0, sizeof(rx_params)); - rx_params.rss_id = i; - rx_params.rx_queue_id = i; + /* Stop the Rx Queue */ + if (edev->fp_array[i].type & QEDE_FASTPATH_RX) { + memset(&rx_params, 0, sizeof(rx_params)); + rx_params.rss_id = i; + rx_params.rx_queue_id = edev->fp_array[i].rxq->rxq_id; - rc = edev->ops->q_rx_stop(cdev, &rx_params); - if (rc) { - DP_ERR(edev, "Failed to stop RXQ #%d\n", i); - return rc; + rc = edev->ops->q_rx_stop(cdev, &rx_params); + if (rc) { + DP_ERR(edev, "Failed to stop RXQ #%d\n", i); + return rc; + } } } @@ -3264,7 +3328,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) struct qed_start_vport_params start = {0}; bool reset_rss_indir = false; - if (!edev->num_rss) { + if (!edev->num_queues) { DP_ERR(edev, "Cannot update V-VPORT as active as there are no Rx queues\n"); return -EINVAL; @@ -3288,50 +3352,66 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) "Start vport ramrod passed, vport_id = %d, MTU = %d, vlan_removal_en = %d\n", start.vport_id, edev->ndev->mtu + 0xe, vlan_removal_en); - for_each_rss(i) { + for_each_queue(i) { struct qede_fastpath *fp = &edev->fp_array[i]; - dma_addr_t phys_table = fp->rxq->rx_comp_ring.pbl.p_phys_table; + dma_addr_t p_phys_table; + u32 page_cnt; - memset(&q_params, 0, sizeof(q_params)); - q_params.rss_id = i; - q_params.queue_id = i; - q_params.vport_id = 0; - q_params.sb = fp->sb_info->igu_sb_id; - q_params.sb_idx = RX_PI; - - rc = edev->ops->q_rx_start(cdev, &q_params, - fp->rxq->rx_buf_size, - fp->rxq->rx_bd_ring.p_phys_addr, - phys_table, - fp->rxq->rx_comp_ring.page_cnt, - &fp->rxq->hw_rxq_prod_addr); - if (rc) { - DP_ERR(edev, "Start RXQ #%d failed %d\n", i, rc); - return rc; - } - - fp->rxq->hw_cons_ptr = &fp->sb_info->sb_virt->pi_array[RX_PI]; - - qede_update_rx_prod(edev, fp->rxq); - - for (tc = 0; tc < edev->num_tc; tc++) { - struct qede_tx_queue *txq = &fp->txqs[tc]; - int txq_index = tc * QEDE_RSS_CNT(edev) + i; + if (fp->type & QEDE_FASTPATH_RX) { + struct qede_rx_queue *rxq = fp->rxq; + __le16 *val; memset(&q_params, 0, sizeof(q_params)); q_params.rss_id = i; - q_params.queue_id = txq_index; + q_params.queue_id = rxq->rxq_id; + q_params.vport_id = 0; + q_params.sb = fp->sb_info->igu_sb_id; + q_params.sb_idx = RX_PI; + + p_phys_table = + qed_chain_get_pbl_phys(&rxq->rx_comp_ring); + page_cnt = qed_chain_get_page_cnt(&rxq->rx_comp_ring); + + rc = edev->ops->q_rx_start(cdev, &q_params, + rxq->rx_buf_size, + rxq->rx_bd_ring.p_phys_addr, + p_phys_table, + page_cnt, + &rxq->hw_rxq_prod_addr); + if (rc) { + DP_ERR(edev, "Start RXQ #%d failed %d\n", i, + rc); + return rc; + } + + val = &fp->sb_info->sb_virt->pi_array[RX_PI]; + rxq->hw_cons_ptr = val; + + qede_update_rx_prod(edev, rxq); + } + + if (!(fp->type & QEDE_FASTPATH_TX)) + continue; + + for (tc = 0; tc < edev->num_tc; tc++) { + struct qede_tx_queue *txq = &fp->txqs[tc]; + + p_phys_table = qed_chain_get_pbl_phys(&txq->tx_pbl); + page_cnt = qed_chain_get_page_cnt(&txq->tx_pbl); + + memset(&q_params, 0, sizeof(q_params)); + q_params.rss_id = i; + q_params.queue_id = txq->index; q_params.vport_id = 0; q_params.sb = fp->sb_info->igu_sb_id; q_params.sb_idx = TX_PI(tc); rc = edev->ops->q_tx_start(cdev, &q_params, - txq->tx_pbl.pbl.p_phys_table, - txq->tx_pbl.page_cnt, + p_phys_table, page_cnt, &txq->doorbell_addr); if (rc) { DP_ERR(edev, "Start TXQ #%d failed %d\n", - txq_index, rc); + txq->index, rc); return rc; } @@ -3362,13 +3442,13 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) } /* Fill struct with RSS params */ - if (QEDE_RSS_CNT(edev) > 1) { + if (QEDE_RSS_COUNT(edev) > 1) { vport_update_params.update_rss_flg = 1; /* Need to validate current RSS config uses valid entries */ for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) { if (edev->rss_params.rss_ind_table[i] >= - edev->num_rss) { + QEDE_RSS_COUNT(edev)) { reset_rss_indir = true; break; } @@ -3381,7 +3461,7 @@ static int qede_start_queues(struct qede_dev *edev, bool clear_stats) for (i = 0; i < QED_RSS_IND_TABLE_SIZE; i++) { u16 indir_val; - val = QEDE_RSS_CNT(edev); + val = QEDE_RSS_COUNT(edev); indir_val = ethtool_rxfh_indir_default(i, val); edev->rss_params.rss_ind_table[i] = indir_val; } @@ -3447,6 +3527,7 @@ static void qede_unload(struct qede_dev *edev, enum qede_unload_mode mode) DP_INFO(edev, "Starting qede unload\n"); + qede_roce_dev_event_close(edev); mutex_lock(&edev->qede_lock); edev->state = QEDE_STATE_CLOSED; @@ -3510,7 +3591,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) if (rc) goto err1; DP_INFO(edev, "Allocated %d RSS queues on %d TC/s\n", - QEDE_RSS_CNT(edev), edev->num_tc); + QEDE_QUEUE_CNT(edev), edev->num_tc); rc = qede_set_real_num_queues(edev); if (rc) @@ -3547,6 +3628,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) /* Query whether link is already-up */ memset(&link_output, 0, sizeof(link_output)); edev->ops->common->get_link(edev->cdev, &link_output); + qede_roce_dev_event_open(edev); qede_link_update(edev, &link_output); DP_INFO(edev, "Ending successfully qede load\n"); @@ -3563,7 +3645,9 @@ err2: err1: edev->ops->common->set_fp_int(edev->cdev, 0); qede_free_fp_array(edev); - edev->num_rss = 0; + edev->num_queues = 0; + edev->fp_num_tx = 0; + edev->fp_num_rx = 0; err0: return rc; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_roce.c b/drivers/net/ethernet/qlogic/qede/qede_roce.c new file mode 100644 index 000000000000..9867f960b063 --- /dev/null +++ b/drivers/net/ethernet/qlogic/qede/qede_roce.c @@ -0,0 +1,314 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include +#include +#include +#include +#include +#include "qede.h" + +static struct qedr_driver *qedr_drv; +static LIST_HEAD(qedr_dev_list); +static DEFINE_MUTEX(qedr_dev_list_lock); + +bool qede_roce_supported(struct qede_dev *dev) +{ + return dev->dev_info.common.rdma_supported; +} + +static void _qede_roce_dev_add(struct qede_dev *edev) +{ + if (!qedr_drv) + return; + + edev->rdma_info.qedr_dev = qedr_drv->add(edev->cdev, edev->pdev, + edev->ndev); +} + +static int qede_roce_create_wq(struct qede_dev *edev) +{ + INIT_LIST_HEAD(&edev->rdma_info.roce_event_list); + edev->rdma_info.roce_wq = create_singlethread_workqueue("roce_wq"); + if (!edev->rdma_info.roce_wq) { + DP_NOTICE(edev, "qedr: Could not create workqueue\n"); + return -ENOMEM; + } + + return 0; +} + +static void qede_roce_cleanup_event(struct qede_dev *edev) +{ + struct list_head *head = &edev->rdma_info.roce_event_list; + struct qede_roce_event_work *event_node; + + flush_workqueue(edev->rdma_info.roce_wq); + while (!list_empty(head)) { + event_node = list_entry(head->next, struct qede_roce_event_work, + list); + cancel_work_sync(&event_node->work); + list_del(&event_node->list); + kfree(event_node); + } +} + +static void qede_roce_destroy_wq(struct qede_dev *edev) +{ + qede_roce_cleanup_event(edev); + destroy_workqueue(edev->rdma_info.roce_wq); +} + +int qede_roce_dev_add(struct qede_dev *edev) +{ + int rc = 0; + + if (qede_roce_supported(edev)) { + rc = qede_roce_create_wq(edev); + if (rc) + return rc; + + INIT_LIST_HEAD(&edev->rdma_info.entry); + mutex_lock(&qedr_dev_list_lock); + list_add_tail(&edev->rdma_info.entry, &qedr_dev_list); + _qede_roce_dev_add(edev); + mutex_unlock(&qedr_dev_list_lock); + } + + return rc; +} + +static void _qede_roce_dev_remove(struct qede_dev *edev) +{ + if (qedr_drv && qedr_drv->remove && edev->rdma_info.qedr_dev) + qedr_drv->remove(edev->rdma_info.qedr_dev); + edev->rdma_info.qedr_dev = NULL; +} + +void qede_roce_dev_remove(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + qede_roce_destroy_wq(edev); + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_remove(edev); + list_del(&edev->rdma_info.entry); + mutex_unlock(&qedr_dev_list_lock); +} + +static void _qede_roce_dev_open(struct qede_dev *edev) +{ + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_UP); +} + +static void qede_roce_dev_open(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_open(edev); + mutex_unlock(&qedr_dev_list_lock); +} + +static void _qede_roce_dev_close(struct qede_dev *edev) +{ + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_DOWN); +} + +static void qede_roce_dev_close(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + _qede_roce_dev_close(edev); + mutex_unlock(&qedr_dev_list_lock); +} + +static void qede_roce_dev_shutdown(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + mutex_lock(&qedr_dev_list_lock); + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CLOSE); + mutex_unlock(&qedr_dev_list_lock); +} + +int qede_roce_register_driver(struct qedr_driver *drv) +{ + struct qede_dev *edev; + u8 qedr_counter = 0; + + mutex_lock(&qedr_dev_list_lock); + if (qedr_drv) { + mutex_unlock(&qedr_dev_list_lock); + return -EINVAL; + } + qedr_drv = drv; + + list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { + struct net_device *ndev; + + qedr_counter++; + _qede_roce_dev_add(edev); + ndev = edev->ndev; + if (netif_running(ndev) && netif_oper_up(ndev)) + _qede_roce_dev_open(edev); + } + mutex_unlock(&qedr_dev_list_lock); + + DP_INFO(edev, "qedr: discovered and registered %d RoCE funcs\n", + qedr_counter); + + return 0; +} +EXPORT_SYMBOL(qede_roce_register_driver); + +void qede_roce_unregister_driver(struct qedr_driver *drv) +{ + struct qede_dev *edev; + + mutex_lock(&qedr_dev_list_lock); + list_for_each_entry(edev, &qedr_dev_list, rdma_info.entry) { + if (edev->rdma_info.qedr_dev) + _qede_roce_dev_remove(edev); + } + qedr_drv = NULL; + mutex_unlock(&qedr_dev_list_lock); +} +EXPORT_SYMBOL(qede_roce_unregister_driver); + +static void qede_roce_changeaddr(struct qede_dev *edev) +{ + if (!qede_roce_supported(edev)) + return; + + if (qedr_drv && edev->rdma_info.qedr_dev && qedr_drv->notify) + qedr_drv->notify(edev->rdma_info.qedr_dev, QEDE_CHANGE_ADDR); +} + +struct qede_roce_event_work *qede_roce_get_free_event_node(struct qede_dev + *edev) +{ + struct qede_roce_event_work *event_node = NULL; + struct list_head *list_node = NULL; + bool found = false; + + list_for_each(list_node, &edev->rdma_info.roce_event_list) { + event_node = list_entry(list_node, struct qede_roce_event_work, + list); + if (!work_pending(&event_node->work)) { + found = true; + break; + } + } + + if (!found) { + event_node = kzalloc(sizeof(*event_node), GFP_KERNEL); + if (!event_node) { + DP_NOTICE(edev, + "qedr: Could not allocate memory for roce work\n"); + return NULL; + } + list_add_tail(&event_node->list, + &edev->rdma_info.roce_event_list); + } + + return event_node; +} + +static void qede_roce_handle_event(struct work_struct *work) +{ + struct qede_roce_event_work *event_node; + enum qede_roce_event event; + struct qede_dev *edev; + + event_node = container_of(work, struct qede_roce_event_work, work); + event = event_node->event; + edev = event_node->ptr; + + switch (event) { + case QEDE_UP: + qede_roce_dev_open(edev); + break; + case QEDE_DOWN: + qede_roce_dev_close(edev); + break; + case QEDE_CLOSE: + qede_roce_dev_shutdown(edev); + break; + case QEDE_CHANGE_ADDR: + qede_roce_changeaddr(edev); + break; + default: + DP_NOTICE(edev, "Invalid roce event %d", event); + } +} + +static void qede_roce_add_event(struct qede_dev *edev, + enum qede_roce_event event) +{ + struct qede_roce_event_work *event_node; + + if (!edev->rdma_info.qedr_dev) + return; + + event_node = qede_roce_get_free_event_node(edev); + if (!event_node) + return; + + event_node->event = event; + event_node->ptr = edev; + + INIT_WORK(&event_node->work, qede_roce_handle_event); + queue_work(edev->rdma_info.roce_wq, &event_node->work); +} + +void qede_roce_dev_event_open(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_UP); +} + +void qede_roce_dev_event_close(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_DOWN); +} + +void qede_roce_event_changeaddr(struct qede_dev *edev) +{ + qede_roce_add_event(edev, QEDE_CHANGE_ADDR); +} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c index 3ebef27e0964..3ae3968b0edf 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_main.c @@ -432,18 +432,19 @@ static int qlcnic_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], static int qlcnic_fdb_dump(struct sk_buff *skb, struct netlink_callback *ncb, struct net_device *netdev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct qlcnic_adapter *adapter = netdev_priv(netdev); + int err = 0; if (!adapter->fdb_mac_learn) return ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); if ((adapter->flags & QLCNIC_ESWITCH_ENABLED) || qlcnic_sriov_check(adapter)) - idx = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); + err = ndo_dflt_fdb_dump(skb, ncb, netdev, filter_dev, idx); - return idx; + return err; } static void qlcnic_82xx_cancel_idc_work(struct qlcnic_adapter *adapter) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h index 24061b9b92e8..5f327659efa7 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov.h @@ -238,7 +238,7 @@ int qlcnic_sriov_set_vf_mac(struct net_device *, int, u8 *); int qlcnic_sriov_set_vf_tx_rate(struct net_device *, int, int, int); int qlcnic_sriov_get_vf_config(struct net_device *, int , struct ifla_vf_info *); -int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8); +int qlcnic_sriov_set_vf_vlan(struct net_device *, int, u16, u8, __be16); int qlcnic_sriov_set_vf_spoofchk(struct net_device *, int, bool); #else static inline void qlcnic_sriov_pf_disable(struct qlcnic_adapter *adapter) {} diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c index afd687e5e779..50eaafa3eaba 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_pf.c @@ -1915,7 +1915,7 @@ int qlcnic_sriov_set_vf_tx_rate(struct net_device *netdev, int vf, } int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, - u16 vlan, u8 qos) + u16 vlan, u8 qos, __be16 vlan_proto) { struct qlcnic_adapter *adapter = netdev_priv(netdev); struct qlcnic_sriov *sriov = adapter->ahw->sriov; @@ -1928,6 +1928,9 @@ int qlcnic_sriov_set_vf_vlan(struct net_device *netdev, int vf, if (vf >= sriov->num_vfs || qos > 7) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + if (vlan > MAX_VLAN_ID) { netdev_err(netdev, "Invalid VLAN ID, allowed range is [0 - %d]\n", diff --git a/drivers/net/ethernet/qualcomm/Kconfig b/drivers/net/ethernet/qualcomm/Kconfig index a76e380cf89a..9ba568db576f 100644 --- a/drivers/net/ethernet/qualcomm/Kconfig +++ b/drivers/net/ethernet/qualcomm/Kconfig @@ -24,4 +24,16 @@ config QCA7000 To compile this driver as a module, choose M here. The module will be called qcaspi. +config QCOM_EMAC + tristate "Qualcomm Technologies, Inc. EMAC Gigabit Ethernet support" + select CRC32 + select PHYLIB + ---help--- + This driver supports the Qualcomm Technologies, Inc. Gigabit + Ethernet Media Access Controller (EMAC). The controller + supports IEEE 802.3-2002, half-duplex mode at 10/100 Mb/s, + full-duplex mode at 10/100/1000Mb/s, Wake On LAN (WOL) for + low power, Receive-Side Scaling (RSS), and IEEE 1588-2008 + Precision Clock Synchronization Protocol. + endif # NET_VENDOR_QUALCOMM diff --git a/drivers/net/ethernet/qualcomm/Makefile b/drivers/net/ethernet/qualcomm/Makefile index 9da2d75db700..aacb0a585c68 100644 --- a/drivers/net/ethernet/qualcomm/Makefile +++ b/drivers/net/ethernet/qualcomm/Makefile @@ -4,3 +4,5 @@ obj-$(CONFIG_QCA7000) += qcaspi.o qcaspi-objs := qca_spi.o qca_framing.o qca_7k.o qca_debug.o + +obj-y += emac/ diff --git a/drivers/net/ethernet/qualcomm/emac/Makefile b/drivers/net/ethernet/qualcomm/emac/Makefile new file mode 100644 index 000000000000..01ee144c6386 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Qualcomm Technologies, Inc. EMAC Gigabit Ethernet driver +# + +obj-$(CONFIG_QCOM_EMAC) += qcom-emac.o + +qcom-emac-objs := emac.o emac-mac.o emac-phy.o emac-sgmii.o diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c new file mode 100644 index 000000000000..e97968ed4b8f --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c @@ -0,0 +1,1528 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC Ethernet Controller MAC layer support + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-sgmii.h" + +/* EMAC base register offsets */ +#define EMAC_MAC_CTRL 0x001480 +#define EMAC_WOL_CTRL0 0x0014a0 +#define EMAC_RSS_KEY0 0x0014b0 +#define EMAC_H1TPD_BASE_ADDR_LO 0x0014e0 +#define EMAC_H2TPD_BASE_ADDR_LO 0x0014e4 +#define EMAC_H3TPD_BASE_ADDR_LO 0x0014e8 +#define EMAC_INTER_SRAM_PART9 0x001534 +#define EMAC_DESC_CTRL_0 0x001540 +#define EMAC_DESC_CTRL_1 0x001544 +#define EMAC_DESC_CTRL_2 0x001550 +#define EMAC_DESC_CTRL_10 0x001554 +#define EMAC_DESC_CTRL_12 0x001558 +#define EMAC_DESC_CTRL_13 0x00155c +#define EMAC_DESC_CTRL_3 0x001560 +#define EMAC_DESC_CTRL_4 0x001564 +#define EMAC_DESC_CTRL_5 0x001568 +#define EMAC_DESC_CTRL_14 0x00156c +#define EMAC_DESC_CTRL_15 0x001570 +#define EMAC_DESC_CTRL_16 0x001574 +#define EMAC_DESC_CTRL_6 0x001578 +#define EMAC_DESC_CTRL_8 0x001580 +#define EMAC_DESC_CTRL_9 0x001584 +#define EMAC_DESC_CTRL_11 0x001588 +#define EMAC_TXQ_CTRL_0 0x001590 +#define EMAC_TXQ_CTRL_1 0x001594 +#define EMAC_TXQ_CTRL_2 0x001598 +#define EMAC_RXQ_CTRL_0 0x0015a0 +#define EMAC_RXQ_CTRL_1 0x0015a4 +#define EMAC_RXQ_CTRL_2 0x0015a8 +#define EMAC_RXQ_CTRL_3 0x0015ac +#define EMAC_BASE_CPU_NUMBER 0x0015b8 +#define EMAC_DMA_CTRL 0x0015c0 +#define EMAC_MAILBOX_0 0x0015e0 +#define EMAC_MAILBOX_5 0x0015e4 +#define EMAC_MAILBOX_6 0x0015e8 +#define EMAC_MAILBOX_13 0x0015ec +#define EMAC_MAILBOX_2 0x0015f4 +#define EMAC_MAILBOX_3 0x0015f8 +#define EMAC_MAILBOX_11 0x00160c +#define EMAC_AXI_MAST_CTRL 0x001610 +#define EMAC_MAILBOX_12 0x001614 +#define EMAC_MAILBOX_9 0x001618 +#define EMAC_MAILBOX_10 0x00161c +#define EMAC_ATHR_HEADER_CTRL 0x001620 +#define EMAC_CLK_GATE_CTRL 0x001814 +#define EMAC_MISC_CTRL 0x001990 +#define EMAC_MAILBOX_7 0x0019e0 +#define EMAC_MAILBOX_8 0x0019e4 +#define EMAC_MAILBOX_15 0x001bd4 +#define EMAC_MAILBOX_16 0x001bd8 + +/* EMAC_MAC_CTRL */ +#define SINGLE_PAUSE_MODE 0x10000000 +#define DEBUG_MODE 0x08000000 +#define BROAD_EN 0x04000000 +#define MULTI_ALL 0x02000000 +#define RX_CHKSUM_EN 0x01000000 +#define HUGE 0x00800000 +#define SPEED(x) (((x) & 0x3) << 20) +#define SPEED_MASK SPEED(0x3) +#define SIMR 0x00080000 +#define TPAUSE 0x00010000 +#define PROM_MODE 0x00008000 +#define VLAN_STRIP 0x00004000 +#define PRLEN_BMSK 0x00003c00 +#define PRLEN_SHFT 10 +#define HUGEN 0x00000200 +#define FLCHK 0x00000100 +#define PCRCE 0x00000080 +#define CRCE 0x00000040 +#define FULLD 0x00000020 +#define MAC_LP_EN 0x00000010 +#define RXFC 0x00000008 +#define TXFC 0x00000004 +#define RXEN 0x00000002 +#define TXEN 0x00000001 + + +/* EMAC_WOL_CTRL0 */ +#define LK_CHG_PME 0x20 +#define LK_CHG_EN 0x10 +#define MG_FRAME_PME 0x8 +#define MG_FRAME_EN 0x4 +#define WK_FRAME_EN 0x1 + +/* EMAC_DESC_CTRL_3 */ +#define RFD_RING_SIZE_BMSK 0xfff + +/* EMAC_DESC_CTRL_4 */ +#define RX_BUFFER_SIZE_BMSK 0xffff + +/* EMAC_DESC_CTRL_6 */ +#define RRD_RING_SIZE_BMSK 0xfff + +/* EMAC_DESC_CTRL_9 */ +#define TPD_RING_SIZE_BMSK 0xffff + +/* EMAC_TXQ_CTRL_0 */ +#define NUM_TXF_BURST_PREF_BMSK 0xffff0000 +#define NUM_TXF_BURST_PREF_SHFT 16 +#define LS_8023_SP 0x80 +#define TXQ_MODE 0x40 +#define TXQ_EN 0x20 +#define IP_OP_SP 0x10 +#define NUM_TPD_BURST_PREF_BMSK 0xf +#define NUM_TPD_BURST_PREF_SHFT 0 + +/* EMAC_TXQ_CTRL_1 */ +#define JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK 0x7ff + +/* EMAC_TXQ_CTRL_2 */ +#define TXF_HWM_BMSK 0xfff0000 +#define TXF_LWM_BMSK 0xfff + +/* EMAC_RXQ_CTRL_0 */ +#define RXQ_EN BIT(31) +#define CUT_THRU_EN BIT(30) +#define RSS_HASH_EN BIT(29) +#define NUM_RFD_BURST_PREF_BMSK 0x3f00000 +#define NUM_RFD_BURST_PREF_SHFT 20 +#define IDT_TABLE_SIZE_BMSK 0x1ff00 +#define IDT_TABLE_SIZE_SHFT 8 +#define SP_IPV6 0x80 + +/* EMAC_RXQ_CTRL_1 */ +#define JUMBO_1KAH_BMSK 0xf000 +#define JUMBO_1KAH_SHFT 12 +#define RFD_PREF_LOW_TH 0x10 +#define RFD_PREF_LOW_THRESHOLD_BMSK 0xfc0 +#define RFD_PREF_LOW_THRESHOLD_SHFT 6 +#define RFD_PREF_UP_TH 0x10 +#define RFD_PREF_UP_THRESHOLD_BMSK 0x3f +#define RFD_PREF_UP_THRESHOLD_SHFT 0 + +/* EMAC_RXQ_CTRL_2 */ +#define RXF_DOF_THRESFHOLD 0x1a0 +#define RXF_DOF_THRESHOLD_BMSK 0xfff0000 +#define RXF_DOF_THRESHOLD_SHFT 16 +#define RXF_UOF_THRESFHOLD 0xbe +#define RXF_UOF_THRESHOLD_BMSK 0xfff +#define RXF_UOF_THRESHOLD_SHFT 0 + +/* EMAC_RXQ_CTRL_3 */ +#define RXD_TIMER_BMSK 0xffff0000 +#define RXD_THRESHOLD_BMSK 0xfff +#define RXD_THRESHOLD_SHFT 0 + +/* EMAC_DMA_CTRL */ +#define DMAW_DLY_CNT_BMSK 0xf0000 +#define DMAW_DLY_CNT_SHFT 16 +#define DMAR_DLY_CNT_BMSK 0xf800 +#define DMAR_DLY_CNT_SHFT 11 +#define DMAR_REQ_PRI 0x400 +#define REGWRBLEN_BMSK 0x380 +#define REGWRBLEN_SHFT 7 +#define REGRDBLEN_BMSK 0x70 +#define REGRDBLEN_SHFT 4 +#define OUT_ORDER_MODE 0x4 +#define ENH_ORDER_MODE 0x2 +#define IN_ORDER_MODE 0x1 + +/* EMAC_MAILBOX_13 */ +#define RFD3_PROC_IDX_BMSK 0xfff0000 +#define RFD3_PROC_IDX_SHFT 16 +#define RFD3_PROD_IDX_BMSK 0xfff +#define RFD3_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_2 */ +#define NTPD_CONS_IDX_BMSK 0xffff0000 +#define NTPD_CONS_IDX_SHFT 16 + +/* EMAC_MAILBOX_3 */ +#define RFD0_CONS_IDX_BMSK 0xfff +#define RFD0_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_11 */ +#define H3TPD_PROD_IDX_BMSK 0xffff0000 +#define H3TPD_PROD_IDX_SHFT 16 + +/* EMAC_AXI_MAST_CTRL */ +#define DATA_BYTE_SWAP 0x8 +#define MAX_BOUND 0x2 +#define MAX_BTYPE 0x1 + +/* EMAC_MAILBOX_12 */ +#define H3TPD_CONS_IDX_BMSK 0xffff0000 +#define H3TPD_CONS_IDX_SHFT 16 + +/* EMAC_MAILBOX_9 */ +#define H2TPD_PROD_IDX_BMSK 0xffff +#define H2TPD_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_10 */ +#define H1TPD_CONS_IDX_BMSK 0xffff0000 +#define H1TPD_CONS_IDX_SHFT 16 +#define H2TPD_CONS_IDX_BMSK 0xffff +#define H2TPD_CONS_IDX_SHFT 0 + +/* EMAC_ATHR_HEADER_CTRL */ +#define HEADER_CNT_EN 0x2 +#define HEADER_ENABLE 0x1 + +/* EMAC_MAILBOX_0 */ +#define RFD0_PROC_IDX_BMSK 0xfff0000 +#define RFD0_PROC_IDX_SHFT 16 +#define RFD0_PROD_IDX_BMSK 0xfff +#define RFD0_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_5 */ +#define RFD1_PROC_IDX_BMSK 0xfff0000 +#define RFD1_PROC_IDX_SHFT 16 +#define RFD1_PROD_IDX_BMSK 0xfff +#define RFD1_PROD_IDX_SHFT 0 + +/* EMAC_MISC_CTRL */ +#define RX_UNCPL_INT_EN 0x1 + +/* EMAC_MAILBOX_7 */ +#define RFD2_CONS_IDX_BMSK 0xfff0000 +#define RFD2_CONS_IDX_SHFT 16 +#define RFD1_CONS_IDX_BMSK 0xfff +#define RFD1_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_8 */ +#define RFD3_CONS_IDX_BMSK 0xfff +#define RFD3_CONS_IDX_SHFT 0 + +/* EMAC_MAILBOX_15 */ +#define NTPD_PROD_IDX_BMSK 0xffff +#define NTPD_PROD_IDX_SHFT 0 + +/* EMAC_MAILBOX_16 */ +#define H1TPD_PROD_IDX_BMSK 0xffff +#define H1TPD_PROD_IDX_SHFT 0 + +#define RXQ0_RSS_HSTYP_IPV6_TCP_EN 0x20 +#define RXQ0_RSS_HSTYP_IPV6_EN 0x10 +#define RXQ0_RSS_HSTYP_IPV4_TCP_EN 0x8 +#define RXQ0_RSS_HSTYP_IPV4_EN 0x4 + +/* EMAC_EMAC_WRAPPER_TX_TS_INX */ +#define EMAC_WRAPPER_TX_TS_EMPTY BIT(31) +#define EMAC_WRAPPER_TX_TS_INX_BMSK 0xffff + +struct emac_skb_cb { + u32 tpd_idx; + unsigned long jiffies; +}; + +#define EMAC_SKB_CB(skb) ((struct emac_skb_cb *)(skb)->cb) +#define EMAC_RSS_IDT_SIZE 256 +#define JUMBO_1KAH 0x4 +#define RXD_TH 0x100 +#define EMAC_TPD_LAST_FRAGMENT 0x80000000 +#define EMAC_TPD_TSTAMP_SAVE 0x80000000 + +/* EMAC Errors in emac_rrd.word[3] */ +#define EMAC_RRD_L4F BIT(14) +#define EMAC_RRD_IPF BIT(15) +#define EMAC_RRD_CRC BIT(21) +#define EMAC_RRD_FAE BIT(22) +#define EMAC_RRD_TRN BIT(23) +#define EMAC_RRD_RNT BIT(24) +#define EMAC_RRD_INC BIT(25) +#define EMAC_RRD_FOV BIT(29) +#define EMAC_RRD_LEN BIT(30) + +/* Error bits that will result in a received frame being discarded */ +#define EMAC_RRD_ERROR (EMAC_RRD_IPF | EMAC_RRD_CRC | EMAC_RRD_FAE | \ + EMAC_RRD_TRN | EMAC_RRD_RNT | EMAC_RRD_INC | \ + EMAC_RRD_FOV | EMAC_RRD_LEN) +#define EMAC_RRD_STATS_DW_IDX 3 + +#define EMAC_RRD(RXQ, SIZE, IDX) ((RXQ)->rrd.v_addr + (SIZE * (IDX))) +#define EMAC_RFD(RXQ, SIZE, IDX) ((RXQ)->rfd.v_addr + (SIZE * (IDX))) +#define EMAC_TPD(TXQ, SIZE, IDX) ((TXQ)->tpd.v_addr + (SIZE * (IDX))) + +#define GET_RFD_BUFFER(RXQ, IDX) (&((RXQ)->rfd.rfbuff[(IDX)])) +#define GET_TPD_BUFFER(RTQ, IDX) (&((RTQ)->tpd.tpbuff[(IDX)])) + +#define EMAC_TX_POLL_HWTXTSTAMP_THRESHOLD 8 + +#define ISR_RX_PKT (\ + RX_PKT_INT0 |\ + RX_PKT_INT1 |\ + RX_PKT_INT2 |\ + RX_PKT_INT3) + +#define EMAC_MAC_IRQ_RES "core0" + +void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr) +{ + u32 crc32, bit, reg, mta; + + /* Calculate the CRC of the MAC address */ + crc32 = ether_crc(ETH_ALEN, addr); + + /* The HASH Table is an array of 2 32-bit registers. It is + * treated like an array of 64 bits (BitArray[hash_value]). + * Use the upper 6 bits of the above CRC as the hash value. + */ + reg = (crc32 >> 31) & 0x1; + bit = (crc32 >> 26) & 0x1F; + + mta = readl(adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); + mta |= BIT(bit); + writel(mta, adpt->base + EMAC_HASH_TAB_REG0 + (reg << 2)); +} + +void emac_mac_multicast_addr_clear(struct emac_adapter *adpt) +{ + writel(0, adpt->base + EMAC_HASH_TAB_REG0); + writel(0, adpt->base + EMAC_HASH_TAB_REG1); +} + +/* definitions for RSS */ +#define EMAC_RSS_KEY(_i, _type) \ + (EMAC_RSS_KEY0 + ((_i) * sizeof(_type))) +#define EMAC_RSS_TBL(_i, _type) \ + (EMAC_IDT_TABLE0 + ((_i) * sizeof(_type))) + +/* Config MAC modes */ +void emac_mac_mode_config(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + u32 mac; + + mac = readl(adpt->base + EMAC_MAC_CTRL); + mac &= ~(VLAN_STRIP | PROM_MODE | MULTI_ALL | MAC_LP_EN); + + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + mac |= VLAN_STRIP; + + if (netdev->flags & IFF_PROMISC) + mac |= PROM_MODE; + + if (netdev->flags & IFF_ALLMULTI) + mac |= MULTI_ALL; + + writel(mac, adpt->base + EMAC_MAC_CTRL); +} + +/* Config descriptor rings */ +static void emac_mac_dma_rings_config(struct emac_adapter *adpt) +{ + static const unsigned short tpd_q_offset[] = { + EMAC_DESC_CTRL_8, EMAC_H1TPD_BASE_ADDR_LO, + EMAC_H2TPD_BASE_ADDR_LO, EMAC_H3TPD_BASE_ADDR_LO}; + static const unsigned short rfd_q_offset[] = { + EMAC_DESC_CTRL_2, EMAC_DESC_CTRL_10, + EMAC_DESC_CTRL_12, EMAC_DESC_CTRL_13}; + static const unsigned short rrd_q_offset[] = { + EMAC_DESC_CTRL_5, EMAC_DESC_CTRL_14, + EMAC_DESC_CTRL_15, EMAC_DESC_CTRL_16}; + + /* TPD (Transmit Packet Descriptor) */ + writel(upper_32_bits(adpt->tx_q.tpd.dma_addr), + adpt->base + EMAC_DESC_CTRL_1); + + writel(lower_32_bits(adpt->tx_q.tpd.dma_addr), + adpt->base + tpd_q_offset[0]); + + writel(adpt->tx_q.tpd.count & TPD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_9); + + /* RFD (Receive Free Descriptor) & RRD (Receive Return Descriptor) */ + writel(upper_32_bits(adpt->rx_q.rfd.dma_addr), + adpt->base + EMAC_DESC_CTRL_0); + + writel(lower_32_bits(adpt->rx_q.rfd.dma_addr), + adpt->base + rfd_q_offset[0]); + writel(lower_32_bits(adpt->rx_q.rrd.dma_addr), + adpt->base + rrd_q_offset[0]); + + writel(adpt->rx_q.rfd.count & RFD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_3); + writel(adpt->rx_q.rrd.count & RRD_RING_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_6); + + writel(adpt->rxbuf_size & RX_BUFFER_SIZE_BMSK, + adpt->base + EMAC_DESC_CTRL_4); + + writel(0, adpt->base + EMAC_DESC_CTRL_11); + + /* Load all of the base addresses above and ensure that triggering HW to + * read ring pointers is flushed + */ + writel(1, adpt->base + EMAC_INTER_SRAM_PART9); +} + +/* Config transmit parameters */ +static void emac_mac_tx_config(struct emac_adapter *adpt) +{ + u32 val; + + writel((EMAC_MAX_TX_OFFLOAD_THRESH >> 3) & + JUMBO_TASK_OFFLOAD_THRESHOLD_BMSK, adpt->base + EMAC_TXQ_CTRL_1); + + val = (adpt->tpd_burst << NUM_TPD_BURST_PREF_SHFT) & + NUM_TPD_BURST_PREF_BMSK; + + val |= TXQ_MODE | LS_8023_SP; + val |= (0x0100 << NUM_TXF_BURST_PREF_SHFT) & + NUM_TXF_BURST_PREF_BMSK; + + writel(val, adpt->base + EMAC_TXQ_CTRL_0); + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_2, + (TXF_HWM_BMSK | TXF_LWM_BMSK), 0); +} + +/* Config receive parameters */ +static void emac_mac_rx_config(struct emac_adapter *adpt) +{ + u32 val; + + val = (adpt->rfd_burst << NUM_RFD_BURST_PREF_SHFT) & + NUM_RFD_BURST_PREF_BMSK; + val |= (SP_IPV6 | CUT_THRU_EN); + + writel(val, adpt->base + EMAC_RXQ_CTRL_0); + + val = readl(adpt->base + EMAC_RXQ_CTRL_1); + val &= ~(JUMBO_1KAH_BMSK | RFD_PREF_LOW_THRESHOLD_BMSK | + RFD_PREF_UP_THRESHOLD_BMSK); + val |= (JUMBO_1KAH << JUMBO_1KAH_SHFT) | + (RFD_PREF_LOW_TH << RFD_PREF_LOW_THRESHOLD_SHFT) | + (RFD_PREF_UP_TH << RFD_PREF_UP_THRESHOLD_SHFT); + writel(val, adpt->base + EMAC_RXQ_CTRL_1); + + val = readl(adpt->base + EMAC_RXQ_CTRL_2); + val &= ~(RXF_DOF_THRESHOLD_BMSK | RXF_UOF_THRESHOLD_BMSK); + val |= (RXF_DOF_THRESFHOLD << RXF_DOF_THRESHOLD_SHFT) | + (RXF_UOF_THRESFHOLD << RXF_UOF_THRESHOLD_SHFT); + writel(val, adpt->base + EMAC_RXQ_CTRL_2); + + val = readl(adpt->base + EMAC_RXQ_CTRL_3); + val &= ~(RXD_TIMER_BMSK | RXD_THRESHOLD_BMSK); + val |= RXD_TH << RXD_THRESHOLD_SHFT; + writel(val, adpt->base + EMAC_RXQ_CTRL_3); +} + +/* Config dma */ +static void emac_mac_dma_config(struct emac_adapter *adpt) +{ + u32 dma_ctrl = DMAR_REQ_PRI; + + switch (adpt->dma_order) { + case emac_dma_ord_in: + dma_ctrl |= IN_ORDER_MODE; + break; + case emac_dma_ord_enh: + dma_ctrl |= ENH_ORDER_MODE; + break; + case emac_dma_ord_out: + dma_ctrl |= OUT_ORDER_MODE; + break; + default: + break; + } + + dma_ctrl |= (((u32)adpt->dmar_block) << REGRDBLEN_SHFT) & + REGRDBLEN_BMSK; + dma_ctrl |= (((u32)adpt->dmaw_block) << REGWRBLEN_SHFT) & + REGWRBLEN_BMSK; + dma_ctrl |= (((u32)adpt->dmar_dly_cnt) << DMAR_DLY_CNT_SHFT) & + DMAR_DLY_CNT_BMSK; + dma_ctrl |= (((u32)adpt->dmaw_dly_cnt) << DMAW_DLY_CNT_SHFT) & + DMAW_DLY_CNT_BMSK; + + /* config DMA and ensure that configuration is flushed to HW */ + writel(dma_ctrl, adpt->base + EMAC_DMA_CTRL); +} + +/* set MAC address */ +static void emac_set_mac_address(struct emac_adapter *adpt, u8 *addr) +{ + u32 sta; + + /* for example: 00-A0-C6-11-22-33 + * 0<-->C6112233, 1<-->00A0. + */ + + /* low 32bit word */ + sta = (((u32)addr[2]) << 24) | (((u32)addr[3]) << 16) | + (((u32)addr[4]) << 8) | (((u32)addr[5])); + writel(sta, adpt->base + EMAC_MAC_STA_ADDR0); + + /* hight 32bit word */ + sta = (((u32)addr[0]) << 8) | (u32)addr[1]; + writel(sta, adpt->base + EMAC_MAC_STA_ADDR1); +} + +static void emac_mac_config(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + unsigned int max_frame; + u32 val; + + emac_set_mac_address(adpt, netdev->dev_addr); + + max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + adpt->rxbuf_size = netdev->mtu > EMAC_DEF_RX_BUF_SIZE ? + ALIGN(max_frame, 8) : EMAC_DEF_RX_BUF_SIZE; + + emac_mac_dma_rings_config(adpt); + + writel(netdev->mtu + ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN, + adpt->base + EMAC_MAX_FRAM_LEN_CTRL); + + emac_mac_tx_config(adpt); + emac_mac_rx_config(adpt); + emac_mac_dma_config(adpt); + + val = readl(adpt->base + EMAC_AXI_MAST_CTRL); + val &= ~(DATA_BYTE_SWAP | MAX_BOUND); + val |= MAX_BTYPE; + writel(val, adpt->base + EMAC_AXI_MAST_CTRL); + writel(0, adpt->base + EMAC_CLK_GATE_CTRL); + writel(RX_UNCPL_INT_EN, adpt->base + EMAC_MISC_CTRL); +} + +void emac_mac_reset(struct emac_adapter *adpt) +{ + emac_mac_stop(adpt); + + emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, SOFT_RST); + usleep_range(100, 150); /* reset may take up to 100usec */ + + /* interrupt clear-on-read */ + emac_reg_update32(adpt->base + EMAC_DMA_MAS_CTRL, 0, INT_RD_CLR_EN); +} + +void emac_mac_start(struct emac_adapter *adpt) +{ + struct phy_device *phydev = adpt->phydev; + u32 mac, csr1; + + /* enable tx queue */ + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, 0, TXQ_EN); + + /* enable rx queue */ + emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, 0, RXQ_EN); + + /* enable mac control */ + mac = readl(adpt->base + EMAC_MAC_CTRL); + csr1 = readl(adpt->csr + EMAC_EMAC_WRAPPER_CSR1); + + mac |= TXEN | RXEN; /* enable RX/TX */ + + /* We don't have ethtool support yet, so force flow-control mode + * to 'full' always. + */ + mac |= TXFC | RXFC; + + /* setup link speed */ + mac &= ~SPEED_MASK; + if (phydev->speed == SPEED_1000) { + mac |= SPEED(2); + csr1 |= FREQ_MODE; + } else { + mac |= SPEED(1); + csr1 &= ~FREQ_MODE; + } + + if (phydev->duplex == DUPLEX_FULL) + mac |= FULLD; + else + mac &= ~FULLD; + + /* other parameters */ + mac |= (CRCE | PCRCE); + mac |= ((adpt->preamble << PRLEN_SHFT) & PRLEN_BMSK); + mac |= BROAD_EN; + mac |= FLCHK; + mac &= ~RX_CHKSUM_EN; + mac &= ~(HUGEN | VLAN_STRIP | TPAUSE | SIMR | HUGE | MULTI_ALL | + DEBUG_MODE | SINGLE_PAUSE_MODE); + + writel_relaxed(csr1, adpt->csr + EMAC_EMAC_WRAPPER_CSR1); + + writel_relaxed(mac, adpt->base + EMAC_MAC_CTRL); + + /* enable interrupt read clear, low power sleep mode and + * the irq moderators + */ + + writel_relaxed(adpt->irq_mod, adpt->base + EMAC_IRQ_MOD_TIM_INIT); + writel_relaxed(INT_RD_CLR_EN | LPW_MODE | IRQ_MODERATOR_EN | + IRQ_MODERATOR2_EN, adpt->base + EMAC_DMA_MAS_CTRL); + + emac_mac_mode_config(adpt); + + emac_reg_update32(adpt->base + EMAC_ATHR_HEADER_CTRL, + (HEADER_ENABLE | HEADER_CNT_EN), 0); + + emac_reg_update32(adpt->csr + EMAC_EMAC_WRAPPER_CSR2, 0, WOL_EN); +} + +void emac_mac_stop(struct emac_adapter *adpt) +{ + emac_reg_update32(adpt->base + EMAC_RXQ_CTRL_0, RXQ_EN, 0); + emac_reg_update32(adpt->base + EMAC_TXQ_CTRL_0, TXQ_EN, 0); + emac_reg_update32(adpt->base + EMAC_MAC_CTRL, TXEN | RXEN, 0); + usleep_range(1000, 1050); /* stopping mac may take upto 1msec */ +} + +/* Free all descriptors of given transmit queue */ +static void emac_tx_q_descs_free(struct emac_adapter *adpt) +{ + struct emac_tx_queue *tx_q = &adpt->tx_q; + unsigned int i; + size_t size; + + /* ring already cleared, nothing to do */ + if (!tx_q->tpd.tpbuff) + return; + + for (i = 0; i < tx_q->tpd.count; i++) { + struct emac_buffer *tpbuf = GET_TPD_BUFFER(tx_q, i); + + if (tpbuf->dma_addr) { + dma_unmap_single(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + } + if (tpbuf->skb) { + dev_kfree_skb_any(tpbuf->skb); + tpbuf->skb = NULL; + } + } + + size = sizeof(struct emac_buffer) * tx_q->tpd.count; + memset(tx_q->tpd.tpbuff, 0, size); + + /* clear the descriptor ring */ + memset(tx_q->tpd.v_addr, 0, tx_q->tpd.size); + + tx_q->tpd.consume_idx = 0; + tx_q->tpd.produce_idx = 0; +} + +/* Free all descriptors of given receive queue */ +static void emac_rx_q_free_descs(struct emac_adapter *adpt) +{ + struct device *dev = adpt->netdev->dev.parent; + struct emac_rx_queue *rx_q = &adpt->rx_q; + unsigned int i; + size_t size; + + /* ring already cleared, nothing to do */ + if (!rx_q->rfd.rfbuff) + return; + + for (i = 0; i < rx_q->rfd.count; i++) { + struct emac_buffer *rfbuf = GET_RFD_BUFFER(rx_q, i); + + if (rfbuf->dma_addr) { + dma_unmap_single(dev, rfbuf->dma_addr, rfbuf->length, + DMA_FROM_DEVICE); + rfbuf->dma_addr = 0; + } + if (rfbuf->skb) { + dev_kfree_skb(rfbuf->skb); + rfbuf->skb = NULL; + } + } + + size = sizeof(struct emac_buffer) * rx_q->rfd.count; + memset(rx_q->rfd.rfbuff, 0, size); + + /* clear the descriptor rings */ + memset(rx_q->rrd.v_addr, 0, rx_q->rrd.size); + rx_q->rrd.produce_idx = 0; + rx_q->rrd.consume_idx = 0; + + memset(rx_q->rfd.v_addr, 0, rx_q->rfd.size); + rx_q->rfd.produce_idx = 0; + rx_q->rfd.consume_idx = 0; +} + +/* Free all buffers associated with given transmit queue */ +static void emac_tx_q_bufs_free(struct emac_adapter *adpt) +{ + struct emac_tx_queue *tx_q = &adpt->tx_q; + + emac_tx_q_descs_free(adpt); + + kfree(tx_q->tpd.tpbuff); + tx_q->tpd.tpbuff = NULL; + tx_q->tpd.v_addr = NULL; + tx_q->tpd.dma_addr = 0; + tx_q->tpd.size = 0; +} + +/* Allocate TX descriptor ring for the given transmit queue */ +static int emac_tx_q_desc_alloc(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + size_t size; + + size = sizeof(struct emac_buffer) * tx_q->tpd.count; + tx_q->tpd.tpbuff = kzalloc(size, GFP_KERNEL); + if (!tx_q->tpd.tpbuff) + return -ENOMEM; + + tx_q->tpd.size = tx_q->tpd.count * (adpt->tpd_size * 4); + tx_q->tpd.dma_addr = ring_header->dma_addr + ring_header->used; + tx_q->tpd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(tx_q->tpd.size, 8); + tx_q->tpd.produce_idx = 0; + tx_q->tpd.consume_idx = 0; + + return 0; +} + +/* Free all buffers associated with given transmit queue */ +static void emac_rx_q_bufs_free(struct emac_adapter *adpt) +{ + struct emac_rx_queue *rx_q = &adpt->rx_q; + + emac_rx_q_free_descs(adpt); + + kfree(rx_q->rfd.rfbuff); + rx_q->rfd.rfbuff = NULL; + + rx_q->rfd.v_addr = NULL; + rx_q->rfd.dma_addr = 0; + rx_q->rfd.size = 0; + + rx_q->rrd.v_addr = NULL; + rx_q->rrd.dma_addr = 0; + rx_q->rrd.size = 0; +} + +/* Allocate RX descriptor rings for the given receive queue */ +static int emac_rx_descs_alloc(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct emac_rx_queue *rx_q = &adpt->rx_q; + size_t size; + + size = sizeof(struct emac_buffer) * rx_q->rfd.count; + rx_q->rfd.rfbuff = kzalloc(size, GFP_KERNEL); + if (!rx_q->rfd.rfbuff) + return -ENOMEM; + + rx_q->rrd.size = rx_q->rrd.count * (adpt->rrd_size * 4); + rx_q->rfd.size = rx_q->rfd.count * (adpt->rfd_size * 4); + + rx_q->rrd.dma_addr = ring_header->dma_addr + ring_header->used; + rx_q->rrd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(rx_q->rrd.size, 8); + + rx_q->rfd.dma_addr = ring_header->dma_addr + ring_header->used; + rx_q->rfd.v_addr = ring_header->v_addr + ring_header->used; + ring_header->used += ALIGN(rx_q->rfd.size, 8); + + rx_q->rrd.produce_idx = 0; + rx_q->rrd.consume_idx = 0; + + rx_q->rfd.produce_idx = 0; + rx_q->rfd.consume_idx = 0; + + return 0; +} + +/* Allocate all TX and RX descriptor rings */ +int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct device *dev = adpt->netdev->dev.parent; + unsigned int num_tx_descs = adpt->tx_desc_cnt; + unsigned int num_rx_descs = adpt->rx_desc_cnt; + int ret; + + adpt->tx_q.tpd.count = adpt->tx_desc_cnt; + + adpt->rx_q.rrd.count = adpt->rx_desc_cnt; + adpt->rx_q.rfd.count = adpt->rx_desc_cnt; + + /* Ring DMA buffer. Each ring may need up to 8 bytes for alignment, + * hence the additional padding bytes are allocated. + */ + ring_header->size = num_tx_descs * (adpt->tpd_size * 4) + + num_rx_descs * (adpt->rfd_size * 4) + + num_rx_descs * (adpt->rrd_size * 4) + + 8 + 2 * 8; /* 8 byte per one Tx and two Rx rings */ + + ring_header->used = 0; + ring_header->v_addr = dma_zalloc_coherent(dev, ring_header->size, + &ring_header->dma_addr, + GFP_KERNEL); + if (!ring_header->v_addr) + return -ENOMEM; + + ring_header->used = ALIGN(ring_header->dma_addr, 8) - + ring_header->dma_addr; + + ret = emac_tx_q_desc_alloc(adpt, &adpt->tx_q); + if (ret) { + netdev_err(adpt->netdev, "error: Tx Queue alloc failed\n"); + goto err_alloc_tx; + } + + ret = emac_rx_descs_alloc(adpt); + if (ret) { + netdev_err(adpt->netdev, "error: Rx Queue alloc failed\n"); + goto err_alloc_rx; + } + + return 0; + +err_alloc_rx: + emac_tx_q_bufs_free(adpt); +err_alloc_tx: + dma_free_coherent(dev, ring_header->size, + ring_header->v_addr, ring_header->dma_addr); + + ring_header->v_addr = NULL; + ring_header->dma_addr = 0; + ring_header->size = 0; + ring_header->used = 0; + + return ret; +} + +/* Free all TX and RX descriptor rings */ +void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt) +{ + struct emac_ring_header *ring_header = &adpt->ring_header; + struct device *dev = adpt->netdev->dev.parent; + + emac_tx_q_bufs_free(adpt); + emac_rx_q_bufs_free(adpt); + + dma_free_coherent(dev, ring_header->size, + ring_header->v_addr, ring_header->dma_addr); + + ring_header->v_addr = NULL; + ring_header->dma_addr = 0; + ring_header->size = 0; + ring_header->used = 0; +} + +/* Initialize descriptor rings */ +static void emac_mac_rx_tx_ring_reset_all(struct emac_adapter *adpt) +{ + unsigned int i; + + adpt->tx_q.tpd.produce_idx = 0; + adpt->tx_q.tpd.consume_idx = 0; + for (i = 0; i < adpt->tx_q.tpd.count; i++) + adpt->tx_q.tpd.tpbuff[i].dma_addr = 0; + + adpt->rx_q.rrd.produce_idx = 0; + adpt->rx_q.rrd.consume_idx = 0; + adpt->rx_q.rfd.produce_idx = 0; + adpt->rx_q.rfd.consume_idx = 0; + for (i = 0; i < adpt->rx_q.rfd.count; i++) + adpt->rx_q.rfd.rfbuff[i].dma_addr = 0; +} + +/* Produce new receive free descriptor */ +static void emac_mac_rx_rfd_create(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q, + dma_addr_t addr) +{ + u32 *hw_rfd = EMAC_RFD(rx_q, adpt->rfd_size, rx_q->rfd.produce_idx); + + *(hw_rfd++) = lower_32_bits(addr); + *hw_rfd = upper_32_bits(addr); + + if (++rx_q->rfd.produce_idx == rx_q->rfd.count) + rx_q->rfd.produce_idx = 0; +} + +/* Fill up receive queue's RFD with preallocated receive buffers */ +static void emac_mac_rx_descs_refill(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q) +{ + struct emac_buffer *curr_rxbuf; + struct emac_buffer *next_rxbuf; + unsigned int count = 0; + u32 next_produce_idx; + + next_produce_idx = rx_q->rfd.produce_idx + 1; + if (next_produce_idx == rx_q->rfd.count) + next_produce_idx = 0; + + curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx); + next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx); + + /* this always has a blank rx_buffer*/ + while (!next_rxbuf->dma_addr) { + struct sk_buff *skb; + int ret; + + skb = netdev_alloc_skb_ip_align(adpt->netdev, adpt->rxbuf_size); + if (!skb) + break; + + curr_rxbuf->dma_addr = + dma_map_single(adpt->netdev->dev.parent, skb->data, + curr_rxbuf->length, DMA_FROM_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + curr_rxbuf->dma_addr); + if (ret) { + dev_kfree_skb(skb); + break; + } + curr_rxbuf->skb = skb; + curr_rxbuf->length = adpt->rxbuf_size; + + emac_mac_rx_rfd_create(adpt, rx_q, curr_rxbuf->dma_addr); + next_produce_idx = rx_q->rfd.produce_idx + 1; + if (next_produce_idx == rx_q->rfd.count) + next_produce_idx = 0; + + curr_rxbuf = GET_RFD_BUFFER(rx_q, rx_q->rfd.produce_idx); + next_rxbuf = GET_RFD_BUFFER(rx_q, next_produce_idx); + count++; + } + + if (count) { + u32 prod_idx = (rx_q->rfd.produce_idx << rx_q->produce_shift) & + rx_q->produce_mask; + emac_reg_update32(adpt->base + rx_q->produce_reg, + rx_q->produce_mask, prod_idx); + } +} + +static void emac_adjust_link(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + struct phy_device *phydev = netdev->phydev; + + if (phydev->link) + emac_mac_start(adpt); + else + emac_mac_stop(adpt); + + phy_print_status(phydev); +} + +/* Bringup the interface/HW */ +int emac_mac_up(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + struct emac_irq *irq = &adpt->irq; + int ret; + + emac_mac_rx_tx_ring_reset_all(adpt); + emac_mac_config(adpt); + + ret = request_irq(irq->irq, emac_isr, 0, EMAC_MAC_IRQ_RES, irq); + if (ret) { + netdev_err(adpt->netdev, "could not request %s irq\n", + EMAC_MAC_IRQ_RES); + return ret; + } + + emac_mac_rx_descs_refill(adpt, &adpt->rx_q); + + ret = phy_connect_direct(netdev, adpt->phydev, emac_adjust_link, + PHY_INTERFACE_MODE_SGMII); + if (ret) { + netdev_err(adpt->netdev, "could not connect phy\n"); + free_irq(irq->irq, irq); + return ret; + } + + /* enable mac irq */ + writel((u32)~DIS_INT, adpt->base + EMAC_INT_STATUS); + writel(adpt->irq.mask, adpt->base + EMAC_INT_MASK); + + adpt->phydev->irq = PHY_IGNORE_INTERRUPT; + phy_start(adpt->phydev); + + napi_enable(&adpt->rx_q.napi); + netif_start_queue(netdev); + + return 0; +} + +/* Bring down the interface/HW */ +void emac_mac_down(struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + + netif_stop_queue(netdev); + napi_disable(&adpt->rx_q.napi); + + phy_stop(adpt->phydev); + phy_disconnect(adpt->phydev); + + /* disable mac irq */ + writel(DIS_INT, adpt->base + EMAC_INT_STATUS); + writel(0, adpt->base + EMAC_INT_MASK); + synchronize_irq(adpt->irq.irq); + free_irq(adpt->irq.irq, &adpt->irq); + + emac_mac_reset(adpt); + + emac_tx_q_descs_free(adpt); + netdev_reset_queue(adpt->netdev); + emac_rx_q_free_descs(adpt); +} + +/* Consume next received packet descriptor */ +static bool emac_rx_process_rrd(struct emac_adapter *adpt, + struct emac_rx_queue *rx_q, + struct emac_rrd *rrd) +{ + u32 *hw_rrd = EMAC_RRD(rx_q, adpt->rrd_size, rx_q->rrd.consume_idx); + + rrd->word[3] = *(hw_rrd + 3); + + if (!RRD_UPDT(rrd)) + return false; + + rrd->word[4] = 0; + rrd->word[5] = 0; + + rrd->word[0] = *(hw_rrd++); + rrd->word[1] = *(hw_rrd++); + rrd->word[2] = *(hw_rrd++); + + if (unlikely(RRD_NOR(rrd) != 1)) { + netdev_err(adpt->netdev, + "error: multi-RFD not support yet! nor:%lu\n", + RRD_NOR(rrd)); + } + + /* mark rrd as processed */ + RRD_UPDT_SET(rrd, 0); + *hw_rrd = rrd->word[3]; + + if (++rx_q->rrd.consume_idx == rx_q->rrd.count) + rx_q->rrd.consume_idx = 0; + + return true; +} + +/* Produce new transmit descriptor */ +static void emac_tx_tpd_create(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, struct emac_tpd *tpd) +{ + u32 *hw_tpd; + + tx_q->tpd.last_produce_idx = tx_q->tpd.produce_idx; + hw_tpd = EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.produce_idx); + + if (++tx_q->tpd.produce_idx == tx_q->tpd.count) + tx_q->tpd.produce_idx = 0; + + *(hw_tpd++) = tpd->word[0]; + *(hw_tpd++) = tpd->word[1]; + *(hw_tpd++) = tpd->word[2]; + *hw_tpd = tpd->word[3]; +} + +/* Mark the last transmit descriptor as such (for the transmit packet) */ +static void emac_tx_tpd_mark_last(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q) +{ + u32 *hw_tpd = + EMAC_TPD(tx_q, adpt->tpd_size, tx_q->tpd.last_produce_idx); + u32 tmp_tpd; + + tmp_tpd = *(hw_tpd + 1); + tmp_tpd |= EMAC_TPD_LAST_FRAGMENT; + *(hw_tpd + 1) = tmp_tpd; +} + +static void emac_rx_rfd_clean(struct emac_rx_queue *rx_q, struct emac_rrd *rrd) +{ + struct emac_buffer *rfbuf = rx_q->rfd.rfbuff; + u32 consume_idx = RRD_SI(rrd); + unsigned int i; + + for (i = 0; i < RRD_NOR(rrd); i++) { + rfbuf[consume_idx].skb = NULL; + if (++consume_idx == rx_q->rfd.count) + consume_idx = 0; + } + + rx_q->rfd.consume_idx = consume_idx; + rx_q->rfd.process_idx = consume_idx; +} + +/* Push the received skb to upper layers */ +static void emac_receive_skb(struct emac_rx_queue *rx_q, + struct sk_buff *skb, + u16 vlan_tag, bool vlan_flag) +{ + if (vlan_flag) { + u16 vlan; + + EMAC_TAG_TO_VLAN(vlan_tag, vlan); + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan); + } + + napi_gro_receive(&rx_q->napi, skb); +} + +/* Process receive event */ +void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q, + int *num_pkts, int max_pkts) +{ + u32 proc_idx, hw_consume_idx, num_consume_pkts; + struct net_device *netdev = adpt->netdev; + struct emac_buffer *rfbuf; + unsigned int count = 0; + struct emac_rrd rrd; + struct sk_buff *skb; + u32 reg; + + reg = readl_relaxed(adpt->base + rx_q->consume_reg); + + hw_consume_idx = (reg & rx_q->consume_mask) >> rx_q->consume_shift; + num_consume_pkts = (hw_consume_idx >= rx_q->rrd.consume_idx) ? + (hw_consume_idx - rx_q->rrd.consume_idx) : + (hw_consume_idx + rx_q->rrd.count - rx_q->rrd.consume_idx); + + do { + if (!num_consume_pkts) + break; + + if (!emac_rx_process_rrd(adpt, rx_q, &rrd)) + break; + + if (likely(RRD_NOR(&rrd) == 1)) { + /* good receive */ + rfbuf = GET_RFD_BUFFER(rx_q, RRD_SI(&rrd)); + dma_unmap_single(adpt->netdev->dev.parent, + rfbuf->dma_addr, rfbuf->length, + DMA_FROM_DEVICE); + rfbuf->dma_addr = 0; + skb = rfbuf->skb; + } else { + netdev_err(adpt->netdev, + "error: multi-RFD not support yet!\n"); + break; + } + emac_rx_rfd_clean(rx_q, &rrd); + num_consume_pkts--; + count++; + + /* Due to a HW issue in L4 check sum detection (UDP/TCP frags + * with DF set are marked as error), drop packets based on the + * error mask rather than the summary bit (ignoring L4F errors) + */ + if (rrd.word[EMAC_RRD_STATS_DW_IDX] & EMAC_RRD_ERROR) { + netif_dbg(adpt, rx_status, adpt->netdev, + "Drop error packet[RRD: 0x%x:0x%x:0x%x:0x%x]\n", + rrd.word[0], rrd.word[1], + rrd.word[2], rrd.word[3]); + + dev_kfree_skb(skb); + continue; + } + + skb_put(skb, RRD_PKT_SIZE(&rrd) - ETH_FCS_LEN); + skb->dev = netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + if (netdev->features & NETIF_F_RXCSUM) + skb->ip_summed = RRD_L4F(&rrd) ? + CHECKSUM_NONE : CHECKSUM_UNNECESSARY; + else + skb_checksum_none_assert(skb); + + emac_receive_skb(rx_q, skb, (u16)RRD_CVALN_TAG(&rrd), + (bool)RRD_CVTAG(&rrd)); + + netdev->last_rx = jiffies; + (*num_pkts)++; + } while (*num_pkts < max_pkts); + + if (count) { + proc_idx = (rx_q->rfd.process_idx << rx_q->process_shft) & + rx_q->process_mask; + emac_reg_update32(adpt->base + rx_q->process_reg, + rx_q->process_mask, proc_idx); + emac_mac_rx_descs_refill(adpt, rx_q); + } +} + +/* get the number of free transmit descriptors */ +static unsigned int emac_tpd_num_free_descs(struct emac_tx_queue *tx_q) +{ + u32 produce_idx = tx_q->tpd.produce_idx; + u32 consume_idx = tx_q->tpd.consume_idx; + + return (consume_idx > produce_idx) ? + (consume_idx - produce_idx - 1) : + (tx_q->tpd.count + consume_idx - produce_idx - 1); +} + +/* Process transmit event */ +void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q) +{ + u32 reg = readl_relaxed(adpt->base + tx_q->consume_reg); + u32 hw_consume_idx, pkts_compl = 0, bytes_compl = 0; + struct emac_buffer *tpbuf; + + hw_consume_idx = (reg & tx_q->consume_mask) >> tx_q->consume_shift; + + while (tx_q->tpd.consume_idx != hw_consume_idx) { + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.consume_idx); + if (tpbuf->dma_addr) { + dma_unmap_single(adpt->netdev->dev.parent, + tpbuf->dma_addr, tpbuf->length, + DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + } + + if (tpbuf->skb) { + pkts_compl++; + bytes_compl += tpbuf->skb->len; + dev_kfree_skb_irq(tpbuf->skb); + tpbuf->skb = NULL; + } + + if (++tx_q->tpd.consume_idx == tx_q->tpd.count) + tx_q->tpd.consume_idx = 0; + } + + netdev_completed_queue(adpt->netdev, pkts_compl, bytes_compl); + + if (netif_queue_stopped(adpt->netdev)) + if (emac_tpd_num_free_descs(tx_q) > (MAX_SKB_FRAGS + 1)) + netif_wake_queue(adpt->netdev); +} + +/* Initialize all queue data structures */ +void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + adpt->rx_q.netdev = adpt->netdev; + + adpt->rx_q.produce_reg = EMAC_MAILBOX_0; + adpt->rx_q.produce_mask = RFD0_PROD_IDX_BMSK; + adpt->rx_q.produce_shift = RFD0_PROD_IDX_SHFT; + + adpt->rx_q.process_reg = EMAC_MAILBOX_0; + adpt->rx_q.process_mask = RFD0_PROC_IDX_BMSK; + adpt->rx_q.process_shft = RFD0_PROC_IDX_SHFT; + + adpt->rx_q.consume_reg = EMAC_MAILBOX_3; + adpt->rx_q.consume_mask = RFD0_CONS_IDX_BMSK; + adpt->rx_q.consume_shift = RFD0_CONS_IDX_SHFT; + + adpt->rx_q.irq = &adpt->irq; + adpt->rx_q.intr = adpt->irq.mask & ISR_RX_PKT; + + adpt->tx_q.produce_reg = EMAC_MAILBOX_15; + adpt->tx_q.produce_mask = NTPD_PROD_IDX_BMSK; + adpt->tx_q.produce_shift = NTPD_PROD_IDX_SHFT; + + adpt->tx_q.consume_reg = EMAC_MAILBOX_2; + adpt->tx_q.consume_mask = NTPD_CONS_IDX_BMSK; + adpt->tx_q.consume_shift = NTPD_CONS_IDX_SHFT; +} + +/* Fill up transmit descriptors with TSO and Checksum offload information */ +static int emac_tso_csum(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, + struct sk_buff *skb, + struct emac_tpd *tpd) +{ + unsigned int hdr_len; + int ret; + + if (skb_is_gso(skb)) { + if (skb_header_cloned(skb)) { + ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (unlikely(ret)) + return ret; + } + + if (skb->protocol == htons(ETH_P_IP)) { + u32 pkt_len = ((unsigned char *)ip_hdr(skb) - skb->data) + + ntohs(ip_hdr(skb)->tot_len); + if (skb->len > pkt_len) + pskb_trim(skb, pkt_len); + } + + hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + if (unlikely(skb->len == hdr_len)) { + /* we only need to do csum */ + netif_warn(adpt, tx_err, adpt->netdev, + "tso not needed for packet with 0 data\n"); + goto do_csum; + } + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + TPD_IPV4_SET(tpd, 1); + } + + if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { + /* ipv6 tso need an extra tpd */ + struct emac_tpd extra_tpd; + + memset(tpd, 0, sizeof(*tpd)); + memset(&extra_tpd, 0, sizeof(extra_tpd)); + + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + TPD_PKT_LEN_SET(&extra_tpd, skb->len); + TPD_LSO_SET(&extra_tpd, 1); + TPD_LSOV_SET(&extra_tpd, 1); + emac_tx_tpd_create(adpt, tx_q, &extra_tpd); + TPD_LSOV_SET(tpd, 1); + } + + TPD_LSO_SET(tpd, 1); + TPD_TCPHDR_OFFSET_SET(tpd, skb_transport_offset(skb)); + TPD_MSS_SET(tpd, skb_shinfo(skb)->gso_size); + return 0; + } + +do_csum: + if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + unsigned int css, cso; + + cso = skb_transport_offset(skb); + if (unlikely(cso & 0x1)) { + netdev_err(adpt->netdev, + "error: payload offset should be even\n"); + return -EINVAL; + } + css = cso + skb->csum_offset; + + TPD_PAYLOAD_OFFSET_SET(tpd, cso >> 1); + TPD_CXSUM_OFFSET_SET(tpd, css >> 1); + TPD_CSX_SET(tpd, 1); + } + + return 0; +} + +/* Fill up transmit descriptors */ +static void emac_tx_fill_tpd(struct emac_adapter *adpt, + struct emac_tx_queue *tx_q, struct sk_buff *skb, + struct emac_tpd *tpd) +{ + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + unsigned int first = tx_q->tpd.produce_idx; + unsigned int len = skb_headlen(skb); + struct emac_buffer *tpbuf = NULL; + unsigned int mapped_len = 0; + unsigned int i; + int count = 0; + int ret; + + /* if Large Segment Offload is (in TCP Segmentation Offload struct) */ + if (TPD_LSO(tpd)) { + mapped_len = skb_transport_offset(skb) + tcp_hdrlen(skb); + + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = mapped_len; + tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, + skb->data, tpbuf->length, + DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + if (mapped_len < len) { + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = len - mapped_len; + tpbuf->dma_addr = dma_map_single(adpt->netdev->dev.parent, + skb->data + mapped_len, + tpbuf->length, DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + for (i = 0; i < nr_frags; i++) { + struct skb_frag_struct *frag; + + frag = &skb_shinfo(skb)->frags[i]; + + tpbuf = GET_TPD_BUFFER(tx_q, tx_q->tpd.produce_idx); + tpbuf->length = frag->size; + tpbuf->dma_addr = dma_map_page(adpt->netdev->dev.parent, + frag->page.p, frag->page_offset, + tpbuf->length, DMA_TO_DEVICE); + ret = dma_mapping_error(adpt->netdev->dev.parent, + tpbuf->dma_addr); + if (ret) + goto error; + + TPD_BUFFER_ADDR_L_SET(tpd, lower_32_bits(tpbuf->dma_addr)); + TPD_BUFFER_ADDR_H_SET(tpd, upper_32_bits(tpbuf->dma_addr)); + TPD_BUF_LEN_SET(tpd, tpbuf->length); + emac_tx_tpd_create(adpt, tx_q, tpd); + count++; + } + + /* The last tpd */ + wmb(); + emac_tx_tpd_mark_last(adpt, tx_q); + + /* The last buffer info contain the skb address, + * so it will be freed after unmap + */ + tpbuf->skb = skb; + + return; + +error: + /* One of the memory mappings failed, so undo everything */ + tx_q->tpd.produce_idx = first; + + while (count--) { + tpbuf = GET_TPD_BUFFER(tx_q, first); + dma_unmap_page(adpt->netdev->dev.parent, tpbuf->dma_addr, + tpbuf->length, DMA_TO_DEVICE); + tpbuf->dma_addr = 0; + tpbuf->length = 0; + + if (++first == tx_q->tpd.count) + first = 0; + } + + dev_kfree_skb(skb); +} + +/* Transmit the packet using specified transmit queue */ +int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, + struct sk_buff *skb) +{ + struct emac_tpd tpd; + u32 prod_idx; + + memset(&tpd, 0, sizeof(tpd)); + + if (emac_tso_csum(adpt, tx_q, skb, &tpd) != 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + if (skb_vlan_tag_present(skb)) { + u16 tag; + + EMAC_VLAN_TO_TAG(skb_vlan_tag_get(skb), tag); + TPD_CVLAN_TAG_SET(&tpd, tag); + TPD_INSTC_SET(&tpd, 1); + } + + if (skb_network_offset(skb) != ETH_HLEN) + TPD_TYP_SET(&tpd, 1); + + emac_tx_fill_tpd(adpt, tx_q, skb, &tpd); + + netdev_sent_queue(adpt->netdev, skb->len); + + /* Make sure the are enough free descriptors to hold one + * maximum-sized SKB. We need one desc for each fragment, + * one for the checksum (emac_tso_csum), one for TSO, and + * and one for the SKB header. + */ + if (emac_tpd_num_free_descs(tx_q) < (MAX_SKB_FRAGS + 3)) + netif_stop_queue(adpt->netdev); + + /* update produce idx */ + prod_idx = (tx_q->tpd.produce_idx << tx_q->produce_shift) & + tx_q->produce_mask; + emac_reg_update32(adpt->base + tx_q->produce_reg, + tx_q->produce_mask, prod_idx); + + return NETDEV_TX_OK; +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h new file mode 100644 index 000000000000..f3aa24dc4a29 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h @@ -0,0 +1,248 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* EMAC DMA HW engine uses three rings: + * Tx: + * TPD: Transmit Packet Descriptor ring. + * Rx: + * RFD: Receive Free Descriptor ring. + * Ring of descriptors with empty buffers to be filled by Rx HW. + * RRD: Receive Return Descriptor ring. + * Ring of descriptors with buffers filled with received data. + */ + +#ifndef _EMAC_HW_H_ +#define _EMAC_HW_H_ + +/* EMAC_CSR register offsets */ +#define EMAC_EMAC_WRAPPER_CSR1 0x000000 +#define EMAC_EMAC_WRAPPER_CSR2 0x000004 +#define EMAC_EMAC_WRAPPER_TX_TS_LO 0x000104 +#define EMAC_EMAC_WRAPPER_TX_TS_HI 0x000108 +#define EMAC_EMAC_WRAPPER_TX_TS_INX 0x00010c + +/* DMA Order Settings */ +enum emac_dma_order { + emac_dma_ord_in = 1, + emac_dma_ord_enh = 2, + emac_dma_ord_out = 4 +}; + +enum emac_dma_req_block { + emac_dma_req_128 = 0, + emac_dma_req_256 = 1, + emac_dma_req_512 = 2, + emac_dma_req_1024 = 3, + emac_dma_req_2048 = 4, + emac_dma_req_4096 = 5 +}; + +/* Returns the value of bits idx...idx+n_bits */ +#define BITS_GET(val, lo, hi) ((le32_to_cpu(val) & GENMASK((hi), (lo))) >> lo) +#define BITS_SET(val, lo, hi, new_val) \ + val = cpu_to_le32((le32_to_cpu(val) & (~GENMASK((hi), (lo)))) | \ + (((new_val) << (lo)) & GENMASK((hi), (lo)))) + +/* RRD (Receive Return Descriptor) */ +struct emac_rrd { + u32 word[6]; + +/* number of RFD */ +#define RRD_NOR(rrd) BITS_GET((rrd)->word[0], 16, 19) +/* start consumer index of rfd-ring */ +#define RRD_SI(rrd) BITS_GET((rrd)->word[0], 20, 31) +/* vlan-tag (CVID, CFI and PRI) */ +#define RRD_CVALN_TAG(rrd) BITS_GET((rrd)->word[2], 0, 15) +/* length of the packet */ +#define RRD_PKT_SIZE(rrd) BITS_GET((rrd)->word[3], 0, 13) +/* L4(TCP/UDP) checksum failed */ +#define RRD_L4F(rrd) BITS_GET((rrd)->word[3], 14, 14) +/* vlan tagged */ +#define RRD_CVTAG(rrd) BITS_GET((rrd)->word[3], 16, 16) +/* When set, indicates that the descriptor is updated by the IP core. + * When cleared, indicates that the descriptor is invalid. + */ +#define RRD_UPDT(rrd) BITS_GET((rrd)->word[3], 31, 31) +#define RRD_UPDT_SET(rrd, val) BITS_SET((rrd)->word[3], 31, 31, val) +/* timestamp low */ +#define RRD_TS_LOW(rrd) BITS_GET((rrd)->word[4], 0, 29) +/* timestamp high */ +#define RRD_TS_HI(rrd) le32_to_cpu((rrd)->word[5]) +}; + +/* TPD (Transmit Packet Descriptor) */ +struct emac_tpd { + u32 word[4]; + +/* Number of bytes of the transmit packet. (include 4-byte CRC) */ +#define TPD_BUF_LEN_SET(tpd, val) BITS_SET((tpd)->word[0], 0, 15, val) +/* Custom Checksum Offload: When set, ask IP core to offload custom checksum */ +#define TPD_CSX_SET(tpd, val) BITS_SET((tpd)->word[1], 8, 8, val) +/* TCP Large Send Offload: When set, ask IP core to do offload TCP Large Send */ +#define TPD_LSO(tpd) BITS_GET((tpd)->word[1], 12, 12) +#define TPD_LSO_SET(tpd, val) BITS_SET((tpd)->word[1], 12, 12, val) +/* Large Send Offload Version: When set, indicates this is an LSOv2 + * (for both IPv4 and IPv6). When cleared, indicates this is an LSOv1 + * (only for IPv4). + */ +#define TPD_LSOV_SET(tpd, val) BITS_SET((tpd)->word[1], 13, 13, val) +/* IPv4 packet: When set, indicates this is an IPv4 packet, this bit is only + * for LSOV2 format. + */ +#define TPD_IPV4_SET(tpd, val) BITS_SET((tpd)->word[1], 16, 16, val) +/* 0: Ethernet frame (DA+SA+TYPE+DATA+CRC) + * 1: IEEE 802.3 frame (DA+SA+LEN+DSAP+SSAP+CTL+ORG+TYPE+DATA+CRC) + */ +#define TPD_TYP_SET(tpd, val) BITS_SET((tpd)->word[1], 17, 17, val) +/* Low-32bit Buffer Address */ +#define TPD_BUFFER_ADDR_L_SET(tpd, val) ((tpd)->word[2] = cpu_to_le32(val)) +/* CVLAN Tag to be inserted if INS_VLAN_TAG is set, CVLAN TPID based on global + * register configuration. + */ +#define TPD_CVLAN_TAG_SET(tpd, val) BITS_SET((tpd)->word[3], 0, 15, val) +/* Insert CVlan Tag: When set, ask MAC to insert CVLAN TAG to outgoing packet + */ +#define TPD_INSTC_SET(tpd, val) BITS_SET((tpd)->word[3], 17, 17, val) +/* High-14bit Buffer Address, So, the 64b-bit address is + * {DESC_CTRL_11_TX_DATA_HIADDR[17:0],(register) BUFFER_ADDR_H, BUFFER_ADDR_L} + */ +#define TPD_BUFFER_ADDR_H_SET(tpd, val) BITS_SET((tpd)->word[3], 18, 30, val) +/* Format D. Word offset from the 1st byte of this packet to start to calculate + * the custom checksum. + */ +#define TPD_PAYLOAD_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val) +/* Format D. Word offset from the 1st byte of this packet to fill the custom + * checksum to + */ +#define TPD_CXSUM_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 18, 25, val) + +/* Format C. TCP Header offset from the 1st byte of this packet. (byte unit) */ +#define TPD_TCPHDR_OFFSET_SET(tpd, val) BITS_SET((tpd)->word[1], 0, 7, val) +/* Format C. MSS (Maximum Segment Size) got from the protocol layer. (byte unit) + */ +#define TPD_MSS_SET(tpd, val) BITS_SET((tpd)->word[1], 18, 30, val) +/* packet length in ext tpd */ +#define TPD_PKT_LEN_SET(tpd, val) ((tpd)->word[2] = cpu_to_le32(val)) +}; + +/* emac_ring_header represents a single, contiguous block of DMA space + * mapped for the three descriptor rings (tpd, rfd, rrd) + */ +struct emac_ring_header { + void *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + size_t size; /* length in bytes */ + size_t used; +}; + +/* emac_buffer is wrapper around a pointer to a socket buffer + * so a DMA handle can be stored along with the skb + */ +struct emac_buffer { + struct sk_buff *skb; /* socket buffer */ + u16 length; /* rx buffer length */ + dma_addr_t dma_addr; /* dma address */ +}; + +/* receive free descriptor (rfd) ring */ +struct emac_rfd_ring { + struct emac_buffer *rfbuff; + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; + unsigned int process_idx; + unsigned int consume_idx; /* unused */ +}; + +/* Receive Return Desciptor (RRD) ring */ +struct emac_rrd_ring { + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* physical address */ + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; /* unused */ + unsigned int consume_idx; +}; + +/* Rx queue */ +struct emac_rx_queue { + struct net_device *netdev; /* netdev ring belongs to */ + struct emac_rrd_ring rrd; + struct emac_rfd_ring rfd; + struct napi_struct napi; + struct emac_irq *irq; + + u32 intr; + u32 produce_mask; + u32 process_mask; + u32 consume_mask; + + u16 produce_reg; + u16 process_reg; + u16 consume_reg; + + u8 produce_shift; + u8 process_shft; + u8 consume_shift; +}; + +/* Transimit Packet Descriptor (tpd) ring */ +struct emac_tpd_ring { + struct emac_buffer *tpbuff; + u32 *v_addr; /* virtual address */ + dma_addr_t dma_addr; /* dma address */ + + size_t size; /* length in bytes */ + unsigned int count; /* number of desc in the ring */ + unsigned int produce_idx; + unsigned int consume_idx; + unsigned int last_produce_idx; +}; + +/* Tx queue */ +struct emac_tx_queue { + struct emac_tpd_ring tpd; + + u32 produce_mask; + u32 consume_mask; + + u16 max_packets; /* max packets per interrupt */ + u16 produce_reg; + u16 consume_reg; + + u8 produce_shift; + u8 consume_shift; +}; + +struct emac_adapter; + +int emac_mac_up(struct emac_adapter *adpt); +void emac_mac_down(struct emac_adapter *adpt); +void emac_mac_reset(struct emac_adapter *adpt); +void emac_mac_start(struct emac_adapter *adpt); +void emac_mac_stop(struct emac_adapter *adpt); +void emac_mac_mode_config(struct emac_adapter *adpt); +void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q, + int *num_pkts, int max_pkts); +int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q, + struct sk_buff *skb); +void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q); +void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev, + struct emac_adapter *adpt); +int emac_mac_rx_tx_rings_alloc_all(struct emac_adapter *adpt); +void emac_mac_rx_tx_rings_free_all(struct emac_adapter *adpt); +void emac_mac_multicast_addr_clear(struct emac_adapter *adpt); +void emac_mac_multicast_addr_set(struct emac_adapter *adpt, u8 *addr); + +#endif /*_EMAC_HW_H_*/ diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.c b/drivers/net/ethernet/qualcomm/emac/emac-phy.c new file mode 100644 index 000000000000..da4e90db4d98 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.c @@ -0,0 +1,227 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC PHY Controller driver. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-phy.h" +#include "emac-sgmii.h" + +/* EMAC base register offsets */ +#define EMAC_MDIO_CTRL 0x001414 +#define EMAC_PHY_STS 0x001418 +#define EMAC_MDIO_EX_CTRL 0x001440 + +/* EMAC_MDIO_CTRL */ +#define MDIO_MODE BIT(30) +#define MDIO_PR BIT(29) +#define MDIO_AP_EN BIT(28) +#define MDIO_BUSY BIT(27) +#define MDIO_CLK_SEL_BMSK 0x7000000 +#define MDIO_CLK_SEL_SHFT 24 +#define MDIO_START BIT(23) +#define SUP_PREAMBLE BIT(22) +#define MDIO_RD_NWR BIT(21) +#define MDIO_REG_ADDR_BMSK 0x1f0000 +#define MDIO_REG_ADDR_SHFT 16 +#define MDIO_DATA_BMSK 0xffff +#define MDIO_DATA_SHFT 0 + +/* EMAC_PHY_STS */ +#define PHY_ADDR_BMSK 0x1f0000 +#define PHY_ADDR_SHFT 16 + +#define MDIO_CLK_25_4 0 +#define MDIO_CLK_25_28 7 + +#define MDIO_WAIT_TIMES 1000 + +#define EMAC_LINK_SPEED_DEFAULT (\ + EMAC_LINK_SPEED_10_HALF |\ + EMAC_LINK_SPEED_10_FULL |\ + EMAC_LINK_SPEED_100_HALF |\ + EMAC_LINK_SPEED_100_FULL |\ + EMAC_LINK_SPEED_1GB_FULL) + +/** + * emac_phy_mdio_autopoll_disable() - disable mdio autopoll + * @adpt: the emac adapter + * + * The autopoll feature takes over the MDIO bus. In order for + * the PHY driver to be able to talk to the PHY over the MDIO + * bus, we need to temporarily disable the autopoll feature. + */ +static int emac_phy_mdio_autopoll_disable(struct emac_adapter *adpt) +{ + u32 val; + + /* disable autopoll */ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, MDIO_AP_EN, 0); + + /* wait for any mdio polling to complete */ + if (!readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, val, + !(val & MDIO_BUSY), 100, MDIO_WAIT_TIMES * 100)) + return 0; + + /* failed to disable; ensure it is enabled before returning */ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN); + + return -EBUSY; +} + +/** + * emac_phy_mdio_autopoll_disable() - disable mdio autopoll + * @adpt: the emac adapter + * + * The EMAC has the ability to poll the external PHY on the MDIO + * bus for link state changes. This eliminates the need for the + * driver to poll the phy. If if the link state does change, + * the EMAC issues an interrupt on behalf of the PHY. + */ +static void emac_phy_mdio_autopoll_enable(struct emac_adapter *adpt) +{ + emac_reg_update32(adpt->base + EMAC_MDIO_CTRL, 0, MDIO_AP_EN); +} + +static int emac_mdio_read(struct mii_bus *bus, int addr, int regnum) +{ + struct emac_adapter *adpt = bus->priv; + u32 reg; + int ret; + + ret = emac_phy_mdio_autopoll_disable(adpt); + if (ret) + return ret; + + emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK, + (addr << PHY_ADDR_SHFT)); + + reg = SUP_PREAMBLE | + ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) | + ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) | + MDIO_START | MDIO_RD_NWR; + + writel(reg, adpt->base + EMAC_MDIO_CTRL); + + if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, + !(reg & (MDIO_START | MDIO_BUSY)), + 100, MDIO_WAIT_TIMES * 100)) + ret = -EIO; + else + ret = (reg >> MDIO_DATA_SHFT) & MDIO_DATA_BMSK; + + emac_phy_mdio_autopoll_enable(adpt); + + return ret; +} + +static int emac_mdio_write(struct mii_bus *bus, int addr, int regnum, u16 val) +{ + struct emac_adapter *adpt = bus->priv; + u32 reg; + int ret; + + ret = emac_phy_mdio_autopoll_disable(adpt); + if (ret) + return ret; + + emac_reg_update32(adpt->base + EMAC_PHY_STS, PHY_ADDR_BMSK, + (addr << PHY_ADDR_SHFT)); + + reg = SUP_PREAMBLE | + ((MDIO_CLK_25_4 << MDIO_CLK_SEL_SHFT) & MDIO_CLK_SEL_BMSK) | + ((regnum << MDIO_REG_ADDR_SHFT) & MDIO_REG_ADDR_BMSK) | + ((val << MDIO_DATA_SHFT) & MDIO_DATA_BMSK) | + MDIO_START; + + writel(reg, adpt->base + EMAC_MDIO_CTRL); + + if (readl_poll_timeout(adpt->base + EMAC_MDIO_CTRL, reg, + !(reg & (MDIO_START | MDIO_BUSY)), 100, + MDIO_WAIT_TIMES * 100)) + ret = -EIO; + + emac_phy_mdio_autopoll_enable(adpt); + + return ret; +} + +/* Configure the MDIO bus and connect the external PHY */ +int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt) +{ + struct device_node *np = pdev->dev.of_node; + struct mii_bus *mii_bus; + int ret; + + /* Create the mii_bus object for talking to the MDIO bus */ + adpt->mii_bus = mii_bus = devm_mdiobus_alloc(&pdev->dev); + if (!mii_bus) + return -ENOMEM; + + mii_bus->name = "emac-mdio"; + snprintf(mii_bus->id, MII_BUS_ID_SIZE, "%s", pdev->name); + mii_bus->read = emac_mdio_read; + mii_bus->write = emac_mdio_write; + mii_bus->parent = &pdev->dev; + mii_bus->priv = adpt; + + if (has_acpi_companion(&pdev->dev)) { + u32 phy_addr; + + ret = mdiobus_register(mii_bus); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + ret = device_property_read_u32(&pdev->dev, "phy-channel", + &phy_addr); + if (ret) + /* If we can't read a valid phy address, then assume + * that there is only one phy on this mdio bus. + */ + adpt->phydev = phy_find_first(mii_bus); + else + adpt->phydev = mdiobus_get_phy(mii_bus, phy_addr); + + } else { + struct device_node *phy_np; + + ret = of_mdiobus_register(mii_bus, np); + if (ret) { + dev_err(&pdev->dev, "could not register mdio bus\n"); + return ret; + } + + phy_np = of_parse_phandle(np, "phy-handle", 0); + adpt->phydev = of_phy_find_device(phy_np); + } + + if (!adpt->phydev) { + dev_err(&pdev->dev, "could not find external phy\n"); + mdiobus_unregister(mii_bus); + return -ENODEV; + } + + if (adpt->phydev->drv) + phy_attached_print(adpt->phydev, NULL); + + return 0; +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-phy.h b/drivers/net/ethernet/qualcomm/emac/emac-phy.h new file mode 100644 index 000000000000..49f3701a6dd7 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-phy.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. +* +* This program is free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License version 2 and +* only version 2 as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +*/ + +#ifndef _EMAC_PHY_H_ +#define _EMAC_PHY_H_ + +typedef int (*emac_sgmii_initialize)(struct emac_adapter *adpt); + +/** emac_phy - internal emac phy + * @base base address + * @digital per-lane digital block + * @initialize initialization function + */ +struct emac_phy { + void __iomem *base; + void __iomem *digital; + emac_sgmii_initialize initialize; +}; + +struct emac_adapter; + +int emac_phy_config(struct platform_device *pdev, struct emac_adapter *adpt); + +#endif /* _EMAC_PHY_H_ */ diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c new file mode 100644 index 000000000000..75c1b530e39e --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.c @@ -0,0 +1,784 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC SGMII Controller driver. + */ + +#include +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-sgmii.h" + +/* EMAC_QSERDES register offsets */ +#define EMAC_QSERDES_COM_SYS_CLK_CTRL 0x000000 +#define EMAC_QSERDES_COM_PLL_CNTRL 0x000014 +#define EMAC_QSERDES_COM_PLL_IP_SETI 0x000018 +#define EMAC_QSERDES_COM_PLL_CP_SETI 0x000024 +#define EMAC_QSERDES_COM_PLL_IP_SETP 0x000028 +#define EMAC_QSERDES_COM_PLL_CP_SETP 0x00002c +#define EMAC_QSERDES_COM_SYSCLK_EN_SEL 0x000038 +#define EMAC_QSERDES_COM_RESETSM_CNTRL 0x000040 +#define EMAC_QSERDES_COM_PLLLOCK_CMP1 0x000044 +#define EMAC_QSERDES_COM_PLLLOCK_CMP2 0x000048 +#define EMAC_QSERDES_COM_PLLLOCK_CMP3 0x00004c +#define EMAC_QSERDES_COM_PLLLOCK_CMP_EN 0x000050 +#define EMAC_QSERDES_COM_DEC_START1 0x000064 +#define EMAC_QSERDES_COM_DIV_FRAC_START1 0x000098 +#define EMAC_QSERDES_COM_DIV_FRAC_START2 0x00009c +#define EMAC_QSERDES_COM_DIV_FRAC_START3 0x0000a0 +#define EMAC_QSERDES_COM_DEC_START2 0x0000a4 +#define EMAC_QSERDES_COM_PLL_CRCTRL 0x0000ac +#define EMAC_QSERDES_COM_RESET_SM 0x0000bc +#define EMAC_QSERDES_TX_BIST_MODE_LANENO 0x000100 +#define EMAC_QSERDES_TX_TX_EMP_POST1_LVL 0x000108 +#define EMAC_QSERDES_TX_TX_DRV_LVL 0x00010c +#define EMAC_QSERDES_TX_LANE_MODE 0x000150 +#define EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN 0x000170 +#define EMAC_QSERDES_RX_CDR_CONTROL 0x000200 +#define EMAC_QSERDES_RX_CDR_CONTROL2 0x000210 +#define EMAC_QSERDES_RX_RX_EQ_GAIN12 0x000230 + +/* EMAC_SGMII register offsets */ +#define EMAC_SGMII_PHY_SERDES_START 0x000000 +#define EMAC_SGMII_PHY_CMN_PWR_CTRL 0x000004 +#define EMAC_SGMII_PHY_RX_PWR_CTRL 0x000008 +#define EMAC_SGMII_PHY_TX_PWR_CTRL 0x00000C +#define EMAC_SGMII_PHY_LANE_CTRL1 0x000018 +#define EMAC_SGMII_PHY_AUTONEG_CFG2 0x000048 +#define EMAC_SGMII_PHY_CDR_CTRL0 0x000058 +#define EMAC_SGMII_PHY_SPEED_CFG1 0x000074 +#define EMAC_SGMII_PHY_POW_DWN_CTRL0 0x000080 +#define EMAC_SGMII_PHY_RESET_CTRL 0x0000a8 +#define EMAC_SGMII_PHY_IRQ_CMD 0x0000ac +#define EMAC_SGMII_PHY_INTERRUPT_CLEAR 0x0000b0 +#define EMAC_SGMII_PHY_INTERRUPT_MASK 0x0000b4 +#define EMAC_SGMII_PHY_INTERRUPT_STATUS 0x0000b8 +#define EMAC_SGMII_PHY_RX_CHK_STATUS 0x0000d4 +#define EMAC_SGMII_PHY_AUTONEG0_STATUS 0x0000e0 +#define EMAC_SGMII_PHY_AUTONEG1_STATUS 0x0000e4 + +/* EMAC_QSERDES_COM_PLL_IP_SETI */ +#define PLL_IPSETI(x) ((x) & 0x3f) + +/* EMAC_QSERDES_COM_PLL_CP_SETI */ +#define PLL_CPSETI(x) ((x) & 0xff) + +/* EMAC_QSERDES_COM_PLL_IP_SETP */ +#define PLL_IPSETP(x) ((x) & 0x3f) + +/* EMAC_QSERDES_COM_PLL_CP_SETP */ +#define PLL_CPSETP(x) ((x) & 0x1f) + +/* EMAC_QSERDES_COM_PLL_CRCTRL */ +#define PLL_RCTRL(x) (((x) & 0xf) << 4) +#define PLL_CCTRL(x) ((x) & 0xf) + +/* SGMII v2 PHY registers per lane */ +#define EMAC_SGMII_PHY_LN_OFFSET 0x0400 + +/* SGMII v2 digital lane registers */ +#define EMAC_SGMII_LN_DRVR_CTRL0 0x00C +#define EMAC_SGMII_LN_DRVR_TAP_EN 0x018 +#define EMAC_SGMII_LN_TX_MARGINING 0x01C +#define EMAC_SGMII_LN_TX_PRE 0x020 +#define EMAC_SGMII_LN_TX_POST 0x024 +#define EMAC_SGMII_LN_TX_BAND_MODE 0x060 +#define EMAC_SGMII_LN_LANE_MODE 0x064 +#define EMAC_SGMII_LN_PARALLEL_RATE 0x078 +#define EMAC_SGMII_LN_CML_CTRL_MODE0 0x0B8 +#define EMAC_SGMII_LN_MIXER_CTRL_MODE0 0x0D0 +#define EMAC_SGMII_LN_VGA_INITVAL 0x134 +#define EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0 0x17C +#define EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0 0x188 +#define EMAC_SGMII_LN_UCDR_SO_CONFIG 0x194 +#define EMAC_SGMII_LN_RX_BAND 0x19C +#define EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0 0x1B8 +#define EMAC_SGMII_LN_RSM_CONFIG 0x1F0 +#define EMAC_SGMII_LN_SIGDET_ENABLES 0x224 +#define EMAC_SGMII_LN_SIGDET_CNTRL 0x228 +#define EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL 0x22C +#define EMAC_SGMII_LN_RX_EN_SIGNAL 0x2A0 +#define EMAC_SGMII_LN_RX_MISC_CNTRL0 0x2AC +#define EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV 0x2BC + +/* SGMII v2 digital lane register values */ +#define UCDR_STEP_BY_TWO_MODE0 BIT(7) +#define UCDR_xO_GAIN_MODE(x) ((x) & 0x7f) +#define UCDR_ENABLE BIT(6) +#define UCDR_SO_SATURATION(x) ((x) & 0x3f) +#define SIGDET_LP_BYP_PS4 BIT(7) +#define SIGDET_EN_PS0_TO_PS2 BIT(6) +#define EN_ACCOUPLEVCM_SW_MUX BIT(5) +#define EN_ACCOUPLEVCM_SW BIT(4) +#define RX_SYNC_EN BIT(3) +#define RXTERM_HIGHZ_PS5 BIT(2) +#define SIGDET_EN_PS3 BIT(1) +#define EN_ACCOUPLE_VCM_PS3 BIT(0) +#define UFS_MODE BIT(5) +#define TXVAL_VALID_INIT BIT(4) +#define TXVAL_VALID_MUX BIT(3) +#define TXVAL_VALID BIT(2) +#define USB3P1_MODE BIT(1) +#define KR_PCIGEN3_MODE BIT(0) +#define PRE_EN BIT(3) +#define POST_EN BIT(2) +#define MAIN_EN_MUX BIT(1) +#define MAIN_EN BIT(0) +#define TX_MARGINING_MUX BIT(6) +#define TX_MARGINING(x) ((x) & 0x3f) +#define TX_PRE_MUX BIT(6) +#define TX_PRE(x) ((x) & 0x3f) +#define TX_POST_MUX BIT(6) +#define TX_POST(x) ((x) & 0x3f) +#define CML_GEAR_MODE(x) (((x) & 7) << 3) +#define CML2CMOS_IBOOST_MODE(x) ((x) & 7) +#define MIXER_LOADB_MODE(x) (((x) & 0xf) << 2) +#define MIXER_DATARATE_MODE(x) ((x) & 3) +#define VGA_THRESH_DFE(x) ((x) & 0x3f) +#define SIGDET_LP_BYP_PS0_TO_PS2 BIT(5) +#define SIGDET_LP_BYP_MUX BIT(4) +#define SIGDET_LP_BYP BIT(3) +#define SIGDET_EN_MUX BIT(2) +#define SIGDET_EN BIT(1) +#define SIGDET_FLT_BYP BIT(0) +#define SIGDET_LVL(x) (((x) & 0xf) << 4) +#define SIGDET_BW_CTRL(x) ((x) & 0xf) +#define SIGDET_DEGLITCH_CTRL(x) (((x) & 0xf) << 1) +#define SIGDET_DEGLITCH_BYP BIT(0) +#define INVERT_PCS_RX_CLK BIT(7) +#define PWM_EN BIT(6) +#define RXBIAS_SEL(x) (((x) & 0x3) << 4) +#define EBDAC_SIGN BIT(3) +#define EDAC_SIGN BIT(2) +#define EN_AUXTAP1SIGN_INVERT BIT(1) +#define EN_DAC_CHOPPING BIT(0) +#define DRVR_LOGIC_CLK_EN BIT(4) +#define DRVR_LOGIC_CLK_DIV(x) ((x) & 0xf) +#define PARALLEL_RATE_MODE2(x) (((x) & 0x3) << 4) +#define PARALLEL_RATE_MODE1(x) (((x) & 0x3) << 2) +#define PARALLEL_RATE_MODE0(x) ((x) & 0x3) +#define BAND_MODE2(x) (((x) & 0x3) << 4) +#define BAND_MODE1(x) (((x) & 0x3) << 2) +#define BAND_MODE0(x) ((x) & 0x3) +#define LANE_SYNC_MODE BIT(5) +#define LANE_MODE(x) ((x) & 0x1f) +#define CDR_PD_SEL_MODE0(x) (((x) & 0x3) << 5) +#define EN_DLL_MODE0 BIT(4) +#define EN_IQ_DCC_MODE0 BIT(3) +#define EN_IQCAL_MODE0 BIT(2) +#define EN_QPATH_MODE0 BIT(1) +#define EN_EPATH_MODE0 BIT(0) +#define FORCE_TSYNC_ACK BIT(7) +#define FORCE_CMN_ACK BIT(6) +#define FORCE_CMN_READY BIT(5) +#define EN_RCLK_DEGLITCH BIT(4) +#define BYPASS_RSM_CDR_RESET BIT(3) +#define BYPASS_RSM_TSYNC BIT(2) +#define BYPASS_RSM_SAMP_CAL BIT(1) +#define BYPASS_RSM_DLL_CAL BIT(0) + +/* EMAC_QSERDES_COM_SYS_CLK_CTRL */ +#define SYSCLK_CM BIT(4) +#define SYSCLK_AC_COUPLE BIT(3) + +/* EMAC_QSERDES_COM_PLL_CNTRL */ +#define OCP_EN BIT(5) +#define PLL_DIV_FFEN BIT(2) +#define PLL_DIV_ORD BIT(1) + +/* EMAC_QSERDES_COM_SYSCLK_EN_SEL */ +#define SYSCLK_SEL_CMOS BIT(3) + +/* EMAC_QSERDES_COM_RESETSM_CNTRL */ +#define FRQ_TUNE_MODE BIT(4) + +/* EMAC_QSERDES_COM_PLLLOCK_CMP_EN */ +#define PLLLOCK_CMP_EN BIT(0) + +/* EMAC_QSERDES_COM_DEC_START1 */ +#define DEC_START1_MUX BIT(7) +#define DEC_START1(x) ((x) & 0x7f) + +/* EMAC_QSERDES_COM_DIV_FRAC_START1 * EMAC_QSERDES_COM_DIV_FRAC_START2 */ +#define DIV_FRAC_START_MUX BIT(7) +#define DIV_FRAC_START(x) ((x) & 0x7f) + +/* EMAC_QSERDES_COM_DIV_FRAC_START3 */ +#define DIV_FRAC_START3_MUX BIT(4) +#define DIV_FRAC_START3(x) ((x) & 0xf) + +/* EMAC_QSERDES_COM_DEC_START2 */ +#define DEC_START2_MUX BIT(1) +#define DEC_START2 BIT(0) + +/* EMAC_QSERDES_COM_RESET_SM */ +#define READY BIT(5) + +/* EMAC_QSERDES_TX_TX_EMP_POST1_LVL */ +#define TX_EMP_POST1_LVL_MUX BIT(5) +#define TX_EMP_POST1_LVL(x) ((x) & 0x1f) +#define TX_EMP_POST1_LVL_BMSK 0x1f +#define TX_EMP_POST1_LVL_SHFT 0 + +/* EMAC_QSERDES_TX_TX_DRV_LVL */ +#define TX_DRV_LVL_MUX BIT(4) +#define TX_DRV_LVL(x) ((x) & 0xf) + +/* EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN */ +#define EMP_EN_MUX BIT(1) +#define EMP_EN BIT(0) + +/* EMAC_QSERDES_RX_CDR_CONTROL & EMAC_QSERDES_RX_CDR_CONTROL2 */ +#define HBW_PD_EN BIT(7) +#define SECONDORDERENABLE BIT(6) +#define FIRSTORDER_THRESH(x) (((x) & 0x7) << 3) +#define SECONDORDERGAIN(x) ((x) & 0x7) + +/* EMAC_QSERDES_RX_RX_EQ_GAIN12 */ +#define RX_EQ_GAIN2(x) (((x) & 0xf) << 4) +#define RX_EQ_GAIN1(x) ((x) & 0xf) + +/* EMAC_SGMII_PHY_SERDES_START */ +#define SERDES_START BIT(0) + +/* EMAC_SGMII_PHY_CMN_PWR_CTRL */ +#define BIAS_EN BIT(6) +#define PLL_EN BIT(5) +#define SYSCLK_EN BIT(4) +#define CLKBUF_L_EN BIT(3) +#define PLL_TXCLK_EN BIT(1) +#define PLL_RXCLK_EN BIT(0) + +/* EMAC_SGMII_PHY_RX_PWR_CTRL */ +#define L0_RX_SIGDET_EN BIT(7) +#define L0_RX_TERM_MODE(x) (((x) & 3) << 4) +#define L0_RX_I_EN BIT(1) + +/* EMAC_SGMII_PHY_TX_PWR_CTRL */ +#define L0_TX_EN BIT(5) +#define L0_CLKBUF_EN BIT(4) +#define L0_TRAN_BIAS_EN BIT(1) + +/* EMAC_SGMII_PHY_LANE_CTRL1 */ +#define L0_RX_EQUALIZE_ENABLE BIT(6) +#define L0_RESET_TSYNC_EN BIT(4) +#define L0_DRV_LVL(x) ((x) & 0xf) + +/* EMAC_SGMII_PHY_AUTONEG_CFG2 */ +#define FORCE_AN_TX_CFG BIT(5) +#define FORCE_AN_RX_CFG BIT(4) +#define AN_ENABLE BIT(0) + +/* EMAC_SGMII_PHY_SPEED_CFG1 */ +#define DUPLEX_MODE BIT(4) +#define SPDMODE_1000 BIT(1) +#define SPDMODE_100 BIT(0) +#define SPDMODE_10 0 +#define SPDMODE_BMSK 3 +#define SPDMODE_SHFT 0 + +/* EMAC_SGMII_PHY_POW_DWN_CTRL0 */ +#define PWRDN_B BIT(0) +#define CDR_MAX_CNT(x) ((x) & 0xff) + +/* EMAC_QSERDES_TX_BIST_MODE_LANENO */ +#define BIST_LANE_NUMBER(x) (((x) & 3) << 5) +#define BISTMODE(x) ((x) & 0x1f) + +/* EMAC_QSERDES_COM_PLLLOCK_CMPx */ +#define PLLLOCK_CMP(x) ((x) & 0xff) + +/* EMAC_SGMII_PHY_RESET_CTRL */ +#define PHY_SW_RESET BIT(0) + +/* EMAC_SGMII_PHY_IRQ_CMD */ +#define IRQ_GLOBAL_CLEAR BIT(0) + +/* EMAC_SGMII_PHY_INTERRUPT_MASK */ +#define DECODE_CODE_ERR BIT(7) +#define DECODE_DISP_ERR BIT(6) +#define PLL_UNLOCK BIT(5) +#define AN_ILLEGAL_TERM BIT(4) +#define SYNC_FAIL BIT(3) +#define AN_START BIT(2) +#define AN_END BIT(1) +#define AN_REQUEST BIT(0) + +#define SGMII_PHY_IRQ_CLR_WAIT_TIME 10 + +#define SGMII_PHY_INTERRUPT_ERR (\ + DECODE_CODE_ERR |\ + DECODE_DISP_ERR) + +#define SGMII_ISR_AN_MASK (\ + AN_REQUEST |\ + AN_START |\ + AN_END |\ + AN_ILLEGAL_TERM |\ + PLL_UNLOCK |\ + SYNC_FAIL) + +#define SGMII_ISR_MASK (\ + SGMII_PHY_INTERRUPT_ERR |\ + SGMII_ISR_AN_MASK) + +/* SGMII TX_CONFIG */ +#define TXCFG_LINK 0x8000 +#define TXCFG_MODE_BMSK 0x1c00 +#define TXCFG_1000_FULL 0x1800 +#define TXCFG_100_FULL 0x1400 +#define TXCFG_100_HALF 0x0400 +#define TXCFG_10_FULL 0x1000 +#define TXCFG_10_HALF 0x0000 + +#define SERDES_START_WAIT_TIMES 100 + +struct emac_reg_write { + unsigned int offset; + u32 val; +}; + +static void emac_reg_write_all(void __iomem *base, + const struct emac_reg_write *itr, size_t size) +{ + size_t i; + + for (i = 0; i < size; ++itr, ++i) + writel(itr->val, base + itr->offset); +} + +static const struct emac_reg_write physical_coding_sublayer_programming_v1[] = { + {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)}, + {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B}, + {EMAC_SGMII_PHY_CMN_PWR_CTRL, + BIAS_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | PLL_RXCLK_EN}, + {EMAC_SGMII_PHY_TX_PWR_CTRL, L0_TX_EN | L0_CLKBUF_EN | L0_TRAN_BIAS_EN}, + {EMAC_SGMII_PHY_RX_PWR_CTRL, + L0_RX_SIGDET_EN | L0_RX_TERM_MODE(1) | L0_RX_I_EN}, + {EMAC_SGMII_PHY_CMN_PWR_CTRL, + BIAS_EN | PLL_EN | SYSCLK_EN | CLKBUF_L_EN | PLL_TXCLK_EN | + PLL_RXCLK_EN}, + {EMAC_SGMII_PHY_LANE_CTRL1, + L0_RX_EQUALIZE_ENABLE | L0_RESET_TSYNC_EN | L0_DRV_LVL(15)}, +}; + +static const struct emac_reg_write sysclk_refclk_setting[] = { + {EMAC_QSERDES_COM_SYSCLK_EN_SEL, SYSCLK_SEL_CMOS}, + {EMAC_QSERDES_COM_SYS_CLK_CTRL, SYSCLK_CM | SYSCLK_AC_COUPLE}, +}; + +static const struct emac_reg_write pll_setting[] = { + {EMAC_QSERDES_COM_PLL_IP_SETI, PLL_IPSETI(1)}, + {EMAC_QSERDES_COM_PLL_CP_SETI, PLL_CPSETI(59)}, + {EMAC_QSERDES_COM_PLL_IP_SETP, PLL_IPSETP(10)}, + {EMAC_QSERDES_COM_PLL_CP_SETP, PLL_CPSETP(9)}, + {EMAC_QSERDES_COM_PLL_CRCTRL, PLL_RCTRL(15) | PLL_CCTRL(11)}, + {EMAC_QSERDES_COM_PLL_CNTRL, OCP_EN | PLL_DIV_FFEN | PLL_DIV_ORD}, + {EMAC_QSERDES_COM_DEC_START1, DEC_START1_MUX | DEC_START1(2)}, + {EMAC_QSERDES_COM_DEC_START2, DEC_START2_MUX | DEC_START2}, + {EMAC_QSERDES_COM_DIV_FRAC_START1, + DIV_FRAC_START_MUX | DIV_FRAC_START(85)}, + {EMAC_QSERDES_COM_DIV_FRAC_START2, + DIV_FRAC_START_MUX | DIV_FRAC_START(42)}, + {EMAC_QSERDES_COM_DIV_FRAC_START3, + DIV_FRAC_START3_MUX | DIV_FRAC_START3(3)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP1, PLLLOCK_CMP(43)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP2, PLLLOCK_CMP(104)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP3, PLLLOCK_CMP(0)}, + {EMAC_QSERDES_COM_PLLLOCK_CMP_EN, PLLLOCK_CMP_EN}, + {EMAC_QSERDES_COM_RESETSM_CNTRL, FRQ_TUNE_MODE}, +}; + +static const struct emac_reg_write cdr_setting[] = { + {EMAC_QSERDES_RX_CDR_CONTROL, + SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(2)}, + {EMAC_QSERDES_RX_CDR_CONTROL2, + SECONDORDERENABLE | FIRSTORDER_THRESH(3) | SECONDORDERGAIN(4)}, +}; + +static const struct emac_reg_write tx_rx_setting[] = { + {EMAC_QSERDES_TX_BIST_MODE_LANENO, 0}, + {EMAC_QSERDES_TX_TX_DRV_LVL, TX_DRV_LVL_MUX | TX_DRV_LVL(15)}, + {EMAC_QSERDES_TX_TRAN_DRVR_EMP_EN, EMP_EN_MUX | EMP_EN}, + {EMAC_QSERDES_TX_TX_EMP_POST1_LVL, + TX_EMP_POST1_LVL_MUX | TX_EMP_POST1_LVL(1)}, + {EMAC_QSERDES_RX_RX_EQ_GAIN12, RX_EQ_GAIN2(15) | RX_EQ_GAIN1(15)}, + {EMAC_QSERDES_TX_LANE_MODE, LANE_MODE(8)}, +}; + +static const struct emac_reg_write sgmii_v2_laned[] = { + /* CDR Settings */ + {EMAC_SGMII_LN_UCDR_FO_GAIN_MODE0, + UCDR_STEP_BY_TWO_MODE0 | UCDR_xO_GAIN_MODE(10)}, + {EMAC_SGMII_LN_UCDR_SO_GAIN_MODE0, UCDR_xO_GAIN_MODE(6)}, + {EMAC_SGMII_LN_UCDR_SO_CONFIG, UCDR_ENABLE | UCDR_SO_SATURATION(12)}, + + /* TX/RX Settings */ + {EMAC_SGMII_LN_RX_EN_SIGNAL, SIGDET_LP_BYP_PS4 | SIGDET_EN_PS0_TO_PS2}, + + {EMAC_SGMII_LN_DRVR_CTRL0, TXVAL_VALID_INIT | KR_PCIGEN3_MODE}, + {EMAC_SGMII_LN_DRVR_TAP_EN, MAIN_EN}, + {EMAC_SGMII_LN_TX_MARGINING, TX_MARGINING_MUX | TX_MARGINING(25)}, + {EMAC_SGMII_LN_TX_PRE, TX_PRE_MUX}, + {EMAC_SGMII_LN_TX_POST, TX_POST_MUX}, + + {EMAC_SGMII_LN_CML_CTRL_MODE0, + CML_GEAR_MODE(1) | CML2CMOS_IBOOST_MODE(1)}, + {EMAC_SGMII_LN_MIXER_CTRL_MODE0, + MIXER_LOADB_MODE(12) | MIXER_DATARATE_MODE(1)}, + {EMAC_SGMII_LN_VGA_INITVAL, VGA_THRESH_DFE(31)}, + {EMAC_SGMII_LN_SIGDET_ENABLES, + SIGDET_LP_BYP_PS0_TO_PS2 | SIGDET_FLT_BYP}, + {EMAC_SGMII_LN_SIGDET_CNTRL, SIGDET_LVL(8)}, + + {EMAC_SGMII_LN_SIGDET_DEGLITCH_CNTRL, SIGDET_DEGLITCH_CTRL(4)}, + {EMAC_SGMII_LN_RX_MISC_CNTRL0, 0}, + {EMAC_SGMII_LN_DRVR_LOGIC_CLKDIV, + DRVR_LOGIC_CLK_EN | DRVR_LOGIC_CLK_DIV(4)}, + + {EMAC_SGMII_LN_PARALLEL_RATE, PARALLEL_RATE_MODE0(1)}, + {EMAC_SGMII_LN_TX_BAND_MODE, BAND_MODE0(2)}, + {EMAC_SGMII_LN_RX_BAND, BAND_MODE0(3)}, + {EMAC_SGMII_LN_LANE_MODE, LANE_MODE(26)}, + {EMAC_SGMII_LN_RX_RCVR_PATH1_MODE0, CDR_PD_SEL_MODE0(3)}, + {EMAC_SGMII_LN_RSM_CONFIG, BYPASS_RSM_SAMP_CAL | BYPASS_RSM_DLL_CAL}, +}; + +static const struct emac_reg_write physical_coding_sublayer_programming_v2[] = { + {EMAC_SGMII_PHY_POW_DWN_CTRL0, PWRDN_B}, + {EMAC_SGMII_PHY_CDR_CTRL0, CDR_MAX_CNT(15)}, + {EMAC_SGMII_PHY_TX_PWR_CTRL, 0}, + {EMAC_SGMII_PHY_LANE_CTRL1, L0_RX_EQUALIZE_ENABLE}, +}; + +static int emac_sgmii_link_init(struct emac_adapter *adpt) +{ + struct phy_device *phydev = adpt->phydev; + struct emac_phy *phy = &adpt->phy; + u32 val; + + val = readl(phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + + if (phydev->autoneg == AUTONEG_ENABLE) { + val &= ~(FORCE_AN_RX_CFG | FORCE_AN_TX_CFG); + val |= AN_ENABLE; + writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + } else { + u32 speed_cfg; + + switch (phydev->speed) { + case SPEED_10: + speed_cfg = SPDMODE_10; + break; + case SPEED_100: + speed_cfg = SPDMODE_100; + break; + case SPEED_1000: + speed_cfg = SPDMODE_1000; + break; + default: + return -EINVAL; + } + + if (phydev->duplex == DUPLEX_FULL) + speed_cfg |= DUPLEX_MODE; + + val &= ~AN_ENABLE; + writel(speed_cfg, phy->base + EMAC_SGMII_PHY_SPEED_CFG1); + writel(val, phy->base + EMAC_SGMII_PHY_AUTONEG_CFG2); + } + + return 0; +} + +static int emac_sgmii_irq_clear(struct emac_adapter *adpt, u32 irq_bits) +{ + struct emac_phy *phy = &adpt->phy; + u32 status; + + writel_relaxed(irq_bits, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); + writel_relaxed(IRQ_GLOBAL_CLEAR, phy->base + EMAC_SGMII_PHY_IRQ_CMD); + /* Ensure interrupt clear command is written to HW */ + wmb(); + + /* After set the IRQ_GLOBAL_CLEAR bit, the status clearing must + * be confirmed before clearing the bits in other registers. + * It takes a few cycles for hw to clear the interrupt status. + */ + if (readl_poll_timeout_atomic(phy->base + + EMAC_SGMII_PHY_INTERRUPT_STATUS, + status, !(status & irq_bits), 1, + SGMII_PHY_IRQ_CLR_WAIT_TIME)) { + netdev_err(adpt->netdev, + "error: failed clear SGMII irq: status:0x%x bits:0x%x\n", + status, irq_bits); + return -EIO; + } + + /* Finalize clearing procedure */ + writel_relaxed(0, phy->base + EMAC_SGMII_PHY_IRQ_CMD); + writel_relaxed(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_CLEAR); + + /* Ensure that clearing procedure finalization is written to HW */ + wmb(); + + return 0; +} + +int emac_sgmii_init_v1(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + unsigned int i; + int ret; + + ret = emac_sgmii_link_init(adpt); + if (ret) + return ret; + + emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v1, + ARRAY_SIZE(physical_coding_sublayer_programming_v1)); + emac_reg_write_all(phy->base, sysclk_refclk_setting, + ARRAY_SIZE(sysclk_refclk_setting)); + emac_reg_write_all(phy->base, pll_setting, ARRAY_SIZE(pll_setting)); + emac_reg_write_all(phy->base, cdr_setting, ARRAY_SIZE(cdr_setting)); + emac_reg_write_all(phy->base, tx_rx_setting, + ARRAY_SIZE(tx_rx_setting)); + + /* Power up the Ser/Des engine */ + writel(SERDES_START, phy->base + EMAC_SGMII_PHY_SERDES_START); + + for (i = 0; i < SERDES_START_WAIT_TIMES; i++) { + if (readl(phy->base + EMAC_QSERDES_COM_RESET_SM) & READY) + break; + usleep_range(100, 200); + } + + if (i == SERDES_START_WAIT_TIMES) { + netdev_err(adpt->netdev, "error: ser/des failed to start\n"); + return -EIO; + } + /* Mask out all the SGMII Interrupt */ + writel(0, phy->base + EMAC_SGMII_PHY_INTERRUPT_MASK); + + emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR); + + return 0; +} + +int emac_sgmii_init_v2(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + void __iomem *phy_regs = phy->base; + void __iomem *laned = phy->digital; + unsigned int i; + u32 lnstatus; + int ret; + + ret = emac_sgmii_link_init(adpt); + if (ret) + return ret; + + /* PCS lane-x init */ + emac_reg_write_all(phy->base, physical_coding_sublayer_programming_v2, + ARRAY_SIZE(physical_coding_sublayer_programming_v2)); + + /* SGMII lane-x init */ + emac_reg_write_all(phy->digital, + sgmii_v2_laned, ARRAY_SIZE(sgmii_v2_laned)); + + /* Power up PCS and start reset lane state machine */ + + writel(0, phy_regs + EMAC_SGMII_PHY_RESET_CTRL); + writel(1, laned + SGMII_LN_RSM_START); + + /* Wait for c_ready assertion */ + for (i = 0; i < SERDES_START_WAIT_TIMES; i++) { + lnstatus = readl(phy_regs + SGMII_PHY_LN_LANE_STATUS); + if (lnstatus & BIT(1)) + break; + usleep_range(100, 200); + } + + if (i == SERDES_START_WAIT_TIMES) { + netdev_err(adpt->netdev, "SGMII failed to start\n"); + return -EIO; + } + + /* Disable digital and SERDES loopback */ + writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN0); + writel(0, phy_regs + SGMII_PHY_LN_BIST_GEN2); + writel(0, phy_regs + SGMII_PHY_LN_CDR_CTRL1); + + /* Mask out all the SGMII Interrupt */ + writel(0, phy_regs + EMAC_SGMII_PHY_INTERRUPT_MASK); + + emac_sgmii_irq_clear(adpt, SGMII_PHY_INTERRUPT_ERR); + + return 0; +} + +static void emac_sgmii_reset_prepare(struct emac_adapter *adpt) +{ + struct emac_phy *phy = &adpt->phy; + u32 val; + + /* Reset PHY */ + val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2); + writel(((val & ~PHY_RESET) | PHY_RESET), phy->base + + EMAC_EMAC_WRAPPER_CSR2); + /* Ensure phy-reset command is written to HW before the release cmd */ + msleep(50); + val = readl(phy->base + EMAC_EMAC_WRAPPER_CSR2); + writel((val & ~PHY_RESET), phy->base + EMAC_EMAC_WRAPPER_CSR2); + /* Ensure phy-reset release command is written to HW before initializing + * SGMII + */ + msleep(50); +} + +void emac_sgmii_reset(struct emac_adapter *adpt) +{ + int ret; + + clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); + emac_sgmii_reset_prepare(adpt); + + ret = adpt->phy.initialize(adpt); + if (ret) + netdev_err(adpt->netdev, + "could not reinitialize internal PHY (error=%i)\n", + ret); + + clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); +} + +static int emac_sgmii_acpi_match(struct device *dev, void *data) +{ + static const struct acpi_device_id match_table[] = { + { + .id = "QCOM8071", + .driver_data = (kernel_ulong_t)emac_sgmii_init_v2, + }, + {} + }; + const struct acpi_device_id *id = acpi_match_device(match_table, dev); + emac_sgmii_initialize *initialize = data; + + if (id) + *initialize = (emac_sgmii_initialize)id->driver_data; + + return !!id; +} + +static const struct of_device_id emac_sgmii_dt_match[] = { + { + .compatible = "qcom,fsm9900-emac-sgmii", + .data = emac_sgmii_init_v1, + }, + { + .compatible = "qcom,qdf2432-emac-sgmii", + .data = emac_sgmii_init_v2, + }, + {} +}; + +int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt) +{ + struct platform_device *sgmii_pdev = NULL; + struct emac_phy *phy = &adpt->phy; + struct resource *res; + int ret; + + if (has_acpi_companion(&pdev->dev)) { + struct device *dev; + + dev = device_find_child(&pdev->dev, &phy->initialize, + emac_sgmii_acpi_match); + + if (!dev) { + dev_err(&pdev->dev, "cannot find internal phy node\n"); + return -ENODEV; + } + + sgmii_pdev = to_platform_device(dev); + } else { + const struct of_device_id *match; + struct device_node *np; + + np = of_parse_phandle(pdev->dev.of_node, "internal-phy", 0); + if (!np) { + dev_err(&pdev->dev, "missing internal-phy property\n"); + return -ENODEV; + } + + sgmii_pdev = of_find_device_by_node(np); + if (!sgmii_pdev) { + dev_err(&pdev->dev, "invalid internal-phy property\n"); + return -ENODEV; + } + + match = of_match_device(emac_sgmii_dt_match, &sgmii_pdev->dev); + if (!match) { + dev_err(&pdev->dev, "unrecognized internal phy node\n"); + ret = -ENODEV; + goto error_put_device; + } + + phy->initialize = (emac_sgmii_initialize)match->data; + } + + /* Base address is the first address */ + res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 0); + if (!res) { + ret = -EINVAL; + goto error_put_device; + } + + phy->base = ioremap(res->start, resource_size(res)); + if (!phy->base) { + ret = -ENOMEM; + goto error_put_device; + } + + /* v2 SGMII has a per-lane digital digital, so parse it if it exists */ + res = platform_get_resource(sgmii_pdev, IORESOURCE_MEM, 1); + if (res) { + phy->digital = ioremap(res->start, resource_size(res)); + if (!phy->digital) { + ret = -ENOMEM; + goto error_unmap_base; + } + } + + ret = phy->initialize(adpt); + if (ret) + goto error; + + /* We've remapped the addresses, so we don't need the device any + * more. of_find_device_by_node() says we should release it. + */ + put_device(&sgmii_pdev->dev); + + return 0; + +error: + if (phy->digital) + iounmap(phy->digital); +error_unmap_base: + iounmap(phy->base); +error_put_device: + put_device(&sgmii_pdev->dev); + + return ret; +} diff --git a/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h new file mode 100644 index 000000000000..ce79212ff403 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac-sgmii.h @@ -0,0 +1,24 @@ +/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _EMAC_SGMII_H_ +#define _EMAC_SGMII_H_ + +struct emac_adapter; +struct platform_device; + +int emac_sgmii_init_v1(struct emac_adapter *adpt); +int emac_sgmii_init_v2(struct emac_adapter *adpt); +int emac_sgmii_config(struct platform_device *pdev, struct emac_adapter *adpt); +void emac_sgmii_reset(struct emac_adapter *adpt); + +#endif diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c new file mode 100644 index 000000000000..9bf3b2b82e95 --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac.c @@ -0,0 +1,755 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +/* Qualcomm Technologies, Inc. EMAC Gigabit Ethernet Driver */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "emac.h" +#include "emac-mac.h" +#include "emac-phy.h" +#include "emac-sgmii.h" + +#define EMAC_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK | \ + NETIF_MSG_TIMER | NETIF_MSG_IFDOWN | NETIF_MSG_IFUP) + +#define EMAC_RRD_SIZE 4 +/* The RRD size if timestamping is enabled: */ +#define EMAC_TS_RRD_SIZE 6 +#define EMAC_TPD_SIZE 4 +#define EMAC_RFD_SIZE 2 + +#define REG_MAC_RX_STATUS_BIN EMAC_RXMAC_STATC_REG0 +#define REG_MAC_RX_STATUS_END EMAC_RXMAC_STATC_REG22 +#define REG_MAC_TX_STATUS_BIN EMAC_TXMAC_STATC_REG0 +#define REG_MAC_TX_STATUS_END EMAC_TXMAC_STATC_REG24 + +#define RXQ0_NUM_RFD_PREF_DEF 8 +#define TXQ0_NUM_TPD_PREF_DEF 5 + +#define EMAC_PREAMBLE_DEF 7 + +#define DMAR_DLY_CNT_DEF 15 +#define DMAW_DLY_CNT_DEF 4 + +#define IMR_NORMAL_MASK (\ + ISR_ERROR |\ + ISR_GPHY_LINK |\ + ISR_TX_PKT |\ + GPHY_WAKEUP_INT) + +#define IMR_EXTENDED_MASK (\ + SW_MAN_INT |\ + ISR_OVER |\ + ISR_ERROR |\ + ISR_GPHY_LINK |\ + ISR_TX_PKT |\ + GPHY_WAKEUP_INT) + +#define ISR_TX_PKT (\ + TX_PKT_INT |\ + TX_PKT_INT1 |\ + TX_PKT_INT2 |\ + TX_PKT_INT3) + +#define ISR_GPHY_LINK (\ + GPHY_LINK_UP_INT |\ + GPHY_LINK_DOWN_INT) + +#define ISR_OVER (\ + RFD0_UR_INT |\ + RFD1_UR_INT |\ + RFD2_UR_INT |\ + RFD3_UR_INT |\ + RFD4_UR_INT |\ + RXF_OF_INT |\ + TXF_UR_INT) + +#define ISR_ERROR (\ + DMAR_TO_INT |\ + DMAW_TO_INT |\ + TXQ_TO_INT) + +/* in sync with enum emac_clk_id */ +static const char * const emac_clk_name[] = { + "axi_clk", "cfg_ahb_clk", "high_speed_clk", "mdio_clk", "tx_clk", + "rx_clk", "sys_clk" +}; + +void emac_reg_update32(void __iomem *addr, u32 mask, u32 val) +{ + u32 data = readl(addr); + + writel(((data & ~mask) | val), addr); +} + +/* reinitialize */ +int emac_reinit_locked(struct emac_adapter *adpt) +{ + int ret; + + mutex_lock(&adpt->reset_lock); + + emac_mac_down(adpt); + emac_sgmii_reset(adpt); + ret = emac_mac_up(adpt); + + mutex_unlock(&adpt->reset_lock); + + return ret; +} + +/* NAPI */ +static int emac_napi_rtx(struct napi_struct *napi, int budget) +{ + struct emac_rx_queue *rx_q = + container_of(napi, struct emac_rx_queue, napi); + struct emac_adapter *adpt = netdev_priv(rx_q->netdev); + struct emac_irq *irq = rx_q->irq; + int work_done = 0; + + emac_mac_rx_process(adpt, rx_q, &work_done, budget); + + if (work_done < budget) { + napi_complete(napi); + + irq->mask |= rx_q->intr; + writel(irq->mask, adpt->base + EMAC_INT_MASK); + } + + return work_done; +} + +/* Transmit the packet */ +static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + return emac_mac_tx_buf_send(adpt, &adpt->tx_q, skb); +} + +irqreturn_t emac_isr(int _irq, void *data) +{ + struct emac_irq *irq = data; + struct emac_adapter *adpt = + container_of(irq, struct emac_adapter, irq); + struct emac_rx_queue *rx_q = &adpt->rx_q; + u32 isr, status; + + /* disable the interrupt */ + writel(0, adpt->base + EMAC_INT_MASK); + + isr = readl_relaxed(adpt->base + EMAC_INT_STATUS); + + status = isr & irq->mask; + if (status == 0) + goto exit; + + if (status & ISR_ERROR) { + netif_warn(adpt, intr, adpt->netdev, + "warning: error irq status 0x%lx\n", + status & ISR_ERROR); + /* reset MAC */ + schedule_work(&adpt->work_thread); + } + + /* Schedule the napi for receive queue with interrupt + * status bit set + */ + if (status & rx_q->intr) { + if (napi_schedule_prep(&rx_q->napi)) { + irq->mask &= ~rx_q->intr; + __napi_schedule(&rx_q->napi); + } + } + + if (status & TX_PKT_INT) + emac_mac_tx_process(adpt, &adpt->tx_q); + + if (status & ISR_OVER) + net_warn_ratelimited("warning: TX/RX overflow\n"); + + /* link event */ + if (status & ISR_GPHY_LINK) + phy_mac_interrupt(adpt->phydev, !!(status & GPHY_LINK_UP_INT)); + +exit: + /* enable the interrupt */ + writel(irq->mask, adpt->base + EMAC_INT_MASK); + + return IRQ_HANDLED; +} + +/* Configure VLAN tag strip/insert feature */ +static int emac_set_features(struct net_device *netdev, + netdev_features_t features) +{ + netdev_features_t changed = features ^ netdev->features; + struct emac_adapter *adpt = netdev_priv(netdev); + + /* We only need to reprogram the hardware if the VLAN tag features + * have changed, and if it's already running. + */ + if (!(changed & (NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX))) + return 0; + + if (!netif_running(netdev)) + return 0; + + /* emac_mac_mode_config() uses netdev->features to configure the EMAC, + * so make sure it's set first. + */ + netdev->features = features; + + return emac_reinit_locked(adpt); +} + +/* Configure Multicast and Promiscuous modes */ +static void emac_rx_mode_set(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + struct netdev_hw_addr *ha; + + emac_mac_mode_config(adpt); + + /* update multicast address filtering */ + emac_mac_multicast_addr_clear(adpt); + netdev_for_each_mc_addr(ha, netdev) + emac_mac_multicast_addr_set(adpt, ha->addr); +} + +/* Change the Maximum Transfer Unit (MTU) */ +static int emac_change_mtu(struct net_device *netdev, int new_mtu) +{ + unsigned int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + struct emac_adapter *adpt = netdev_priv(netdev); + + if ((max_frame < EMAC_MIN_ETH_FRAME_SIZE) || + (max_frame > EMAC_MAX_ETH_FRAME_SIZE)) { + netdev_err(adpt->netdev, "error: invalid MTU setting\n"); + return -EINVAL; + } + + netif_info(adpt, hw, adpt->netdev, + "changing MTU from %d to %d\n", netdev->mtu, + new_mtu); + netdev->mtu = new_mtu; + + if (netif_running(netdev)) + return emac_reinit_locked(adpt); + + return 0; +} + +/* Called when the network interface is made active */ +static int emac_open(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + int ret; + + /* allocate rx/tx dma buffer & descriptors */ + ret = emac_mac_rx_tx_rings_alloc_all(adpt); + if (ret) { + netdev_err(adpt->netdev, "error allocating rx/tx rings\n"); + return ret; + } + + ret = emac_mac_up(adpt); + if (ret) { + emac_mac_rx_tx_rings_free_all(adpt); + return ret; + } + + emac_mac_start(adpt); + + return 0; +} + +/* Called when the network interface is disabled */ +static int emac_close(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + mutex_lock(&adpt->reset_lock); + + emac_mac_down(adpt); + emac_mac_rx_tx_rings_free_all(adpt); + + mutex_unlock(&adpt->reset_lock); + + return 0; +} + +/* Respond to a TX hang */ +static void emac_tx_timeout(struct net_device *netdev) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + + schedule_work(&adpt->work_thread); +} + +/* IOCTL support for the interface */ +static int emac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + if (!netif_running(netdev)) + return -EINVAL; + + if (!netdev->phydev) + return -ENODEV; + + return phy_mii_ioctl(netdev->phydev, ifr, cmd); +} + +/* Provide network statistics info for the interface */ +static struct rtnl_link_stats64 *emac_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *net_stats) +{ + struct emac_adapter *adpt = netdev_priv(netdev); + unsigned int addr = REG_MAC_RX_STATUS_BIN; + struct emac_stats *stats = &adpt->stats; + u64 *stats_itr = &adpt->stats.rx_ok; + u32 val; + + spin_lock(&stats->lock); + + while (addr <= REG_MAC_RX_STATUS_END) { + val = readl_relaxed(adpt->base + addr); + *stats_itr += val; + stats_itr++; + addr += sizeof(u32); + } + + /* additional rx status */ + val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG23); + adpt->stats.rx_crc_align += val; + val = readl_relaxed(adpt->base + EMAC_RXMAC_STATC_REG24); + adpt->stats.rx_jabbers += val; + + /* update tx status */ + addr = REG_MAC_TX_STATUS_BIN; + stats_itr = &adpt->stats.tx_ok; + + while (addr <= REG_MAC_TX_STATUS_END) { + val = readl_relaxed(adpt->base + addr); + *stats_itr += val; + ++stats_itr; + addr += sizeof(u32); + } + + /* additional tx status */ + val = readl_relaxed(adpt->base + EMAC_TXMAC_STATC_REG25); + adpt->stats.tx_col += val; + + /* return parsed statistics */ + net_stats->rx_packets = stats->rx_ok; + net_stats->tx_packets = stats->tx_ok; + net_stats->rx_bytes = stats->rx_byte_cnt; + net_stats->tx_bytes = stats->tx_byte_cnt; + net_stats->multicast = stats->rx_mcast; + net_stats->collisions = stats->tx_1_col + stats->tx_2_col * 2 + + stats->tx_late_col + stats->tx_abort_col; + + net_stats->rx_errors = stats->rx_frag + stats->rx_fcs_err + + stats->rx_len_err + stats->rx_sz_ov + + stats->rx_align_err; + net_stats->rx_fifo_errors = stats->rx_rxf_ov; + net_stats->rx_length_errors = stats->rx_len_err; + net_stats->rx_crc_errors = stats->rx_fcs_err; + net_stats->rx_frame_errors = stats->rx_align_err; + net_stats->rx_over_errors = stats->rx_rxf_ov; + net_stats->rx_missed_errors = stats->rx_rxf_ov; + + net_stats->tx_errors = stats->tx_late_col + stats->tx_abort_col + + stats->tx_underrun + stats->tx_trunc; + net_stats->tx_fifo_errors = stats->tx_underrun; + net_stats->tx_aborted_errors = stats->tx_abort_col; + net_stats->tx_window_errors = stats->tx_late_col; + + spin_unlock(&stats->lock); + + return net_stats; +} + +static const struct net_device_ops emac_netdev_ops = { + .ndo_open = emac_open, + .ndo_stop = emac_close, + .ndo_validate_addr = eth_validate_addr, + .ndo_start_xmit = emac_start_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_change_mtu = emac_change_mtu, + .ndo_do_ioctl = emac_ioctl, + .ndo_tx_timeout = emac_tx_timeout, + .ndo_get_stats64 = emac_get_stats64, + .ndo_set_features = emac_set_features, + .ndo_set_rx_mode = emac_rx_mode_set, +}; + +/* Watchdog task routine, called to reinitialize the EMAC */ +static void emac_work_thread(struct work_struct *work) +{ + struct emac_adapter *adpt = + container_of(work, struct emac_adapter, work_thread); + + emac_reinit_locked(adpt); +} + +/* Initialize various data structures */ +static void emac_init_adapter(struct emac_adapter *adpt) +{ + u32 reg; + + /* descriptors */ + adpt->tx_desc_cnt = EMAC_DEF_TX_DESCS; + adpt->rx_desc_cnt = EMAC_DEF_RX_DESCS; + + /* dma */ + adpt->dma_order = emac_dma_ord_out; + adpt->dmar_block = emac_dma_req_4096; + adpt->dmaw_block = emac_dma_req_128; + adpt->dmar_dly_cnt = DMAR_DLY_CNT_DEF; + adpt->dmaw_dly_cnt = DMAW_DLY_CNT_DEF; + adpt->tpd_burst = TXQ0_NUM_TPD_PREF_DEF; + adpt->rfd_burst = RXQ0_NUM_RFD_PREF_DEF; + + /* irq moderator */ + reg = ((EMAC_DEF_RX_IRQ_MOD >> 1) << IRQ_MODERATOR2_INIT_SHFT) | + ((EMAC_DEF_TX_IRQ_MOD >> 1) << IRQ_MODERATOR_INIT_SHFT); + adpt->irq_mod = reg; + + /* others */ + adpt->preamble = EMAC_PREAMBLE_DEF; +} + +/* Get the clock */ +static int emac_clks_get(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + unsigned int i; + + for (i = 0; i < EMAC_CLK_CNT; i++) { + struct clk *clk = devm_clk_get(&pdev->dev, emac_clk_name[i]); + + if (IS_ERR(clk)) { + dev_err(&pdev->dev, + "could not claim clock %s (error=%li)\n", + emac_clk_name[i], PTR_ERR(clk)); + + return PTR_ERR(clk); + } + + adpt->clk[i] = clk; + } + + return 0; +} + +/* Initialize clocks */ +static int emac_clks_phase1_init(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + int ret; + + ret = emac_clks_get(pdev, adpt); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_AXI]); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_CFG_AHB]); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 19200000); + if (ret) + return ret; + + return clk_prepare_enable(adpt->clk[EMAC_CLK_HIGH_SPEED]); +} + +/* Enable clocks; needs emac_clks_phase1_init to be called before */ +static int emac_clks_phase2_init(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + int ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_TX], 125000000); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_TX]); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_HIGH_SPEED], 125000000); + if (ret) + return ret; + + ret = clk_set_rate(adpt->clk[EMAC_CLK_MDIO], 25000000); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_MDIO]); + if (ret) + return ret; + + ret = clk_prepare_enable(adpt->clk[EMAC_CLK_RX]); + if (ret) + return ret; + + return clk_prepare_enable(adpt->clk[EMAC_CLK_SYS]); +} + +static void emac_clks_teardown(struct emac_adapter *adpt) +{ + + unsigned int i; + + for (i = 0; i < EMAC_CLK_CNT; i++) + clk_disable_unprepare(adpt->clk[i]); +} + +/* Get the resources */ +static int emac_probe_resources(struct platform_device *pdev, + struct emac_adapter *adpt) +{ + struct net_device *netdev = adpt->netdev; + struct resource *res; + char maddr[ETH_ALEN]; + int ret = 0; + + /* get mac address */ + if (device_get_mac_address(&pdev->dev, maddr, ETH_ALEN)) + ether_addr_copy(netdev->dev_addr, maddr); + else + eth_hw_addr_random(netdev); + + /* Core 0 interrupt */ + ret = platform_get_irq(pdev, 0); + if (ret < 0) { + dev_err(&pdev->dev, + "error: missing core0 irq resource (error=%i)\n", ret); + return ret; + } + adpt->irq.irq = ret; + + /* base register address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + adpt->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(adpt->base)) + return PTR_ERR(adpt->base); + + /* CSR register address */ + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + adpt->csr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(adpt->csr)) + return PTR_ERR(adpt->csr); + + netdev->base_addr = (unsigned long)adpt->base; + + return 0; +} + +static const struct of_device_id emac_dt_match[] = { + { + .compatible = "qcom,fsm9900-emac", + }, + {} +}; + +#if IS_ENABLED(CONFIG_ACPI) +static const struct acpi_device_id emac_acpi_match[] = { + { + .id = "QCOM8070", + }, + {} +}; +MODULE_DEVICE_TABLE(acpi, emac_acpi_match); +#endif + +static int emac_probe(struct platform_device *pdev) +{ + struct net_device *netdev; + struct emac_adapter *adpt; + struct emac_phy *phy; + u16 devid, revid; + u32 reg; + int ret; + + /* The EMAC itself is capable of 64-bit DMA, so try that first. */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (ret) { + /* Some platforms may restrict the EMAC's address bus to less + * then the size of DDR. In this case, we need to try a + * smaller mask. We could try every possible smaller mask, + * but that's overkill. Instead, just fall to 32-bit, which + * should always work. + */ + ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32)); + if (ret) { + dev_err(&pdev->dev, "could not set DMA mask\n"); + return ret; + } + } + + netdev = alloc_etherdev(sizeof(struct emac_adapter)); + if (!netdev) + return -ENOMEM; + + dev_set_drvdata(&pdev->dev, netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + adpt = netdev_priv(netdev); + adpt->netdev = netdev; + adpt->msg_enable = EMAC_MSG_DEFAULT; + + phy = &adpt->phy; + + mutex_init(&adpt->reset_lock); + spin_lock_init(&adpt->stats.lock); + + adpt->irq.mask = RX_PKT_INT0 | IMR_NORMAL_MASK; + + ret = emac_probe_resources(pdev, adpt); + if (ret) + goto err_undo_netdev; + + /* initialize clocks */ + ret = emac_clks_phase1_init(pdev, adpt); + if (ret) { + dev_err(&pdev->dev, "could not initialize clocks\n"); + goto err_undo_netdev; + } + + netdev->watchdog_timeo = EMAC_WATCHDOG_TIME; + netdev->irq = adpt->irq.irq; + + adpt->rrd_size = EMAC_RRD_SIZE; + adpt->tpd_size = EMAC_TPD_SIZE; + adpt->rfd_size = EMAC_RFD_SIZE; + + netdev->netdev_ops = &emac_netdev_ops; + + emac_init_adapter(adpt); + + /* init external phy */ + ret = emac_phy_config(pdev, adpt); + if (ret) + goto err_undo_clocks; + + /* init internal sgmii phy */ + ret = emac_sgmii_config(pdev, adpt); + if (ret) + goto err_undo_mdiobus; + + /* enable clocks */ + ret = emac_clks_phase2_init(pdev, adpt); + if (ret) { + dev_err(&pdev->dev, "could not initialize clocks\n"); + goto err_undo_mdiobus; + } + + emac_mac_reset(adpt); + + /* set hw features */ + netdev->features = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM | + NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; + netdev->hw_features = netdev->features; + + netdev->vlan_features |= NETIF_F_SG | NETIF_F_HW_CSUM | + NETIF_F_TSO | NETIF_F_TSO6; + + INIT_WORK(&adpt->work_thread, emac_work_thread); + + /* Initialize queues */ + emac_mac_rx_tx_ring_init_all(pdev, adpt); + + netif_napi_add(netdev, &adpt->rx_q.napi, emac_napi_rtx, + NAPI_POLL_WEIGHT); + + ret = register_netdev(netdev); + if (ret) { + dev_err(&pdev->dev, "could not register net device\n"); + goto err_undo_napi; + } + + reg = readl_relaxed(adpt->base + EMAC_DMA_MAS_CTRL); + devid = (reg & DEV_ID_NUM_BMSK) >> DEV_ID_NUM_SHFT; + revid = (reg & DEV_REV_NUM_BMSK) >> DEV_REV_NUM_SHFT; + reg = readl_relaxed(adpt->base + EMAC_CORE_HW_VERSION); + + netif_info(adpt, probe, netdev, + "hardware id %d.%d, hardware version %d.%d.%d\n", + devid, revid, + (reg & MAJOR_BMSK) >> MAJOR_SHFT, + (reg & MINOR_BMSK) >> MINOR_SHFT, + (reg & STEP_BMSK) >> STEP_SHFT); + + return 0; + +err_undo_napi: + netif_napi_del(&adpt->rx_q.napi); +err_undo_mdiobus: + mdiobus_unregister(adpt->mii_bus); +err_undo_clocks: + emac_clks_teardown(adpt); +err_undo_netdev: + free_netdev(netdev); + + return ret; +} + +static int emac_remove(struct platform_device *pdev) +{ + struct net_device *netdev = dev_get_drvdata(&pdev->dev); + struct emac_adapter *adpt = netdev_priv(netdev); + + unregister_netdev(netdev); + netif_napi_del(&adpt->rx_q.napi); + + emac_clks_teardown(adpt); + + mdiobus_unregister(adpt->mii_bus); + free_netdev(netdev); + + if (adpt->phy.digital) + iounmap(adpt->phy.digital); + iounmap(adpt->phy.base); + + return 0; +} + +static struct platform_driver emac_platform_driver = { + .probe = emac_probe, + .remove = emac_remove, + .driver = { + .name = "qcom-emac", + .of_match_table = emac_dt_match, + .acpi_match_table = ACPI_PTR(emac_acpi_match), + }, +}; + +module_platform_driver(emac_platform_driver); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS("platform:qcom-emac"); diff --git a/drivers/net/ethernet/qualcomm/emac/emac.h b/drivers/net/ethernet/qualcomm/emac/emac.h new file mode 100644 index 000000000000..0c76e6cb8c9e --- /dev/null +++ b/drivers/net/ethernet/qualcomm/emac/emac.h @@ -0,0 +1,335 @@ +/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _EMAC_H_ +#define _EMAC_H_ + +#include +#include +#include +#include +#include "emac-mac.h" +#include "emac-phy.h" + +/* EMAC base register offsets */ +#define EMAC_DMA_MAS_CTRL 0x001400 +#define EMAC_IRQ_MOD_TIM_INIT 0x001408 +#define EMAC_BLK_IDLE_STS 0x00140c +#define EMAC_PHY_LINK_DELAY 0x00141c +#define EMAC_SYS_ALIV_CTRL 0x001434 +#define EMAC_MAC_IPGIFG_CTRL 0x001484 +#define EMAC_MAC_STA_ADDR0 0x001488 +#define EMAC_MAC_STA_ADDR1 0x00148c +#define EMAC_HASH_TAB_REG0 0x001490 +#define EMAC_HASH_TAB_REG1 0x001494 +#define EMAC_MAC_HALF_DPLX_CTRL 0x001498 +#define EMAC_MAX_FRAM_LEN_CTRL 0x00149c +#define EMAC_INT_STATUS 0x001600 +#define EMAC_INT_MASK 0x001604 +#define EMAC_RXMAC_STATC_REG0 0x001700 +#define EMAC_RXMAC_STATC_REG22 0x001758 +#define EMAC_TXMAC_STATC_REG0 0x001760 +#define EMAC_TXMAC_STATC_REG24 0x0017c0 +#define EMAC_CORE_HW_VERSION 0x001974 +#define EMAC_IDT_TABLE0 0x001b00 +#define EMAC_RXMAC_STATC_REG23 0x001bc8 +#define EMAC_RXMAC_STATC_REG24 0x001bcc +#define EMAC_TXMAC_STATC_REG25 0x001bd0 +#define EMAC_INT1_MASK 0x001bf0 +#define EMAC_INT1_STATUS 0x001bf4 +#define EMAC_INT2_MASK 0x001bf8 +#define EMAC_INT2_STATUS 0x001bfc +#define EMAC_INT3_MASK 0x001c00 +#define EMAC_INT3_STATUS 0x001c04 + +/* EMAC_DMA_MAS_CTRL */ +#define DEV_ID_NUM_BMSK 0x7f000000 +#define DEV_ID_NUM_SHFT 24 +#define DEV_REV_NUM_BMSK 0xff0000 +#define DEV_REV_NUM_SHFT 16 +#define INT_RD_CLR_EN 0x4000 +#define IRQ_MODERATOR2_EN 0x800 +#define IRQ_MODERATOR_EN 0x400 +#define LPW_CLK_SEL 0x80 +#define LPW_STATE 0x20 +#define LPW_MODE 0x10 +#define SOFT_RST 0x1 + +/* EMAC_IRQ_MOD_TIM_INIT */ +#define IRQ_MODERATOR2_INIT_BMSK 0xffff0000 +#define IRQ_MODERATOR2_INIT_SHFT 16 +#define IRQ_MODERATOR_INIT_BMSK 0xffff +#define IRQ_MODERATOR_INIT_SHFT 0 + +/* EMAC_INT_STATUS */ +#define DIS_INT BIT(31) +#define PTP_INT BIT(30) +#define RFD4_UR_INT BIT(29) +#define TX_PKT_INT3 BIT(26) +#define TX_PKT_INT2 BIT(25) +#define TX_PKT_INT1 BIT(24) +#define RX_PKT_INT3 BIT(19) +#define RX_PKT_INT2 BIT(18) +#define RX_PKT_INT1 BIT(17) +#define RX_PKT_INT0 BIT(16) +#define TX_PKT_INT BIT(15) +#define TXQ_TO_INT BIT(14) +#define GPHY_WAKEUP_INT BIT(13) +#define GPHY_LINK_DOWN_INT BIT(12) +#define GPHY_LINK_UP_INT BIT(11) +#define DMAW_TO_INT BIT(10) +#define DMAR_TO_INT BIT(9) +#define TXF_UR_INT BIT(8) +#define RFD3_UR_INT BIT(7) +#define RFD2_UR_INT BIT(6) +#define RFD1_UR_INT BIT(5) +#define RFD0_UR_INT BIT(4) +#define RXF_OF_INT BIT(3) +#define SW_MAN_INT BIT(2) + +/* EMAC_MAILBOX_6 */ +#define RFD2_PROC_IDX_BMSK 0xfff0000 +#define RFD2_PROC_IDX_SHFT 16 +#define RFD2_PROD_IDX_BMSK 0xfff +#define RFD2_PROD_IDX_SHFT 0 + +/* EMAC_CORE_HW_VERSION */ +#define MAJOR_BMSK 0xf0000000 +#define MAJOR_SHFT 28 +#define MINOR_BMSK 0xfff0000 +#define MINOR_SHFT 16 +#define STEP_BMSK 0xffff +#define STEP_SHFT 0 + +/* EMAC_EMAC_WRAPPER_CSR1 */ +#define TX_INDX_FIFO_SYNC_RST BIT(23) +#define TX_TS_FIFO_SYNC_RST BIT(22) +#define RX_TS_FIFO2_SYNC_RST BIT(21) +#define RX_TS_FIFO1_SYNC_RST BIT(20) +#define TX_TS_ENABLE BIT(16) +#define DIS_1588_CLKS BIT(11) +#define FREQ_MODE BIT(9) +#define ENABLE_RRD_TIMESTAMP BIT(3) + +/* EMAC_EMAC_WRAPPER_CSR2 */ +#define HDRIVE_BMSK 0x3000 +#define HDRIVE_SHFT 12 +#define SLB_EN BIT(9) +#define PLB_EN BIT(8) +#define WOL_EN BIT(3) +#define PHY_RESET BIT(0) + +#define EMAC_DEV_ID 0x0040 + +/* SGMII v2 per lane registers */ +#define SGMII_LN_RSM_START 0x029C + +/* SGMII v2 PHY common registers */ +#define SGMII_PHY_CMN_CTRL 0x0408 +#define SGMII_PHY_CMN_RESET_CTRL 0x0410 + +/* SGMII v2 PHY registers per lane */ +#define SGMII_PHY_LN_OFFSET 0x0400 +#define SGMII_PHY_LN_LANE_STATUS 0x00DC +#define SGMII_PHY_LN_BIST_GEN0 0x008C +#define SGMII_PHY_LN_BIST_GEN1 0x0090 +#define SGMII_PHY_LN_BIST_GEN2 0x0094 +#define SGMII_PHY_LN_BIST_GEN3 0x0098 +#define SGMII_PHY_LN_CDR_CTRL1 0x005C + +enum emac_clk_id { + EMAC_CLK_AXI, + EMAC_CLK_CFG_AHB, + EMAC_CLK_HIGH_SPEED, + EMAC_CLK_MDIO, + EMAC_CLK_TX, + EMAC_CLK_RX, + EMAC_CLK_SYS, + EMAC_CLK_CNT +}; + +#define EMAC_LINK_SPEED_UNKNOWN 0x0 +#define EMAC_LINK_SPEED_10_HALF BIT(0) +#define EMAC_LINK_SPEED_10_FULL BIT(1) +#define EMAC_LINK_SPEED_100_HALF BIT(2) +#define EMAC_LINK_SPEED_100_FULL BIT(3) +#define EMAC_LINK_SPEED_1GB_FULL BIT(5) + +#define EMAC_MAX_SETUP_LNK_CYCLE 100 + +/* Wake On Lan */ +#define EMAC_WOL_PHY 0x00000001 /* PHY Status Change */ +#define EMAC_WOL_MAGIC 0x00000002 /* Magic Packet */ + +struct emac_stats { + /* rx */ + u64 rx_ok; /* good packets */ + u64 rx_bcast; /* good broadcast packets */ + u64 rx_mcast; /* good multicast packets */ + u64 rx_pause; /* pause packet */ + u64 rx_ctrl; /* control packets other than pause frame. */ + u64 rx_fcs_err; /* packets with bad FCS. */ + u64 rx_len_err; /* packets with length mismatch */ + u64 rx_byte_cnt; /* good bytes count (without FCS) */ + u64 rx_runt; /* runt packets */ + u64 rx_frag; /* fragment count */ + u64 rx_sz_64; /* packets that are 64 bytes */ + u64 rx_sz_65_127; /* packets that are 65-127 bytes */ + u64 rx_sz_128_255; /* packets that are 128-255 bytes */ + u64 rx_sz_256_511; /* packets that are 256-511 bytes */ + u64 rx_sz_512_1023; /* packets that are 512-1023 bytes */ + u64 rx_sz_1024_1518; /* packets that are 1024-1518 bytes */ + u64 rx_sz_1519_max; /* packets that are 1519-MTU bytes*/ + u64 rx_sz_ov; /* packets that are >MTU bytes (truncated) */ + u64 rx_rxf_ov; /* packets dropped due to RX FIFO overflow */ + u64 rx_align_err; /* alignment errors */ + u64 rx_bcast_byte_cnt; /* broadcast packets byte count (without FCS) */ + u64 rx_mcast_byte_cnt; /* multicast packets byte count (without FCS) */ + u64 rx_err_addr; /* packets dropped due to address filtering */ + u64 rx_crc_align; /* CRC align errors */ + u64 rx_jabbers; /* jabbers */ + + /* tx */ + u64 tx_ok; /* good packets */ + u64 tx_bcast; /* good broadcast packets */ + u64 tx_mcast; /* good multicast packets */ + u64 tx_pause; /* pause packets */ + u64 tx_exc_defer; /* packets with excessive deferral */ + u64 tx_ctrl; /* control packets other than pause frame */ + u64 tx_defer; /* packets that are deferred. */ + u64 tx_byte_cnt; /* good bytes count (without FCS) */ + u64 tx_sz_64; /* packets that are 64 bytes */ + u64 tx_sz_65_127; /* packets that are 65-127 bytes */ + u64 tx_sz_128_255; /* packets that are 128-255 bytes */ + u64 tx_sz_256_511; /* packets that are 256-511 bytes */ + u64 tx_sz_512_1023; /* packets that are 512-1023 bytes */ + u64 tx_sz_1024_1518; /* packets that are 1024-1518 bytes */ + u64 tx_sz_1519_max; /* packets that are 1519-MTU bytes */ + u64 tx_1_col; /* packets single prior collision */ + u64 tx_2_col; /* packets with multiple prior collisions */ + u64 tx_late_col; /* packets with late collisions */ + u64 tx_abort_col; /* packets aborted due to excess collisions */ + u64 tx_underrun; /* packets aborted due to FIFO underrun */ + u64 tx_rd_eop; /* count of reads beyond EOP */ + u64 tx_len_err; /* packets with length mismatch */ + u64 tx_trunc; /* packets truncated due to size >MTU */ + u64 tx_bcast_byte; /* broadcast packets byte count (without FCS) */ + u64 tx_mcast_byte; /* multicast packets byte count (without FCS) */ + u64 tx_col; /* collisions */ + + spinlock_t lock; /* prevent multiple simultaneous readers */ +}; + +/* RSS hstype Definitions */ +#define EMAC_RSS_HSTYP_IPV4_EN 0x00000001 +#define EMAC_RSS_HSTYP_TCP4_EN 0x00000002 +#define EMAC_RSS_HSTYP_IPV6_EN 0x00000004 +#define EMAC_RSS_HSTYP_TCP6_EN 0x00000008 +#define EMAC_RSS_HSTYP_ALL_EN (\ + EMAC_RSS_HSTYP_IPV4_EN |\ + EMAC_RSS_HSTYP_TCP4_EN |\ + EMAC_RSS_HSTYP_IPV6_EN |\ + EMAC_RSS_HSTYP_TCP6_EN) + +#define EMAC_VLAN_TO_TAG(_vlan, _tag) \ + (_tag = ((((_vlan) >> 8) & 0xFF) | (((_vlan) & 0xFF) << 8))) + +#define EMAC_TAG_TO_VLAN(_tag, _vlan) \ + (_vlan = ((((_tag) >> 8) & 0xFF) | (((_tag) & 0xFF) << 8))) + +#define EMAC_DEF_RX_BUF_SIZE 1536 +#define EMAC_MAX_JUMBO_PKT_SIZE (9 * 1024) +#define EMAC_MAX_TX_OFFLOAD_THRESH (9 * 1024) + +#define EMAC_MAX_ETH_FRAME_SIZE EMAC_MAX_JUMBO_PKT_SIZE +#define EMAC_MIN_ETH_FRAME_SIZE 68 + +#define EMAC_DEF_TX_QUEUES 1 +#define EMAC_DEF_RX_QUEUES 1 + +#define EMAC_MIN_TX_DESCS 128 +#define EMAC_MIN_RX_DESCS 128 + +#define EMAC_MAX_TX_DESCS 16383 +#define EMAC_MAX_RX_DESCS 2047 + +#define EMAC_DEF_TX_DESCS 512 +#define EMAC_DEF_RX_DESCS 256 + +#define EMAC_DEF_RX_IRQ_MOD 250 +#define EMAC_DEF_TX_IRQ_MOD 250 + +#define EMAC_WATCHDOG_TIME (5 * HZ) + +/* by default check link every 4 seconds */ +#define EMAC_TRY_LINK_TIMEOUT (4 * HZ) + +/* emac_irq per-device (per-adapter) irq properties. + * @irq: irq number. + * @mask mask to use over status register. + */ +struct emac_irq { + unsigned int irq; + u32 mask; +}; + +/* The device's main data structure */ +struct emac_adapter { + struct net_device *netdev; + struct mii_bus *mii_bus; + struct phy_device *phydev; + + void __iomem *base; + void __iomem *csr; + + struct emac_phy phy; + struct emac_stats stats; + + struct emac_irq irq; + struct clk *clk[EMAC_CLK_CNT]; + + /* All Descriptor memory */ + struct emac_ring_header ring_header; + struct emac_tx_queue tx_q; + struct emac_rx_queue rx_q; + unsigned int tx_desc_cnt; + unsigned int rx_desc_cnt; + unsigned int rrd_size; /* in quad words */ + unsigned int rfd_size; /* in quad words */ + unsigned int tpd_size; /* in quad words */ + + unsigned int rxbuf_size; + + /* Ring parameter */ + u8 tpd_burst; + u8 rfd_burst; + unsigned int dmaw_dly_cnt; + unsigned int dmar_dly_cnt; + enum emac_dma_req_block dmar_block; + enum emac_dma_req_block dmaw_block; + enum emac_dma_order dma_order; + + u32 irq_mod; + u32 preamble; + + struct work_struct work_thread; + + u16 msg_enable; + + struct mutex reset_lock; +}; + +int emac_reinit_locked(struct emac_adapter *adpt); +void emac_reg_update32(void __iomem *addr, u32 mask, u32 val); +irqreturn_t emac_isr(int irq, void *data); + +#endif /* _EMAC_H_ */ diff --git a/drivers/net/ethernet/rdc/r6040.c b/drivers/net/ethernet/rdc/r6040.c index cb29ee24cf1b..5ef5d728c250 100644 --- a/drivers/net/ethernet/rdc/r6040.c +++ b/drivers/net/ethernet/rdc/r6040.c @@ -1062,14 +1062,12 @@ static int r6040_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) /* this should always be supported */ err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "32-bit PCI DMA addresses" - "not supported by the card\n"); + dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n"); goto err_out_disable_dev; } err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32)); if (err) { - dev_err(&pdev->dev, "32-bit PCI DMA addresses" - "not supported by the card\n"); + dev_err(&pdev->dev, "32-bit PCI DMA addresses not supported by the card\n"); goto err_out_disable_dev; } diff --git a/drivers/net/ethernet/renesas/Kconfig b/drivers/net/ethernet/renesas/Kconfig index 4f132cf177cd..85ec447c2d18 100644 --- a/drivers/net/ethernet/renesas/Kconfig +++ b/drivers/net/ethernet/renesas/Kconfig @@ -27,7 +27,7 @@ config SH_ETH Renesas SuperH Ethernet device driver. This driver supporting CPUs are: - SH7619, SH7710, SH7712, SH7724, SH7734, SH7763, SH7757, - R8A7740, R8A777x and R8A779x. + R8A7740, R8A774x, R8A777x and R8A779x. config RAVB tristate "Renesas Ethernet AVB support" diff --git a/drivers/net/ethernet/renesas/ravb.h b/drivers/net/ethernet/renesas/ravb.h index 4e5d5e953e15..f1109661a533 100644 --- a/drivers/net/ethernet/renesas/ravb.h +++ b/drivers/net/ethernet/renesas/ravb.h @@ -1011,7 +1011,6 @@ struct ravb_private { struct work_struct work; /* MII transceiver section. */ struct mii_bus *mii_bus; /* MDIO bus control */ - struct phy_device *phydev; /* PHY device control */ int link; phy_interface_t phy_interface; int msg_enable; diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 1e1cc0fad17f..630536bc72f9 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -942,7 +942,7 @@ out: static void ravb_adjust_link(struct net_device *ndev) { struct ravb_private *priv = netdev_priv(ndev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = ndev->phydev; bool new_state = false; if (phydev->link) { @@ -1032,48 +1032,47 @@ static int ravb_phy_init(struct net_device *ndev) phy_attached_info(phydev); - priv->phydev = phydev; - return 0; } /* PHY control start function */ static int ravb_phy_start(struct net_device *ndev) { - struct ravb_private *priv = netdev_priv(ndev); int error; error = ravb_phy_init(ndev); if (error) return error; - phy_start(priv->phydev); + phy_start(ndev->phydev); return 0; } -static int ravb_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) +static int ravb_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct ravb_private *priv = netdev_priv(ndev); int error = -ENODEV; unsigned long flags; - if (priv->phydev) { + if (ndev->phydev) { spin_lock_irqsave(&priv->lock, flags); - error = phy_ethtool_gset(priv->phydev, ecmd); + error = phy_ethtool_ksettings_get(ndev->phydev, cmd); spin_unlock_irqrestore(&priv->lock, flags); } return error; } -static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) +static int ravb_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct ravb_private *priv = netdev_priv(ndev); unsigned long flags; int error; - if (!priv->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&priv->lock, flags); @@ -1081,11 +1080,11 @@ static int ravb_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) /* Disable TX and RX */ ravb_rcv_snd_disable(ndev); - error = phy_ethtool_sset(priv->phydev, ecmd); + error = phy_ethtool_ksettings_set(ndev->phydev, cmd); if (error) goto error_exit; - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) priv->duplex = 1; else priv->duplex = 0; @@ -1110,9 +1109,9 @@ static int ravb_nway_reset(struct net_device *ndev) int error = -ENODEV; unsigned long flags; - if (priv->phydev) { + if (ndev->phydev) { spin_lock_irqsave(&priv->lock, flags); - error = phy_start_aneg(priv->phydev); + error = phy_start_aneg(ndev->phydev); spin_unlock_irqrestore(&priv->lock, flags); } @@ -1309,8 +1308,6 @@ static int ravb_get_ts_info(struct net_device *ndev, } static const struct ethtool_ops ravb_ethtool_ops = { - .get_settings = ravb_get_settings, - .set_settings = ravb_set_settings, .nway_reset = ravb_nway_reset, .get_msglevel = ravb_get_msglevel, .set_msglevel = ravb_set_msglevel, @@ -1321,6 +1318,8 @@ static const struct ethtool_ops ravb_ethtool_ops = { .get_ringparam = ravb_get_ringparam, .set_ringparam = ravb_set_ringparam, .get_ts_info = ravb_get_ts_info, + .get_link_ksettings = ravb_get_link_ksettings, + .set_link_ksettings = ravb_set_link_ksettings, }; static inline int ravb_hook_irq(unsigned int irq, irq_handler_t handler, @@ -1661,10 +1660,9 @@ static int ravb_close(struct net_device *ndev) } /* PHY disconnect */ - if (priv->phydev) { - phy_stop(priv->phydev); - phy_disconnect(priv->phydev); - priv->phydev = NULL; + if (ndev->phydev) { + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); } if (priv->chip_id != RCAR_GEN2) { @@ -1753,8 +1751,7 @@ static int ravb_hwtstamp_set(struct net_device *ndev, struct ifreq *req) /* ioctl to device function */ static int ravb_do_ioctl(struct net_device *ndev, struct ifreq *req, int cmd) { - struct ravb_private *priv = netdev_priv(ndev); - struct phy_device *phydev = priv->phydev; + struct phy_device *phydev = ndev->phydev; if (!netif_running(ndev)) return -EINVAL; @@ -1876,6 +1873,20 @@ static int ravb_set_gti(struct net_device *ndev) return 0; } +static void ravb_set_config_mode(struct net_device *ndev) +{ + struct ravb_private *priv = netdev_priv(ndev); + + if (priv->chip_id == RCAR_GEN2) { + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); + /* Set CSEL value */ + ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); + } else { + ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG | + CCC_GAC | CCC_CSEL_HPB); + } +} + static int ravb_probe(struct platform_device *pdev) { struct device_node *np = pdev->dev.of_node; @@ -1978,14 +1989,7 @@ static int ravb_probe(struct platform_device *pdev) ndev->ethtool_ops = &ravb_ethtool_ops; /* Set AVB config mode */ - if (chip_id == RCAR_GEN2) { - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG); - /* Set CSEL value */ - ravb_modify(ndev, CCC, CCC_CSEL, CCC_CSEL_HPB); - } else { - ravb_modify(ndev, CCC, CCC_OPC, CCC_OPC_CONFIG | - CCC_GAC | CCC_CSEL_HPB); - } + ravb_set_config_mode(ndev); /* Set GTI value */ error = ravb_set_gti(ndev); @@ -2096,8 +2100,55 @@ static int ravb_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_PM -static int ravb_runtime_nop(struct device *dev) +static int __maybe_unused ravb_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + int ret = 0; + + if (netif_running(ndev)) { + netif_device_detach(ndev); + ret = ravb_close(ndev); + } + + return ret; +} + +static int __maybe_unused ravb_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct ravb_private *priv = netdev_priv(ndev); + int ret = 0; + + /* All register have been reset to default values. + * Restore all registers which where setup at probe time and + * reopen device if it was running before system suspended. + */ + + /* Set AVB config mode */ + ravb_set_config_mode(ndev); + + /* Set GTI value */ + ret = ravb_set_gti(ndev); + if (ret) + return ret; + + /* Request GTI loading */ + ravb_modify(ndev, GCCR, GCCR_LTI, GCCR_LTI); + + /* Restore descriptor base address table */ + ravb_write(ndev, priv->desc_bat_dma, DBAT); + + if (netif_running(ndev)) { + ret = ravb_open(ndev); + if (ret < 0) + return ret; + netif_device_attach(ndev); + } + + return ret; +} + +static int __maybe_unused ravb_runtime_nop(struct device *dev) { /* Runtime PM callback shared between ->runtime_suspend() * and ->runtime_resume(). Simply returns success. @@ -2110,20 +2161,16 @@ static int ravb_runtime_nop(struct device *dev) } static const struct dev_pm_ops ravb_dev_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(ravb_suspend, ravb_resume) SET_RUNTIME_PM_OPS(ravb_runtime_nop, ravb_runtime_nop, NULL) }; -#define RAVB_PM_OPS (&ravb_dev_pm_ops) -#else -#define RAVB_PM_OPS NULL -#endif - static struct platform_driver ravb_driver = { .probe = ravb_probe, .remove = ravb_remove, .driver = { .name = "ravb", - .pm = RAVB_PM_OPS, + .pm = &ravb_dev_pm_ops, .of_match_table = ravb_match_table, }, }; diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 054e795df90f..05b0dc55de77 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -1728,7 +1728,7 @@ out: static void sh_eth_adjust_link(struct net_device *ndev) { struct sh_eth_private *mdp = netdev_priv(ndev); - struct phy_device *phydev = mdp->phydev; + struct phy_device *phydev = ndev->phydev; int new_state = 0; if (phydev->link) { @@ -1805,51 +1805,48 @@ static int sh_eth_phy_init(struct net_device *ndev) phy_attached_info(phydev); - mdp->phydev = phydev; - return 0; } /* PHY control start function */ static int sh_eth_phy_start(struct net_device *ndev) { - struct sh_eth_private *mdp = netdev_priv(ndev); int ret; ret = sh_eth_phy_init(ndev); if (ret) return ret; - phy_start(mdp->phydev); + phy_start(ndev->phydev); return 0; } -static int sh_eth_get_settings(struct net_device *ndev, - struct ethtool_cmd *ecmd) +static int sh_eth_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) { struct sh_eth_private *mdp = netdev_priv(ndev); unsigned long flags; int ret; - if (!mdp->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&mdp->lock, flags); - ret = phy_ethtool_gset(mdp->phydev, ecmd); + ret = phy_ethtool_ksettings_get(ndev->phydev, cmd); spin_unlock_irqrestore(&mdp->lock, flags); return ret; } -static int sh_eth_set_settings(struct net_device *ndev, - struct ethtool_cmd *ecmd) +static int sh_eth_set_link_ksettings(struct net_device *ndev, + const struct ethtool_link_ksettings *cmd) { struct sh_eth_private *mdp = netdev_priv(ndev); unsigned long flags; int ret; - if (!mdp->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&mdp->lock, flags); @@ -1857,11 +1854,11 @@ static int sh_eth_set_settings(struct net_device *ndev, /* disable tx and rx */ sh_eth_rcv_snd_disable(ndev); - ret = phy_ethtool_sset(mdp->phydev, ecmd); + ret = phy_ethtool_ksettings_set(ndev->phydev, cmd); if (ret) goto error_exit; - if (ecmd->duplex == DUPLEX_FULL) + if (cmd->base.duplex == DUPLEX_FULL) mdp->duplex = 1; else mdp->duplex = 0; @@ -2072,11 +2069,11 @@ static int sh_eth_nway_reset(struct net_device *ndev) unsigned long flags; int ret; - if (!mdp->phydev) + if (!ndev->phydev) return -ENODEV; spin_lock_irqsave(&mdp->lock, flags); - ret = phy_start_aneg(mdp->phydev); + ret = phy_start_aneg(ndev->phydev); spin_unlock_irqrestore(&mdp->lock, flags); return ret; @@ -2203,8 +2200,6 @@ static int sh_eth_set_ringparam(struct net_device *ndev, } static const struct ethtool_ops sh_eth_ethtool_ops = { - .get_settings = sh_eth_get_settings, - .set_settings = sh_eth_set_settings, .get_regs_len = sh_eth_get_regs_len, .get_regs = sh_eth_get_regs, .nway_reset = sh_eth_nway_reset, @@ -2216,6 +2211,8 @@ static const struct ethtool_ops sh_eth_ethtool_ops = { .get_sset_count = sh_eth_get_sset_count, .get_ringparam = sh_eth_get_ringparam, .set_ringparam = sh_eth_set_ringparam, + .get_link_ksettings = sh_eth_get_link_ksettings, + .set_link_ksettings = sh_eth_set_link_ksettings, }; /* network device open function */ @@ -2413,10 +2410,9 @@ static int sh_eth_close(struct net_device *ndev) sh_eth_dev_exit(ndev); /* PHY Disconnect */ - if (mdp->phydev) { - phy_stop(mdp->phydev); - phy_disconnect(mdp->phydev); - mdp->phydev = NULL; + if (ndev->phydev) { + phy_stop(ndev->phydev); + phy_disconnect(ndev->phydev); } free_irq(ndev->irq, ndev); @@ -2434,8 +2430,7 @@ static int sh_eth_close(struct net_device *ndev) /* ioctl to device function */ static int sh_eth_do_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) { - struct sh_eth_private *mdp = netdev_priv(ndev); - struct phy_device *phydev = mdp->phydev; + struct phy_device *phydev = ndev->phydev; if (!netif_running(ndev)) return -EINVAL; @@ -2964,6 +2959,8 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) static const struct of_device_id sh_eth_match_table[] = { { .compatible = "renesas,gether-r8a7740", .data = &r8a7740_data }, + { .compatible = "renesas,ether-r8a7743", .data = &r8a779x_data }, + { .compatible = "renesas,ether-r8a7745", .data = &r8a779x_data }, { .compatible = "renesas,ether-r8a7778", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7779", .data = &r8a777x_data }, { .compatible = "renesas,ether-r8a7790", .data = &r8a779x_data }, diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index c62380e34a1d..d050f37f3e0f 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -518,7 +518,6 @@ struct sh_eth_private { /* MII transceiver section. */ u32 phy_id; /* PHY ID */ struct mii_bus *mii_bus; /* MDIO bus control */ - struct phy_device *phydev; /* PHY device control */ int link; phy_interface_t phy_interface; int msg_enable; diff --git a/drivers/net/ethernet/rocker/rocker.h b/drivers/net/ethernet/rocker/rocker.h index 1ab995f7146b..2eb9b49569d5 100644 --- a/drivers/net/ethernet/rocker/rocker.h +++ b/drivers/net/ethernet/rocker/rocker.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -52,6 +53,9 @@ struct rocker_port { struct rocker_dma_ring_info rx_ring; }; +struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker); + struct rocker_world_ops; struct rocker { @@ -66,6 +70,7 @@ struct rocker { spinlock_t cmd_ring_lock; /* for cmd ring accesses */ struct rocker_dma_ring_info cmd_ring; struct rocker_dma_ring_info event_ring; + struct notifier_block fib_nb; struct rocker_world_ops *wops; void *wpriv; }; @@ -117,11 +122,6 @@ struct rocker_world_ops { int (*port_obj_vlan_dump)(const struct rocker_port *rocker_port, struct switchdev_obj_port_vlan *vlan, switchdev_obj_dump_cb_t *cb); - int (*port_obj_fib4_add)(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans); - int (*port_obj_fib4_del)(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4); int (*port_obj_fdb_add)(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans); @@ -141,6 +141,11 @@ struct rocker_world_ops { int (*port_ev_mac_vlan_seen)(struct rocker_port *rocker_port, const unsigned char *addr, __be16 vlan_id); + int (*fib4_add)(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info); + int (*fib4_del)(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info); + void (*fib4_abort)(struct rocker *rocker); }; extern struct rocker_world_ops rocker_ofdpa_ops; diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index f0b09b05ed3f..5424fb341613 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -1624,29 +1624,6 @@ rocker_world_port_obj_vlan_dump(const struct rocker_port *rocker_port, return wops->port_obj_vlan_dump(rocker_port, vlan, cb); } -static int -rocker_world_port_obj_fib4_add(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_obj_fib4_add) - return -EOPNOTSUPP; - return wops->port_obj_fib4_add(rocker_port, fib4, trans); -} - -static int -rocker_world_port_obj_fib4_del(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct rocker_world_ops *wops = rocker_port->rocker->wops; - - if (!wops->port_obj_fib4_del) - return -EOPNOTSUPP; - return wops->port_obj_fib4_del(rocker_port, fib4); -} - static int rocker_world_port_obj_fdb_add(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, @@ -1733,6 +1710,34 @@ static int rocker_world_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, return wops->port_ev_mac_vlan_seen(rocker_port, addr, vlan_id); } +static int rocker_world_fib4_add(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (!wops->fib4_add) + return 0; + return wops->fib4_add(rocker, fen_info); +} + +static int rocker_world_fib4_del(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (!wops->fib4_del) + return 0; + return wops->fib4_del(rocker, fen_info); +} + +static void rocker_world_fib4_abort(struct rocker *rocker) +{ + struct rocker_world_ops *wops = rocker->wops; + + if (wops->fib4_abort) + wops->fib4_abort(rocker); +} + /***************** * Net device ops *****************/ @@ -2096,11 +2101,6 @@ static int rocker_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = rocker_world_port_obj_fib4_add(rocker_port, - SWITCHDEV_OBJ_IPV4_FIB(obj), - trans); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_world_port_obj_fdb_add(rocker_port, SWITCHDEV_OBJ_PORT_FDB(obj), @@ -2125,10 +2125,6 @@ static int rocker_port_obj_del(struct net_device *dev, err = rocker_world_port_obj_vlan_del(rocker_port, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; - case SWITCHDEV_OBJ_ID_IPV4_FIB: - err = rocker_world_port_obj_fib4_del(rocker_port, - SWITCHDEV_OBJ_IPV4_FIB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_FDB: err = rocker_world_port_obj_fdb_del(rocker_port, SWITCHDEV_OBJ_PORT_FDB(obj)); @@ -2175,6 +2171,31 @@ static const struct switchdev_ops rocker_port_switchdev_ops = { .switchdev_port_obj_dump = rocker_port_obj_dump, }; +static int rocker_router_fib_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct rocker *rocker = container_of(nb, struct rocker, fib_nb); + struct fib_entry_notifier_info *fen_info = ptr; + int err; + + switch (event) { + case FIB_EVENT_ENTRY_ADD: + err = rocker_world_fib4_add(rocker, fen_info); + if (err) + rocker_world_fib4_abort(rocker); + else + break; + case FIB_EVENT_ENTRY_DEL: + rocker_world_fib4_del(rocker, fen_info); + break; + case FIB_EVENT_RULE_ADD: /* fall through */ + case FIB_EVENT_RULE_DEL: + rocker_world_fib4_abort(rocker); + break; + } + return NOTIFY_DONE; +} + /******************** * ethtool interface ********************/ @@ -2412,7 +2433,7 @@ static int rocker_port_rx_proc(const struct rocker *rocker, skb->protocol = eth_type_trans(skb, rocker_port->dev); if (rx_flags & ROCKER_RX_FLAGS_FWD_OFFLOAD) - skb->offload_fwd_mark = rocker_port->dev->offload_fwd_mark; + skb->offload_fwd_mark = 1; rocker_port->dev->stats.rx_packets++; rocker_port->dev->stats.rx_bytes += skb->len; @@ -2740,6 +2761,9 @@ static int rocker_probe(struct pci_dev *pdev, const struct pci_device_id *id) goto err_probe_ports; } + rocker->fib_nb.notifier_call = rocker_router_fib_event; + register_fib_notifier(&rocker->fib_nb); + dev_info(&pdev->dev, "Rocker switch with id %*phN\n", (int)sizeof(rocker->hw.id), &rocker->hw.id); @@ -2771,6 +2795,7 @@ static void rocker_remove(struct pci_dev *pdev) { struct rocker *rocker = pci_get_drvdata(pdev); + unregister_fib_notifier(&rocker->fib_nb); rocker_write32(rocker, CONTROL, ROCKER_CONTROL_RESET); rocker_remove_ports(rocker); free_irq(rocker_msix_vector(rocker, ROCKER_MSIX_VEC_EVENT), rocker); @@ -2799,6 +2824,37 @@ static bool rocker_port_dev_check(const struct net_device *dev) return dev->netdev_ops == &rocker_port_netdev_ops; } +static bool rocker_port_dev_check_under(const struct net_device *dev, + struct rocker *rocker) +{ + struct rocker_port *rocker_port; + + if (!rocker_port_dev_check(dev)) + return false; + + rocker_port = netdev_priv(dev); + if (rocker_port->rocker != rocker) + return false; + + return true; +} + +struct rocker_port *rocker_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker) +{ + struct net_device *lower_dev; + struct list_head *iter; + + if (rocker_port_dev_check_under(dev, rocker)) + return netdev_priv(dev); + + netdev_for_each_all_lower_dev(dev, lower_dev, iter) { + if (rocker_port_dev_check_under(lower_dev, rocker)) + return netdev_priv(lower_dev); + } + return NULL; +} + static int rocker_netdevice_event(struct notifier_block *unused, unsigned long event, void *ptr) { diff --git a/drivers/net/ethernet/rocker/rocker_ofdpa.c b/drivers/net/ethernet/rocker/rocker_ofdpa.c index 1ca796316173..431a60804272 100644 --- a/drivers/net/ethernet/rocker/rocker_ofdpa.c +++ b/drivers/net/ethernet/rocker/rocker_ofdpa.c @@ -99,6 +99,7 @@ struct ofdpa_flow_tbl_entry { struct ofdpa_flow_tbl_key key; size_t key_len; u32 key_crc32; /* key */ + struct fib_info *fi; }; struct ofdpa_group_tbl_entry { @@ -189,6 +190,7 @@ struct ofdpa { spinlock_t neigh_tbl_lock; /* for neigh tbl accesses */ u32 neigh_tbl_next_index; unsigned long ageing_time; + bool fib_aborted; }; struct ofdpa_port { @@ -1043,7 +1045,8 @@ static int ofdpa_flow_tbl_ucast4_routing(struct ofdpa_port *ofdpa_port, __be16 eth_type, __be32 dst, __be32 dst_mask, u32 priority, enum rocker_of_dpa_table_id goto_tbl, - u32 group_id, int flags) + u32 group_id, struct fib_info *fi, + int flags) { struct ofdpa_flow_tbl_entry *entry; @@ -1060,6 +1063,7 @@ static int ofdpa_flow_tbl_ucast4_routing(struct ofdpa_port *ofdpa_port, entry->key.ucast_routing.group_id = group_id; entry->key_len = offsetof(struct ofdpa_flow_tbl_key, ucast_routing.group_id); + entry->fi = fi; return ofdpa_flow_tbl_do(ofdpa_port, trans, flags, entry); } @@ -1425,7 +1429,7 @@ static int ofdpa_port_ipv4_neigh(struct ofdpa_port *ofdpa_port, eth_type, ip_addr, inet_make_mask(32), priority, goto_tbl, - group_id, flags); + group_id, NULL, flags); if (err) netdev_err(ofdpa_port->dev, "Error (%d) /32 unicast route %pI4 group 0x%08x\n", @@ -2390,7 +2394,7 @@ found: static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, struct switchdev_trans *trans, __be32 dst, - int dst_len, const struct fib_info *fi, + int dst_len, struct fib_info *fi, u32 tb_id, int flags) { const struct fib_nh *nh; @@ -2426,7 +2430,7 @@ static int ofdpa_port_fib_ipv4(struct ofdpa_port *ofdpa_port, err = ofdpa_flow_tbl_ucast4_routing(ofdpa_port, trans, eth_type, dst, dst_mask, priority, goto_tbl, - group_id, flags); + group_id, fi, flags); if (err) netdev_err(ofdpa_port->dev, "Error (%d) IPv4 route %pI4\n", err, &dst); @@ -2558,7 +2562,6 @@ static int ofdpa_port_init(struct rocker_port *rocker_port) struct ofdpa_port *ofdpa_port = rocker_port->wpriv; int err; - switchdev_port_fwd_mark_set(ofdpa_port->dev, NULL, false); rocker_port_set_learning(rocker_port, !!(ofdpa_port->brport_flags & BR_LEARNING)); @@ -2719,28 +2722,6 @@ static int ofdpa_port_obj_vlan_dump(const struct rocker_port *rocker_port, return err; } -static int ofdpa_port_obj_fib4_add(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4, - struct switchdev_trans *trans) -{ - struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - return ofdpa_port_fib_ipv4(ofdpa_port, trans, - htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, 0); -} - -static int ofdpa_port_obj_fib4_del(struct rocker_port *rocker_port, - const struct switchdev_obj_ipv4_fib *fib4) -{ - struct ofdpa_port *ofdpa_port = rocker_port->wpriv; - - return ofdpa_port_fib_ipv4(ofdpa_port, NULL, - htonl(fib4->dst), fib4->dst_len, - fib4->fi, fib4->tb_id, - OFDPA_OP_FLAG_REMOVE); -} - static int ofdpa_port_obj_fdb_add(struct rocker_port *rocker_port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) @@ -2817,7 +2798,6 @@ static int ofdpa_port_bridge_join(struct ofdpa_port *ofdpa_port, ofdpa_port_internal_vlan_id_get(ofdpa_port, bridge->ifindex); ofdpa_port->bridge_dev = bridge; - switchdev_port_fwd_mark_set(ofdpa_port->dev, bridge, true); return ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); } @@ -2836,8 +2816,6 @@ static int ofdpa_port_bridge_leave(struct ofdpa_port *ofdpa_port) ofdpa_port_internal_vlan_id_get(ofdpa_port, ofdpa_port->dev->ifindex); - switchdev_port_fwd_mark_set(ofdpa_port->dev, ofdpa_port->bridge_dev, - false); ofdpa_port->bridge_dev = NULL; err = ofdpa_port_vlan_add(ofdpa_port, NULL, OFDPA_UNTAGGED_VID, 0); @@ -2926,6 +2904,82 @@ static int ofdpa_port_ev_mac_vlan_seen(struct rocker_port *rocker_port, return ofdpa_port_fdb(ofdpa_port, NULL, addr, vlan_id, flags); } +static struct ofdpa_port *ofdpa_port_dev_lower_find(struct net_device *dev, + struct rocker *rocker) +{ + struct rocker_port *rocker_port; + + rocker_port = rocker_port_dev_lower_find(dev, rocker); + return rocker_port ? rocker_port->wpriv : NULL; +} + +static int ofdpa_fib4_add(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + int err; + + if (ofdpa->fib_aborted) + return 0; + ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + if (!ofdpa_port) + return 0; + err = ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst), + fen_info->dst_len, fen_info->fi, + fen_info->tb_id, 0); + if (err) + return err; + fib_info_offload_inc(fen_info->fi); + return 0; +} + +static int ofdpa_fib4_del(struct rocker *rocker, + const struct fib_entry_notifier_info *fen_info) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + + if (ofdpa->fib_aborted) + return 0; + ofdpa_port = ofdpa_port_dev_lower_find(fen_info->fi->fib_dev, rocker); + if (!ofdpa_port) + return 0; + fib_info_offload_dec(fen_info->fi); + return ofdpa_port_fib_ipv4(ofdpa_port, NULL, htonl(fen_info->dst), + fen_info->dst_len, fen_info->fi, + fen_info->tb_id, OFDPA_OP_FLAG_REMOVE); +} + +static void ofdpa_fib4_abort(struct rocker *rocker) +{ + struct ofdpa *ofdpa = rocker->wpriv; + struct ofdpa_port *ofdpa_port; + struct ofdpa_flow_tbl_entry *flow_entry; + struct hlist_node *tmp; + unsigned long flags; + int bkt; + + if (ofdpa->fib_aborted) + return; + + spin_lock_irqsave(&ofdpa->flow_tbl_lock, flags); + hash_for_each_safe(ofdpa->flow_tbl, bkt, tmp, flow_entry, entry) { + if (flow_entry->key.tbl_id != + ROCKER_OF_DPA_TABLE_ID_UNICAST_ROUTING) + continue; + ofdpa_port = ofdpa_port_dev_lower_find(flow_entry->fi->fib_dev, + rocker); + if (!ofdpa_port) + continue; + fib_info_offload_dec(flow_entry->fi); + ofdpa_flow_tbl_del(ofdpa_port, NULL, OFDPA_OP_FLAG_REMOVE, + flow_entry); + } + spin_unlock_irqrestore(&ofdpa->flow_tbl_lock, flags); + ofdpa->fib_aborted = true; +} + struct rocker_world_ops rocker_ofdpa_ops = { .kind = "ofdpa", .priv_size = sizeof(struct ofdpa), @@ -2945,8 +2999,6 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_obj_vlan_add = ofdpa_port_obj_vlan_add, .port_obj_vlan_del = ofdpa_port_obj_vlan_del, .port_obj_vlan_dump = ofdpa_port_obj_vlan_dump, - .port_obj_fib4_add = ofdpa_port_obj_fib4_add, - .port_obj_fib4_del = ofdpa_port_obj_fib4_del, .port_obj_fdb_add = ofdpa_port_obj_fdb_add, .port_obj_fdb_del = ofdpa_port_obj_fdb_del, .port_obj_fdb_dump = ofdpa_port_obj_fdb_dump, @@ -2955,4 +3007,7 @@ struct rocker_world_ops rocker_ofdpa_ops = { .port_neigh_update = ofdpa_port_neigh_update, .port_neigh_destroy = ofdpa_port_neigh_destroy, .port_ev_mac_vlan_seen = ofdpa_port_ev_mac_vlan_seen, + .fib4_add = ofdpa_fib4_add, + .fib4_del = ofdpa_fib4_del, + .fib4_abort = ofdpa_fib4_abort, }; diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index e00a669e9e09..00279da6a1e8 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -177,7 +177,7 @@ static int efx_ef10_get_vf_index(struct efx_nic *efx) static int efx_ef10_init_datapath_caps(struct efx_nic *efx) { - MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_OUT_LEN); + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_CAPABILITIES_V2_OUT_LEN); struct efx_ef10_nic_data *nic_data = efx->nic_data; size_t outlen; int rc; @@ -188,7 +188,7 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) outbuf, sizeof(outbuf), &outlen); if (rc) return rc; - if (outlen < sizeof(outbuf)) { + if (outlen < MC_CMD_GET_CAPABILITIES_OUT_LEN) { netif_err(efx, drv, efx->net_dev, "unable to read datapath firmware capabilities\n"); return -EIO; @@ -197,6 +197,12 @@ static int efx_ef10_init_datapath_caps(struct efx_nic *efx) nic_data->datapath_caps = MCDI_DWORD(outbuf, GET_CAPABILITIES_OUT_FLAGS1); + if (outlen >= MC_CMD_GET_CAPABILITIES_V2_OUT_LEN) + nic_data->datapath_caps2 = MCDI_DWORD(outbuf, + GET_CAPABILITIES_V2_OUT_FLAGS2); + else + nic_data->datapath_caps2 = 0; + /* record the DPCPU firmware IDs to determine VEB vswitching support. */ nic_data->rx_dpcpu_fw_id = @@ -227,6 +233,116 @@ static int efx_ef10_get_sysclk_freq(struct efx_nic *efx) return rc > 0 ? rc : -ERANGE; } +static int efx_ef10_get_timer_workarounds(struct efx_nic *efx) +{ + struct efx_ef10_nic_data *nic_data = efx->nic_data; + unsigned int implemented; + unsigned int enabled; + int rc; + + nic_data->workaround_35388 = false; + nic_data->workaround_61265 = false; + + rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled); + + if (rc == -ENOSYS) { + /* Firmware without GET_WORKAROUNDS - not a problem. */ + rc = 0; + } else if (rc == 0) { + /* Bug61265 workaround is always enabled if implemented. */ + if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG61265) + nic_data->workaround_61265 = true; + + if (enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) { + nic_data->workaround_35388 = true; + } else if (implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG35388) { + /* Workaround is implemented but not enabled. + * Try to enable it. + */ + rc = efx_mcdi_set_workaround(efx, + MC_CMD_WORKAROUND_BUG35388, + true, NULL); + if (rc == 0) + nic_data->workaround_35388 = true; + /* If we failed to set the workaround just carry on. */ + rc = 0; + } + } + + netif_dbg(efx, probe, efx->net_dev, + "workaround for bug 35388 is %sabled\n", + nic_data->workaround_35388 ? "en" : "dis"); + netif_dbg(efx, probe, efx->net_dev, + "workaround for bug 61265 is %sabled\n", + nic_data->workaround_61265 ? "en" : "dis"); + + return rc; +} + +static void efx_ef10_process_timer_config(struct efx_nic *efx, + const efx_dword_t *data) +{ + unsigned int max_count; + + if (EFX_EF10_WORKAROUND_61265(efx)) { + efx->timer_quantum_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS); + efx->timer_max_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS); + } else if (EFX_EF10_WORKAROUND_35388(efx)) { + efx->timer_quantum_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT); + max_count = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT); + efx->timer_max_ns = max_count * efx->timer_quantum_ns; + } else { + efx->timer_quantum_ns = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT); + max_count = MCDI_DWORD(data, + GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT); + efx->timer_max_ns = max_count * efx->timer_quantum_ns; + } + + netif_dbg(efx, probe, efx->net_dev, + "got timer properties from MC: quantum %u ns; max %u ns\n", + efx->timer_quantum_ns, efx->timer_max_ns); +} + +static int efx_ef10_get_timer_config(struct efx_nic *efx) +{ + MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN); + int rc; + + rc = efx_ef10_get_timer_workarounds(efx); + if (rc) + return rc; + + rc = efx_mcdi_rpc_quiet(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES, NULL, 0, + outbuf, sizeof(outbuf), NULL); + + if (rc == 0) { + efx_ef10_process_timer_config(efx, outbuf); + } else if (rc == -ENOSYS || rc == -EPERM) { + /* Not available - fall back to Huntington defaults. */ + unsigned int quantum; + + rc = efx_ef10_get_sysclk_freq(efx); + if (rc < 0) + return rc; + + quantum = 1536000 / rc; /* 1536 cycles */ + efx->timer_quantum_ns = quantum; + efx->timer_max_ns = efx->type->timer_period_max * quantum; + rc = 0; + } else { + efx_mcdi_display_error(efx, MC_CMD_GET_EVQ_TMR_PROPERTIES, + MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN, + NULL, 0, rc); + } + + return rc; +} + static int efx_ef10_get_mac_address_pf(struct efx_nic *efx, u8 *mac_address) { MCDI_DECLARE_BUF(outbuf, MC_CMD_GET_MAC_ADDRESSES_OUT_LEN); @@ -527,32 +643,9 @@ static int efx_ef10_probe(struct efx_nic *efx) if (rc) goto fail5; - rc = efx_ef10_get_sysclk_freq(efx); + rc = efx_ef10_get_timer_config(efx); if (rc < 0) goto fail5; - efx->timer_quantum_ns = 1536000 / rc; /* 1536 cycles */ - - /* Check whether firmware supports bug 35388 workaround. - * First try to enable it, then if we get EPERM, just - * ask if it's already enabled - */ - rc = efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG35388, true, NULL); - if (rc == 0) { - nic_data->workaround_35388 = true; - } else if (rc == -EPERM) { - unsigned int enabled; - - rc = efx_mcdi_get_workarounds(efx, NULL, &enabled); - if (rc) - goto fail3; - nic_data->workaround_35388 = enabled & - MC_CMD_GET_WORKAROUNDS_OUT_BUG35388; - } else if (rc != -ENOSYS && rc != -ENOENT) { - goto fail5; - } - netif_dbg(efx, probe, efx->net_dev, - "workaround for bug 35388 is %sabled\n", - nic_data->workaround_35388 ? "en" : "dis"); rc = efx_mcdi_mon_probe(efx); if (rc && rc != -EPERM) @@ -1440,9 +1533,10 @@ static const struct efx_hw_stat_desc efx_ef10_stat_desc[EF10_STAT_COUNT] = { (1ULL << GENERIC_STAT_rx_nodesc_trunc) | \ (1ULL << GENERIC_STAT_rx_noskb_drops)) -/* These statistics are only provided by the 10G MAC. For a 10G/40G - * switchable port we do not expose these because they might not - * include all the packets they should. +/* On 7000 series NICs, these statistics are only provided by the 10G MAC. + * For a 10G/40G switchable port we do not expose these because they might + * not include all the packets they should. + * On 8000 series NICs these statistics are always provided. */ #define HUNT_10G_ONLY_STAT_MASK ((1ULL << EF10_STAT_port_tx_control) | \ (1ULL << EF10_STAT_port_tx_lt64) | \ @@ -1488,10 +1582,15 @@ static u64 efx_ef10_raw_stat_mask(struct efx_nic *efx) 1 << MC_CMD_DRV_ATTACH_EXT_OUT_FLAG_LINKCTRL)) return 0; - if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) + if (port_caps & (1 << MC_CMD_PHY_CAP_40000FDX_LBN)) { raw_mask |= HUNT_40G_EXTRA_STAT_MASK; - else + /* 8000 series have everything even at 40G */ + if (nic_data->datapath_caps2 & + (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN)) + raw_mask |= HUNT_10G_ONLY_STAT_MASK; + } else { raw_mask |= HUNT_10G_ONLY_STAT_MASK; + } if (nic_data->datapath_caps & (1 << MC_CMD_GET_CAPABILITIES_OUT_PM_AND_RXDP_COUNTERS_LBN)) @@ -1617,7 +1716,6 @@ static int efx_ef10_try_update_nic_stats_pf(struct efx_nic *efx) efx_ef10_get_stat_mask(efx, mask); dma_stats = efx->stats_buffer.addr; - nic_data = efx->nic_data; generation_end = dma_stats[MC_CMD_MAC_GENERATION_END]; if (generation_end == EFX_MC_STATS_GENERATION_INVALID) @@ -1744,27 +1842,43 @@ static size_t efx_ef10_update_stats_vf(struct efx_nic *efx, u64 *full_stats, static void efx_ef10_push_irq_moderation(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; - unsigned int mode, value; + unsigned int mode, usecs; efx_dword_t timer_cmd; - if (channel->irq_moderation) { + if (channel->irq_moderation_us) { mode = 3; - value = channel->irq_moderation - 1; + usecs = channel->irq_moderation_us; } else { mode = 0; - value = 0; + usecs = 0; } - if (EFX_EF10_WORKAROUND_35388(efx)) { + if (EFX_EF10_WORKAROUND_61265(efx)) { + MCDI_DECLARE_BUF(inbuf, MC_CMD_SET_EVQ_TMR_IN_LEN); + unsigned int ns = usecs * 1000; + + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_INSTANCE, + channel->channel); + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS, ns); + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS, ns); + MCDI_SET_DWORD(inbuf, SET_EVQ_TMR_IN_TMR_MODE, mode); + + efx_mcdi_rpc_async(efx, MC_CMD_SET_EVQ_TMR, + inbuf, sizeof(inbuf), 0, NULL, 0); + } else if (EFX_EF10_WORKAROUND_35388(efx)) { + unsigned int ticks = efx_usecs_to_ticks(efx, usecs); + EFX_POPULATE_DWORD_3(timer_cmd, ERF_DD_EVQ_IND_TIMER_FLAGS, EFE_DD_EVQ_IND_TIMER_FLAGS, ERF_DD_EVQ_IND_TIMER_MODE, mode, - ERF_DD_EVQ_IND_TIMER_VAL, value); + ERF_DD_EVQ_IND_TIMER_VAL, ticks); efx_writed_page(efx, &timer_cmd, ER_DD_EVQ_INDIRECT, channel->channel); } else { + unsigned int ticks = efx_usecs_to_ticks(efx, usecs); + EFX_POPULATE_DWORD_2(timer_cmd, ERF_DZ_TC_TIMER_MODE, mode, - ERF_DZ_TC_TIMER_VAL, value); + ERF_DZ_TC_TIMER_VAL, ticks); efx_writed_page(efx, &timer_cmd, ER_DZ_EVQ_TMR, channel->channel); } @@ -1935,14 +2049,18 @@ static irqreturn_t efx_ef10_legacy_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static void efx_ef10_irq_test_generate(struct efx_nic *efx) +static int efx_ef10_irq_test_generate(struct efx_nic *efx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_TRIGGER_INTERRUPT_IN_LEN); + if (efx_mcdi_set_workaround(efx, MC_CMD_WORKAROUND_BUG41750, true, + NULL) == 0) + return -ENOTSUPP; + BUILD_BUG_ON(MC_CMD_TRIGGER_INTERRUPT_OUT_LEN != 0); MCDI_SET_DWORD(inbuf, TRIGGER_INTERRUPT_IN_INTR_LEVEL, efx->irq_level); - (void) efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT, + return efx_mcdi_rpc(efx, MC_CMD_TRIGGER_INTERRUPT, inbuf, sizeof(inbuf), NULL, 0, NULL); } @@ -2536,13 +2654,12 @@ fail: static int efx_ef10_ev_init(struct efx_channel *channel) { MCDI_DECLARE_BUF(inbuf, - MC_CMD_INIT_EVQ_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / - EFX_BUF_SIZE)); - MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_OUT_LEN); + MC_CMD_INIT_EVQ_V2_IN_LEN(EFX_MAX_EVQ_SIZE * 8 / + EFX_BUF_SIZE)); + MCDI_DECLARE_BUF(outbuf, MC_CMD_INIT_EVQ_V2_OUT_LEN); size_t entries = channel->eventq.buf.len / EFX_BUF_SIZE; struct efx_nic *efx = channel->efx; struct efx_ef10_nic_data *nic_data; - bool supports_rx_merge; size_t inlen, outlen; unsigned int enabled, implemented; dma_addr_t dma_addr; @@ -2550,9 +2667,6 @@ static int efx_ef10_ev_init(struct efx_channel *channel) int i; nic_data = efx->nic_data; - supports_rx_merge = - !!(nic_data->datapath_caps & - 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); /* Fill event queue with all ones (i.e. empty events) */ memset(channel->eventq.buf.addr, 0xff, channel->eventq.buf.len); @@ -2561,11 +2675,6 @@ static int efx_ef10_ev_init(struct efx_channel *channel) MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_INSTANCE, channel->channel); /* INIT_EVQ expects index in vector table, not absolute */ MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_IRQ_NUM, channel->channel); - MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, - INIT_EVQ_IN_FLAG_INTERRUPTING, 1, - INIT_EVQ_IN_FLAG_RX_MERGE, 1, - INIT_EVQ_IN_FLAG_TX_MERGE, 1, - INIT_EVQ_IN_FLAG_CUT_THRU, !supports_rx_merge); MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_MODE, MC_CMD_INIT_EVQ_IN_TMR_MODE_DIS); MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_TMR_LOAD, 0); @@ -2574,6 +2683,27 @@ static int efx_ef10_ev_init(struct efx_channel *channel) MC_CMD_INIT_EVQ_IN_COUNT_MODE_DIS); MCDI_SET_DWORD(inbuf, INIT_EVQ_IN_COUNT_THRSHLD, 0); + if (nic_data->datapath_caps2 & + 1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN) { + /* Use the new generic approach to specifying event queue + * configuration, requesting lower latency or higher throughput. + * The options that actually get used appear in the output. + */ + MCDI_POPULATE_DWORD_2(inbuf, INIT_EVQ_V2_IN_FLAGS, + INIT_EVQ_V2_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_V2_IN_FLAG_TYPE, + MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO); + } else { + bool cut_thru = !(nic_data->datapath_caps & + 1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN); + + MCDI_POPULATE_DWORD_4(inbuf, INIT_EVQ_IN_FLAGS, + INIT_EVQ_IN_FLAG_INTERRUPTING, 1, + INIT_EVQ_IN_FLAG_RX_MERGE, 1, + INIT_EVQ_IN_FLAG_TX_MERGE, 1, + INIT_EVQ_IN_FLAG_CUT_THRU, cut_thru); + } + dma_addr = channel->eventq.buf.dma_addr; for (i = 0; i < entries; ++i) { MCDI_SET_ARRAY_QWORD(inbuf, INIT_EVQ_IN_DMA_ADDR, i, dma_addr); @@ -2584,6 +2714,13 @@ static int efx_ef10_ev_init(struct efx_channel *channel) rc = efx_mcdi_rpc(efx, MC_CMD_INIT_EVQ, inbuf, inlen, outbuf, sizeof(outbuf), &outlen); + + if (outlen >= MC_CMD_INIT_EVQ_V2_OUT_LEN) + netif_dbg(efx, drv, efx->net_dev, + "Channel %d using event queue flags %08x\n", + channel->channel, + MCDI_DWORD(outbuf, INIT_EVQ_V2_OUT_FLAGS)); + /* IRQ return is ignored */ if (channel->channel || rc) return rc; @@ -2591,8 +2728,8 @@ static int efx_ef10_ev_init(struct efx_channel *channel) /* Successfully created event queue on channel 0 */ rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled); if (rc == -ENOSYS) { - /* GET_WORKAROUNDS was implemented before the bug26807 - * workaround, thus the latter must be unavailable in this fw + /* GET_WORKAROUNDS was implemented before this workaround, + * thus it must be unavailable in this firmware. */ nic_data->workaround_26807 = false; rc = 0; diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 14b821b1c880..3cf3557106c2 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -281,6 +281,27 @@ static int efx_process_channel(struct efx_channel *channel, int budget) * NAPI guarantees serialisation of polls of the same device, which * provides the guarantee required by efx_process_channel(). */ +static void efx_update_irq_mod(struct efx_nic *efx, struct efx_channel *channel) +{ + int step = efx->irq_mod_step_us; + + if (channel->irq_mod_score < irq_adapt_low_thresh) { + if (channel->irq_moderation_us > step) { + channel->irq_moderation_us -= step; + efx->type->push_irq_moderation(channel); + } + } else if (channel->irq_mod_score > irq_adapt_high_thresh) { + if (channel->irq_moderation_us < + efx->irq_rx_moderation_us) { + channel->irq_moderation_us += step; + efx->type->push_irq_moderation(channel); + } + } + + channel->irq_count = 0; + channel->irq_mod_score = 0; +} + static int efx_poll(struct napi_struct *napi, int budget) { struct efx_channel *channel = @@ -301,22 +322,7 @@ static int efx_poll(struct napi_struct *napi, int budget) if (efx_channel_has_rx_queue(channel) && efx->irq_rx_adaptive && unlikely(++channel->irq_count == 1000)) { - if (unlikely(channel->irq_mod_score < - irq_adapt_low_thresh)) { - if (channel->irq_moderation > 1) { - channel->irq_moderation -= 1; - efx->type->push_irq_moderation(channel); - } - } else if (unlikely(channel->irq_mod_score > - irq_adapt_high_thresh)) { - if (channel->irq_moderation < - efx->irq_rx_moderation) { - channel->irq_moderation += 1; - efx->type->push_irq_moderation(channel); - } - } - channel->irq_count = 0; - channel->irq_mod_score = 0; + efx_update_irq_mod(efx, channel); } efx_filter_rfs_expire(channel); @@ -1703,6 +1709,7 @@ static int efx_probe_nic(struct efx_nic *efx) netif_set_real_num_rx_queues(efx->net_dev, efx->n_rx_channels); /* Initialise the interrupt moderation settings */ + efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000); efx_init_irq_moderation(efx, tx_irq_mod_usec, rx_irq_mod_usec, true, true); @@ -1949,14 +1956,21 @@ static void efx_remove_all(struct efx_nic *efx) * Interrupt moderation * **************************************************************************/ - -static unsigned int irq_mod_ticks(unsigned int usecs, unsigned int quantum_ns) +unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs) { if (usecs == 0) return 0; - if (usecs * 1000 < quantum_ns) + if (usecs * 1000 < efx->timer_quantum_ns) return 1; /* never round down to 0 */ - return usecs * 1000 / quantum_ns; + return usecs * 1000 / efx->timer_quantum_ns; +} + +unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks) +{ + /* We must round up when converting ticks to microseconds + * because we round down when converting the other way. + */ + return DIV_ROUND_UP(ticks * efx->timer_quantum_ns, 1000); } /* Set interrupt moderation parameters */ @@ -1965,21 +1979,16 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, bool rx_may_override_tx) { struct efx_channel *channel; - unsigned int irq_mod_max = DIV_ROUND_UP(efx->type->timer_period_max * - efx->timer_quantum_ns, - 1000); - unsigned int tx_ticks; - unsigned int rx_ticks; + unsigned int timer_max_us; EFX_ASSERT_RESET_SERIALISED(efx); - if (tx_usecs > irq_mod_max || rx_usecs > irq_mod_max) + timer_max_us = efx->timer_max_ns / 1000; + + if (tx_usecs > timer_max_us || rx_usecs > timer_max_us) return -EINVAL; - tx_ticks = irq_mod_ticks(tx_usecs, efx->timer_quantum_ns); - rx_ticks = irq_mod_ticks(rx_usecs, efx->timer_quantum_ns); - - if (tx_ticks != rx_ticks && efx->tx_channel_offset == 0 && + if (tx_usecs != rx_usecs && efx->tx_channel_offset == 0 && !rx_may_override_tx) { netif_err(efx, drv, efx->net_dev, "Channels are shared. " "RX and TX IRQ moderation must be equal\n"); @@ -1987,12 +1996,12 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, } efx->irq_rx_adaptive = rx_adaptive; - efx->irq_rx_moderation = rx_ticks; + efx->irq_rx_moderation_us = rx_usecs; efx_for_each_channel(channel, efx) { if (efx_channel_has_rx_queue(channel)) - channel->irq_moderation = rx_ticks; + channel->irq_moderation_us = rx_usecs; else if (efx_channel_has_tx_queues(channel)) - channel->irq_moderation = tx_ticks; + channel->irq_moderation_us = tx_usecs; } return 0; @@ -2001,26 +2010,21 @@ int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, void efx_get_irq_moderation(struct efx_nic *efx, unsigned int *tx_usecs, unsigned int *rx_usecs, bool *rx_adaptive) { - /* We must round up when converting ticks to microseconds - * because we round down when converting the other way. - */ - *rx_adaptive = efx->irq_rx_adaptive; - *rx_usecs = DIV_ROUND_UP(efx->irq_rx_moderation * - efx->timer_quantum_ns, - 1000); + *rx_usecs = efx->irq_rx_moderation_us; /* If channels are shared between RX and TX, so is IRQ * moderation. Otherwise, IRQ moderation is the same for all * TX channels and is not adaptive. */ - if (efx->tx_channel_offset == 0) + if (efx->tx_channel_offset == 0) { *tx_usecs = *rx_usecs; - else - *tx_usecs = DIV_ROUND_UP( - efx->channel[efx->tx_channel_offset]->irq_moderation * - efx->timer_quantum_ns, - 1000); + } else { + struct efx_channel *tx_channel; + + tx_channel = efx->channel[efx->tx_channel_offset]; + *tx_usecs = tx_channel->irq_moderation_us; + } } /************************************************************************** @@ -2259,8 +2263,18 @@ static int efx_change_mtu(struct net_device *net_dev, int new_mtu) rc = efx_check_disabled(efx); if (rc) return rc; - if (new_mtu > EFX_MAX_MTU) + if (new_mtu > EFX_MAX_MTU) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too big (max: %d)\n", + new_mtu, EFX_MAX_MTU); return -EINVAL; + } + if (new_mtu < EFX_MIN_MTU) { + netif_err(efx, drv, efx->net_dev, + "Requested MTU of %d too small (min: %d)\n", + new_mtu, EFX_MIN_MTU); + return -EINVAL; + } netif_dbg(efx, drv, efx->net_dev, "changing MTU to %d\n", new_mtu); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index c3ae739e9c7a..342ae16e1f2d 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -204,6 +204,8 @@ int efx_try_recovery(struct efx_nic *efx); /* Global */ void efx_schedule_reset(struct efx_nic *efx, enum reset_type type); +unsigned int efx_usecs_to_ticks(struct efx_nic *efx, unsigned int usecs); +unsigned int efx_ticks_to_usecs(struct efx_nic *efx, unsigned int ticks); int efx_init_irq_moderation(struct efx_nic *efx, unsigned int tx_usecs, unsigned int rx_usecs, bool rx_adaptive, bool rx_may_override_tx); diff --git a/drivers/net/ethernet/sfc/falcon.c b/drivers/net/ethernet/sfc/falcon.c index d790cb8d9db3..1a7092602aec 100644 --- a/drivers/net/ethernet/sfc/falcon.c +++ b/drivers/net/ethernet/sfc/falcon.c @@ -378,12 +378,15 @@ static void falcon_push_irq_moderation(struct efx_channel *channel) struct efx_nic *efx = channel->efx; /* Set timer register */ - if (channel->irq_moderation) { + if (channel->irq_moderation_us) { + unsigned int ticks; + + ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us); EFX_POPULATE_DWORD_2(timer_cmd, FRF_AB_TC_TIMER_MODE, FFE_BB_TIMER_MODE_INT_HLDOFF, FRF_AB_TC_TIMER_VAL, - channel->irq_moderation - 1); + ticks - 1); } else { EFX_POPULATE_DWORD_2(timer_cmd, FRF_AB_TC_TIMER_MODE, @@ -2373,6 +2376,8 @@ static int falcon_probe_nic(struct efx_nic *efx) EFX_MAX_CHANNELS); efx->max_tx_channels = efx->max_channels; efx->timer_quantum_ns = 4968; /* 621 cycles */ + efx->timer_max_ns = efx->type->timer_period_max * + efx->timer_quantum_ns; /* Initialise I2C adapter */ board = falcon_board(efx); diff --git a/drivers/net/ethernet/sfc/falcon_boards.c b/drivers/net/ethernet/sfc/falcon_boards.c index 1736f4b806af..f6883b2b5da3 100644 --- a/drivers/net/ethernet/sfc/falcon_boards.c +++ b/drivers/net/ethernet/sfc/falcon_boards.c @@ -64,7 +64,7 @@ #define LM87_ALARM_TEMP_INT 0x10 #define LM87_ALARM_TEMP_EXT1 0x20 -#if defined(CONFIG_SENSORS_LM87) || defined(CONFIG_SENSORS_LM87_MODULE) +#if IS_ENABLED(CONFIG_SENSORS_LM87) static int efx_poke_lm87(struct i2c_client *client, const u8 *reg_values) { @@ -455,7 +455,7 @@ static int sfe4001_init(struct efx_nic *efx) struct falcon_board *board = falcon_board(efx); int rc; -#if defined(CONFIG_SENSORS_LM90) || defined(CONFIG_SENSORS_LM90_MODULE) +#if IS_ENABLED(CONFIG_SENSORS_LM90) board->hwmon_client = i2c_new_device(&board->i2c_adap, &sfe4001_hwmon_info); #else diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 4c83739d158f..4762ec444cb8 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -1477,9 +1477,10 @@ void efx_farch_irq_disable_master(struct efx_nic *efx) * Interrupt must already have been enabled, otherwise nasty things * may happen. */ -void efx_farch_irq_test_generate(struct efx_nic *efx) +int efx_farch_irq_test_generate(struct efx_nic *efx) { efx_farch_interrupts(efx, true, true); + return 0; } /* Process a fatal interrupt diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c index d28e7dd8fa3c..241520943ada 100644 --- a/drivers/net/ethernet/sfc/mcdi.c +++ b/drivers/net/ethernet/sfc/mcdi.c @@ -548,7 +548,10 @@ static bool efx_mcdi_complete_async(struct efx_mcdi_iface *mcdi, bool timeout) efx_mcdi_display_error(efx, async->cmd, async->inlen, errbuf, err_len, rc); } - async->complete(efx, async->cookie, rc, outbuf, data_len); + + if (async->complete) + async->complete(efx, async->cookie, rc, outbuf, + min(async->outlen, data_len)); kfree(async); efx_mcdi_release(mcdi); @@ -1153,7 +1156,8 @@ void efx_mcdi_flush_async(struct efx_nic *efx) * acquired locks in the wrong order. */ list_for_each_entry_safe(async, next, &mcdi->async_list, list) { - async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); + if (async->complete) + async->complete(efx, async->cookie, -ENETDOWN, NULL, 0); list_del(&async->list); kfree(async); } diff --git a/drivers/net/ethernet/sfc/mcdi_pcol.h b/drivers/net/ethernet/sfc/mcdi_pcol.h index c9a5b003caaf..ccceafc15896 100644 --- a/drivers/net/ethernet/sfc/mcdi_pcol.h +++ b/drivers/net/ethernet/sfc/mcdi_pcol.h @@ -2645,16 +2645,20 @@ #define MC_CMD_POLL_BIST_MEM_BUS_MC 0x0 /* enum: CSR IREG bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_CSR 0x1 -/* enum: RX DPCPU bus. */ +/* enum: RX0 DPCPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX 0x2 /* enum: TX0 DPCPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX0 0x3 /* enum: TX1 DPCPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_TX1 0x4 -/* enum: RX DICPU bus. */ +/* enum: RX0 DICPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX 0x5 /* enum: TX DICPU bus. */ #define MC_CMD_POLL_BIST_MEM_BUS_DICPU_TX 0x6 +/* enum: RX1 DPCPU bus. */ +#define MC_CMD_POLL_BIST_MEM_BUS_DPCPU_RX1 0x7 +/* enum: RX1 DICPU bus. */ +#define MC_CMD_POLL_BIST_MEM_BUS_DICPU_RX1 0x8 /* Pattern written to RAM / register */ #define MC_CMD_POLL_BIST_OUT_MEM_EXPECT_OFST 16 /* Actual value read from RAM / register */ @@ -3612,6 +3616,8 @@ #define MC_CMD_NVRAM_INFO_OUT_PROTECTED_WIDTH 1 #define MC_CMD_NVRAM_INFO_OUT_TLV_LBN 1 #define MC_CMD_NVRAM_INFO_OUT_TLV_WIDTH 1 +#define MC_CMD_NVRAM_INFO_OUT_CMAC_LBN 6 +#define MC_CMD_NVRAM_INFO_OUT_CMAC_WIDTH 1 #define MC_CMD_NVRAM_INFO_OUT_A_B_LBN 7 #define MC_CMD_NVRAM_INFO_OUT_A_B_WIDTH 1 #define MC_CMD_NVRAM_INFO_OUT_PHYSDEV_OFST 16 @@ -4389,6 +4395,8 @@ * the command will fail with MC_CMD_ERR_FILTERS_PRESENT. */ #define MC_CMD_WORKAROUND_BUG26807 0x6 +/* enum: Bug 61265 work around (broken EVQ TMR writes). */ +#define MC_CMD_WORKAROUND_BUG61265 0x7 /* 0 = disable the workaround indicated by TYPE; any non-zero value = enable * the workaround */ @@ -4413,7 +4421,6 @@ * (GET_PHY_CFG_OUT_MEDIA_TYPE); the valid 'page number' input values, and the * output data, are interpreted on a per-type basis. For SFP+: PAGE=0 or 1 * returns a 128-byte block read from module I2C address 0xA0 offset 0 or 0x80. - * Anything else: currently undefined. Locks required: None. Return code: 0. */ #define MC_CMD_GET_PHY_MEDIA_INFO 0x4b @@ -5479,6 +5486,8 @@ #define LICENSED_V3_FEATURES_TX_SNIFF_WIDTH 1 #define LICENSED_V3_FEATURES_PROXY_FILTER_OPS_LBN 8 #define LICENSED_V3_FEATURES_PROXY_FILTER_OPS_WIDTH 1 +#define LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_LBN 9 +#define LICENSED_V3_FEATURES_EVENT_CUT_THROUGH_WIDTH 1 #define LICENSED_V3_FEATURES_MASK_LBN 0 #define LICENSED_V3_FEATURES_MASK_WIDTH 64 @@ -5634,6 +5643,109 @@ /* Only valid if INTRFLAG was true */ #define MC_CMD_INIT_EVQ_OUT_IRQ_OFST 0 +/* MC_CMD_INIT_EVQ_V2_IN msgrequest */ +#define MC_CMD_INIT_EVQ_V2_IN_LENMIN 44 +#define MC_CMD_INIT_EVQ_V2_IN_LENMAX 548 +#define MC_CMD_INIT_EVQ_V2_IN_LEN(num) (36+8*(num)) +/* Size, in entries */ +#define MC_CMD_INIT_EVQ_V2_IN_SIZE_OFST 0 +/* Desired instance. Must be set to a specific instance, which is a function + * local queue index. + */ +#define MC_CMD_INIT_EVQ_V2_IN_INSTANCE_OFST 4 +/* The initial timer value. The load value is ignored if the timer mode is DIS. + */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_LOAD_OFST 8 +/* The reload value is ignored in one-shot modes */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_RELOAD_OFST 12 +/* tbd */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAGS_OFST 16 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_LBN 0 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INTERRUPTING_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_LBN 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RPTR_DOS_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_LBN 2 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_INT_ARMD_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_LBN 3 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_CUT_THRU_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_LBN 4 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_RX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_LBN 5 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_LBN 6 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_USE_TIMER_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LBN 7 +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_WIDTH 4 +/* enum: All initialisation flags specified by host. */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_MANUAL 0x0 +/* enum: MEDFORD only. Certain initialisation flags specified by host may be + * over-ridden by firmware based on licenses and firmware variant in order to + * provide the lowest latency achievable. See + * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags. + */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_LOW_LATENCY 0x1 +/* enum: MEDFORD only. Certain initialisation flags specified by host may be + * over-ridden by firmware based on licenses and firmware variant in order to + * provide the best throughput achievable. See + * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags. + */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_THROUGHPUT 0x2 +/* enum: MEDFORD only. Certain initialisation flags may be over-ridden by + * firmware based on licenses and firmware variant. See + * MC_CMD_INIT_EVQ_V2/MC_CMD_INIT_EVQ_V2_OUT/FLAGS for list of affected flags. + */ +#define MC_CMD_INIT_EVQ_V2_IN_FLAG_TYPE_AUTO 0x3 +#define MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_OFST 20 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_MODE_DIS 0x0 +/* enum: Immediate */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_IMMED_START 0x1 +/* enum: Triggered */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_TRIG_START 0x2 +/* enum: Hold-off */ +#define MC_CMD_INIT_EVQ_V2_IN_TMR_INT_HLDOFF 0x3 +/* Target EVQ for wakeups if in wakeup mode. */ +#define MC_CMD_INIT_EVQ_V2_IN_TARGET_EVQ_OFST 24 +/* Target interrupt if in interrupting mode (note union with target EVQ). Use + * MC_CMD_RESOURCE_INSTANCE_ANY unless a specific one required for test + * purposes. + */ +#define MC_CMD_INIT_EVQ_V2_IN_IRQ_NUM_OFST 24 +/* Event Counter Mode. */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_OFST 28 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_DIS 0x0 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RX 0x1 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_TX 0x2 +/* enum: Disabled */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_MODE_RXTX 0x3 +/* Event queue packet count threshold. */ +#define MC_CMD_INIT_EVQ_V2_IN_COUNT_THRSHLD_OFST 32 +/* 64-bit address of 4k of 4k-aligned host memory buffer */ +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_OFST 36 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LEN 8 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_LO_OFST 36 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_HI_OFST 40 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MINNUM 1 +#define MC_CMD_INIT_EVQ_V2_IN_DMA_ADDR_MAXNUM 64 + +/* MC_CMD_INIT_EVQ_V2_OUT msgresponse */ +#define MC_CMD_INIT_EVQ_V2_OUT_LEN 8 +/* Only valid if INTRFLAG was true */ +#define MC_CMD_INIT_EVQ_V2_OUT_IRQ_OFST 0 +/* Actual configuration applied on the card */ +#define MC_CMD_INIT_EVQ_V2_OUT_FLAGS_OFST 4 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_LBN 0 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_CUT_THRU_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_LBN 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_LBN 2 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_TX_MERGE_WIDTH 1 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_LBN 3 +#define MC_CMD_INIT_EVQ_V2_OUT_FLAG_RXQ_FORCE_EV_MERGING_WIDTH 1 + /* QUEUE_CRC_MODE structuredef */ #define QUEUE_CRC_MODE_LEN 1 #define QUEUE_CRC_MODE_MODE_LBN 0 @@ -5697,8 +5809,8 @@ #define MC_CMD_INIT_RXQ_IN_FLAG_PREFIX_WIDTH 1 #define MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_LBN 9 #define MC_CMD_INIT_RXQ_IN_FLAG_DISABLE_SCATTER_WIDTH 1 -#define MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_LBN 10 -#define MC_CMD_INIT_RXQ_IN_FLAG_FORCE_EV_MERGING_WIDTH 1 +#define MC_CMD_INIT_RXQ_IN_UNUSED_LBN 10 +#define MC_CMD_INIT_RXQ_IN_UNUSED_WIDTH 1 /* Owner ID to use if in buffer mode (zero if physical) */ #define MC_CMD_INIT_RXQ_IN_OWNER_ID_OFST 20 /* The port ID associated with the v-adaptor which should contain this DMAQ. */ @@ -7854,6 +7966,20 @@ #define MC_CMD_GET_CAPABILITIES_V2_OUT_EVENT_CUT_THROUGH_WIDTH 1 #define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_LBN 4 #define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_CUT_THROUGH_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_LBN 5 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_VFIFO_ULL_MODE_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN 7 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN 8 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_LBN 9 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_TIMESTAMP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_LBN 10 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_RX_SNIFF_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_LBN 11 +#define MC_CMD_GET_CAPABILITIES_V2_OUT_TX_SNIFF_WIDTH 1 /* Number of FATSOv2 contexts per datapath supported by this NIC. Not present * on older firmware (check the length). */ @@ -7910,6 +8036,288 @@ #define MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_OFST 70 #define MC_CMD_GET_CAPABILITIES_V2_OUT_SIZE_PIO_BUFF_LEN 2 +/* MC_CMD_GET_CAPABILITIES_V3_OUT msgresponse */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_LEN 73 +/* First word of flags. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS1_OFST 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_LBN 3 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VPORT_RECONFIGURE_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_LBN 4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_STRIPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_LBN 5 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_QUERY_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_LBN 6 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_PORT_VLAN_RESTRICT_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_LBN 7 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_DRV_ATTACH_PREBOOT_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_LBN 8 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_FORCE_EVENT_MERGING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_LBN 9 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SET_MAC_ENHANCED_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_LBN 10 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_UNKNOWN_UCAST_DST_FILTER_ALWAYS_MULTI_RECIPIENT_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_LBN 11 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VADAPTOR_PERMIT_SET_MAC_WHEN_FILTERS_INSTALLED_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_SECURITY_FILTERING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_LBN 13 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_ADDITIONAL_RSS_MODES_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_LBN 14 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_QBB_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_LBN 15 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_VAR_BUFFERS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_LBN 16 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_RSS_LIMITED_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_LBN 17 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PACKED_STREAM_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_LBN 18 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_INCLUDE_FCS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_LBN 19 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VLAN_INSERTION_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_LBN 20 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_VLAN_STRIPPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_LBN 21 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_LBN 22 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_0_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_LBN 23 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_PREFIX_LEN_14_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_LBN 24 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_TIMESTAMP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_LBN 25 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_BATCHING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_LBN 26 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MCAST_FILTER_CHAINING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_LBN 27 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PM_AND_RXDP_COUNTERS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_LBN 28 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DISABLE_SCATTER_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_LBN 29 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MCAST_UDP_LOOPBACK_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_LBN 30 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVB_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_LBN 31 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VXLAN_NVGRE_WIDTH 1 +/* RxDPCPU firmware id. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_OFST 4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DPCPU_FW_ID_LEN 2 +/* enum: Standard RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP 0x0 +/* enum: Low latency RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_LOW_LATENCY 0x1 +/* enum: Packed stream RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_PACKED_STREAM 0x2 +/* enum: BIST RXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_BIST 0x10a +/* enum: RXDP Test firmware image 1 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_CUT_THROUGH 0x101 +/* enum: RXDP Test firmware image 2 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD 0x102 +/* enum: RXDP Test firmware image 3 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_TO_MC_STORE_FORWARD_FIRST 0x103 +/* enum: RXDP Test firmware image 4 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_EVERY_EVENT_BATCHABLE 0x104 +/* enum: RXDP Test firmware image 5 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_BACKPRESSURE 0x105 +/* enum: RXDP Test firmware image 6 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_PACKET_EDITS 0x106 +/* enum: RXDP Test firmware image 7 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_RX_HDR_SPLIT 0x107 +/* enum: RXDP Test firmware image 8 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DISABLE_DL 0x108 +/* enum: RXDP Test firmware image 9 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXDP_TEST_FW_DOORBELL_DELAY 0x10b +/* TxDPCPU firmware id. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_OFST 6 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DPCPU_FW_ID_LEN 2 +/* enum: Standard TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP 0x0 +/* enum: Low latency TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_LOW_LATENCY 0x1 +/* enum: High packet rate TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_HIGH_PACKET_RATE 0x3 +/* enum: BIST TXDP firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_BIST 0x12d +/* enum: TXDP Test firmware image 1 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_TSO_EDIT 0x101 +/* enum: TXDP Test firmware image 2 */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_PACKET_EDITS 0x102 +/* enum: TXDP CSR bus test firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXDP_TEST_FW_CSR 0x103 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_OFST 8 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_LEN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_LBN 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_REV_WIDTH 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_VERSION_TYPE_WIDTH 4 +/* enum: reserved value - do not use (may indicate alternative interpretation + * of REV field in future) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RESERVED 0x0 +/* enum: Trivial RX PD firmware for early Huntington development (Huntington + * development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_FIRST_PKT 0x1 +/* enum: RX PD firmware with approximately Siena-compatible behaviour + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT 0x2 +/* enum: Virtual switching (full feature) RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_VSWITCH 0x3 +/* enum: siena_compat variant RX PD firmware using PM rather than MAC + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_SIENA_COMPAT_PM 0x4 +/* enum: Low latency RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LOW_LATENCY 0x5 +/* enum: Packed stream RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_PACKED_STREAM 0x6 +/* enum: RX PD firmware handling layer 2 only for high packet rate performance + * tests (Medford development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine RX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_RULES_ENGINE 0x8 +/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe +/* enum: RX PD firmware parsing but not filtering network overlay tunnel + * encapsulations (Medford development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RXPD_FW_TYPE_TESTFW_ENCAP_PARSING_ONLY 0xf +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_OFST 10 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_LEN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_LBN 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_REV_WIDTH 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_LBN 12 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_VERSION_TYPE_WIDTH 4 +/* enum: reserved value - do not use (may indicate alternative interpretation + * of REV field in future) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RESERVED 0x0 +/* enum: Trivial TX PD firmware for early Huntington development (Huntington + * development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_FIRST_PKT 0x1 +/* enum: TX PD firmware with approximately Siena-compatible behaviour + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT 0x2 +/* enum: Virtual switching (full feature) TX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_VSWITCH 0x3 +/* enum: siena_compat variant TX PD firmware using PM rather than MAC + * (Huntington development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_SIENA_COMPAT_PM 0x4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LOW_LATENCY 0x5 /* enum */ +/* enum: TX PD firmware handling layer 2 only for high packet rate performance + * tests (Medford development only) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_LAYER2_PERF 0x7 +/* enum: Rules engine TX PD production firmware */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_RULES_ENGINE 0x8 +/* enum: RX PD firmware for GUE parsing prototype (Medford development only) */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TXPD_FW_TYPE_TESTFW_GUE_PROTOTYPE 0xe +/* Hardware capabilities of NIC */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_HW_CAPABILITIES_OFST 12 +/* Licensed capabilities */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_LICENSE_CAPABILITIES_OFST 16 +/* Second word of flags. Not present on older firmware (check the length). */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_FLAGS2_OFST 20 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_LBN 0 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_LBN 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_ENCAP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_LBN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVQ_TIMER_CTRL_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_LBN 3 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_EVENT_CUT_THROUGH_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_LBN 4 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_CUT_THROUGH_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_LBN 5 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_VFIFO_ULL_MODE_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_LBN 6 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_MAC_STATS_40G_TX_SIZE_BINS_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_LBN 7 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_INIT_EVQ_V2_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_LBN 8 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_MAC_TIMESTAMPING_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_LBN 9 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TIMESTAMP_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_LBN 10 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_SNIFF_WIDTH 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_LBN 11 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_SNIFF_WIDTH 1 +/* Number of FATSOv2 contexts per datapath supported by this NIC. Not present + * on older firmware (check the length). + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_OFST 24 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_TSO_V2_N_CONTEXTS_LEN 2 +/* One byte per PF containing the number of the external port assigned to this + * PF, indexed by PF number. Special values indicate that a PF is either not + * present or not assigned. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_OFST 26 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_LEN 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PFS_TO_PORTS_ASSIGNMENT_NUM 16 +/* enum: The caller is not permitted to access information on this PF. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff +/* enum: PF does not exist. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe +/* enum: PF does exist but is not assigned to any external port. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_ASSIGNED 0xfd +/* enum: This value indicates that PF is assigned, but it cannot be expressed + * in this field. It is intended for a possible future situation where a more + * complex scheme of PFs to ports mapping is being used. The future driver + * should look for a new field supporting the new scheme. The current/old + * driver should treat this value as PF_NOT_ASSIGNED. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_INCOMPATIBLE_ASSIGNMENT 0xfc +/* One byte per PF containing the number of its VFs, indexed by PF number. A + * special value indicates that a PF is not present. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_OFST 42 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_LEN 1 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VFS_PER_PF_NUM 16 +/* enum: The caller is not permitted to access information on this PF. */ +/* MC_CMD_GET_CAPABILITIES_V3_OUT_ACCESS_NOT_PERMITTED 0xff */ +/* enum: PF does not exist. */ +/* MC_CMD_GET_CAPABILITIES_V3_OUT_PF_NOT_PRESENT 0xfe */ +/* Number of VIs available for each external port */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_OFST 58 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_LEN 2 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_VIS_PER_PORT_NUM 4 +/* Size of RX descriptor cache expressed as binary logarithm The actual size + * equals (2 ^ RX_DESC_CACHE_SIZE) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_OFST 66 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_RX_DESC_CACHE_SIZE_LEN 1 +/* Size of TX descriptor cache expressed as binary logarithm The actual size + * equals (2 ^ TX_DESC_CACHE_SIZE) + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_OFST 67 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_TX_DESC_CACHE_SIZE_LEN 1 +/* Total number of available PIO buffers */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_OFST 68 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_NUM_PIO_BUFFS_LEN 2 +/* Size of a single PIO buffer */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_OFST 70 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_SIZE_PIO_BUFF_LEN 2 +/* On chips later than Medford the amount of address space assigned to each VI + * is configurable. This is a global setting that the driver must query to + * discover the VI to address mapping. Cut-through PIO (CTPIO) in not available + * with 8k VI windows. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_OFST 72 +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_LEN 1 +/* enum: Each VI occupies 8k as on Huntington and Medford. PIO is at offset 4k. + * CTPIO is not mapped. + */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_8K 0x0 +/* enum: Each VI occupies 16k. PIO is at offset 4k. CTPIO is at offset 12k. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_16K 0x1 +/* enum: Each VI occupies 64k. PIO is at offset 4k. CTPIO is at offset 12k. */ +#define MC_CMD_GET_CAPABILITIES_V3_OUT_VI_WINDOW_MODE_64K 0x2 + /***********************************/ /* MC_CMD_V2_EXTN @@ -9026,7 +9434,7 @@ */ #define MC_CMD_GET_RXDP_CONFIG 0xc2 -#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_ADMIN +#define MC_CMD_0xc2_PRIVILEGE_CTG SRIOV_CTG_GENERAL /* MC_CMD_GET_RXDP_CONFIG_IN msgrequest */ #define MC_CMD_GET_RXDP_CONFIG_IN_LEN 0 @@ -10125,7 +10533,9 @@ * that this operation returns a zero-length response */ #define MC_CMD_LICENSING_V3_IN_OP_UPDATE_LICENSE 0x0 -/* enum: report counts of installed licenses */ +/* enum: report counts of installed licenses Returns EAGAIN if license + * processing (updating) has been started but not yet completed. + */ #define MC_CMD_LICENSING_V3_IN_OP_REPORT_LICENSE 0x1 /* MC_CMD_LICENSING_V3_OUT msgresponse */ @@ -10763,6 +11173,8 @@ #define MC_CMD_GET_WORKAROUNDS_OUT_BUG42008 0x20 /* enum: Bug 26807 features present in firmware (multicast filter chaining) */ #define MC_CMD_GET_WORKAROUNDS_OUT_BUG26807 0x40 +/* enum: Bug 61265 work around (broken EVQ TMR writes). */ +#define MC_CMD_GET_WORKAROUNDS_OUT_BUG61265 0x80 /***********************************/ @@ -11280,22 +11692,110 @@ #define MC_CMD_0x118_PRIVILEGE_CTG SRIOV_CTG_ADMIN /* MC_CMD_RX_BALANCING_IN msgrequest */ -#define MC_CMD_RX_BALANCING_IN_LEN 4 +#define MC_CMD_RX_BALANCING_IN_LEN 16 /* The RX port whose upconverter table will be modified */ #define MC_CMD_RX_BALANCING_IN_PORT_OFST 0 -#define MC_CMD_RX_BALANCING_IN_PORT_LEN 1 /* The VLAN priority associated to the table index and vFIFO */ -#define MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 1 -#define MC_CMD_RX_BALANCING_IN_PRIORITY_LEN 1 +#define MC_CMD_RX_BALANCING_IN_PRIORITY_OFST 4 /* The resulting bit of SRC^DST for indexing the table */ -#define MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 2 -#define MC_CMD_RX_BALANCING_IN_SRC_DST_LEN 1 +#define MC_CMD_RX_BALANCING_IN_SRC_DST_OFST 8 /* The RX engine to which the vFIFO in the table entry will point to */ -#define MC_CMD_RX_BALANCING_IN_ENG_OFST 3 -#define MC_CMD_RX_BALANCING_IN_ENG_LEN 1 +#define MC_CMD_RX_BALANCING_IN_ENG_OFST 12 /* MC_CMD_RX_BALANCING_OUT msgresponse */ #define MC_CMD_RX_BALANCING_OUT_LEN 0 +/***********************************/ +/* MC_CMD_SET_EVQ_TMR + * Update the timer load, timer reload and timer mode values for a given EVQ. + * The requested timer values (in TMR_LOAD_REQ_NS and TMR_RELOAD_REQ_NS) will + * be rounded up to the granularity supported by the hardware, then truncated + * to the range supported by the hardware. The resulting value after the + * rounding and truncation will be returned to the caller (in TMR_LOAD_ACT_NS + * and TMR_RELOAD_ACT_NS). + */ +#define MC_CMD_SET_EVQ_TMR 0x120 + +#define MC_CMD_0x120_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_SET_EVQ_TMR_IN msgrequest */ +#define MC_CMD_SET_EVQ_TMR_IN_LEN 16 +/* Function-relative queue instance */ +#define MC_CMD_SET_EVQ_TMR_IN_INSTANCE_OFST 0 +/* Requested value for timer load (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_IN_TMR_LOAD_REQ_NS_OFST 4 +/* Requested value for timer reload (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_IN_TMR_RELOAD_REQ_NS_OFST 8 +/* Timer mode. Meanings as per EVQ_TMR_REG.TC_TIMER_VAL */ +#define MC_CMD_SET_EVQ_TMR_IN_TMR_MODE_OFST 12 +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_DIS 0x0 /* enum */ +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_IMMED_START 0x1 /* enum */ +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_TRIG_START 0x2 /* enum */ +#define MC_CMD_SET_EVQ_TMR_IN_TIMER_MODE_INT_HLDOFF 0x3 /* enum */ + +/* MC_CMD_SET_EVQ_TMR_OUT msgresponse */ +#define MC_CMD_SET_EVQ_TMR_OUT_LEN 8 +/* Actual value for timer load (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_OUT_TMR_LOAD_ACT_NS_OFST 0 +/* Actual value for timer reload (in nanoseconds) */ +#define MC_CMD_SET_EVQ_TMR_OUT_TMR_RELOAD_ACT_NS_OFST 4 + + +/***********************************/ +/* MC_CMD_GET_EVQ_TMR_PROPERTIES + * Query properties about the event queue timers. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES 0x122 + +#define MC_CMD_0x122_PRIVILEGE_CTG SRIOV_CTG_GENERAL + +/* MC_CMD_GET_EVQ_TMR_PROPERTIES_IN msgrequest */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_IN_LEN 0 + +/* MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT msgresponse */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_LEN 36 +/* Reserved for future use. */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_FLAGS_OFST 0 +/* For timers updated via writes to EVQ_TMR_REG, this is the time interval (in + * nanoseconds) for each increment of the timer load/reload count. The + * requested duration of a timer is this value multiplied by the timer + * load/reload count. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_NS_PER_COUNT_OFST 4 +/* For timers updated via writes to EVQ_TMR_REG, this is the maximum value + * allowed for timer load/reload counts. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_MAX_COUNT_OFST 8 +/* For timers updated via writes to EVQ_TMR_REG, timer load/reload counts not a + * multiple of this step size will be rounded in an implementation defined + * manner. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_TMR_REG_STEP_OFST 12 +/* Maximum timer duration (in nanoseconds) for timers updated via MCDI. Only + * meaningful if MC_CMD_SET_EVQ_TMR is implemented. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_MAX_NS_OFST 16 +/* Timer durations requested via MCDI that are not a multiple of this step size + * will be rounded up. Only meaningful if MC_CMD_SET_EVQ_TMR is implemented. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_MCDI_TMR_STEP_NS_OFST 20 +/* For timers updated using the bug35388 workaround, this is the time interval + * (in nanoseconds) for each increment of the timer load/reload count. The + * requested duration of a timer is this value multiplied by the timer + * load/reload count. This field is only meaningful if the bug35388 workaround + * is enabled. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_NS_PER_COUNT_OFST 24 +/* For timers updated using the bug35388 workaround, this is the maximum value + * allowed for timer load/reload counts. This field is only meaningful if the + * bug35388 workaround is enabled. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_MAX_COUNT_OFST 28 +/* For timers updated using the bug35388 workaround, timer load/reload counts + * not a multiple of this step size will be rounded in an implementation + * defined manner. This field is only meaningful if the bug35388 workaround is + * enabled. + */ +#define MC_CMD_GET_EVQ_TMR_PROPERTIES_OUT_BUG35388_TMR_STEP_OFST 32 #endif /* MCDI_PCOL_H */ diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index 9ff062a36ea8..99d8c82124bb 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -76,6 +76,9 @@ /* Maximum possible MTU the driver supports */ #define EFX_MAX_MTU (9 * 1024) +/* Minimum MTU, from RFC791 (IP) */ +#define EFX_MIN_MTU 68 + /* Size of an RX scatter buffer. Small enough to pack 2 into a 4K page, * and should be a multiple of the cache line size. */ @@ -392,7 +395,7 @@ enum efx_sync_events_state { * @eventq_init: Event queue initialised flag * @enabled: Channel enabled indicator * @irq: IRQ number (MSI and MSI-X only) - * @irq_moderation: IRQ moderation value (in hardware ticks) + * @irq_moderation_us: IRQ moderation value (in microseconds) * @napi_dev: Net device used with NAPI * @napi_str: NAPI control structure * @state: state for NAPI vs busy polling @@ -433,7 +436,7 @@ struct efx_channel { bool eventq_init; bool enabled; int irq; - unsigned int irq_moderation; + unsigned int irq_moderation_us; struct net_device *napi_dev; struct napi_struct napi_str; #ifdef CONFIG_NET_RX_BUSY_POLL @@ -810,8 +813,10 @@ struct vfdi_status; * @membase: Memory BAR value * @interrupt_mode: Interrupt mode * @timer_quantum_ns: Interrupt timer quantum, in nanoseconds + * @timer_max_ns: Interrupt timer maximum value, in nanoseconds * @irq_rx_adaptive: Adaptive IRQ moderation enabled for RX event queues - * @irq_rx_moderation: IRQ moderation time for RX event queues + * @irq_rx_mod_step_us: Step size for IRQ moderation for RX event queues + * @irq_rx_moderation_us: IRQ moderation time for RX event queues * @msg_enable: Log message enable flags * @state: Device state number (%STATE_*). Serialised by the rtnl_lock. * @reset_pending: Bitmask for pending resets @@ -940,8 +945,10 @@ struct efx_nic { enum efx_int_mode interrupt_mode; unsigned int timer_quantum_ns; + unsigned int timer_max_ns; bool irq_rx_adaptive; - unsigned int irq_rx_moderation; + unsigned int irq_mod_step_us; + unsigned int irq_rx_moderation_us; u32 msg_enable; enum nic_state state; @@ -1271,7 +1278,7 @@ struct efx_nic_type { int (*mcdi_poll_reboot)(struct efx_nic *efx); void (*mcdi_reboot_detected)(struct efx_nic *efx); void (*irq_enable_master)(struct efx_nic *efx); - void (*irq_test_generate)(struct efx_nic *efx); + int (*irq_test_generate)(struct efx_nic *efx); void (*irq_disable_non_ev)(struct efx_nic *efx); irqreturn_t (*irq_handle_msi)(int irq, void *dev_id); irqreturn_t (*irq_handle_legacy)(int irq, void *dev_id); diff --git a/drivers/net/ethernet/sfc/nic.c b/drivers/net/ethernet/sfc/nic.c index 89b83e59e1dc..aa1945a858d5 100644 --- a/drivers/net/ethernet/sfc/nic.c +++ b/drivers/net/ethernet/sfc/nic.c @@ -66,11 +66,11 @@ void efx_nic_event_test_start(struct efx_channel *channel) channel->efx->type->ev_test_generate(channel); } -void efx_nic_irq_test_start(struct efx_nic *efx) +int efx_nic_irq_test_start(struct efx_nic *efx) { efx->last_irq_cpu = -1; smp_wmb(); - efx->type->irq_test_generate(efx); + return efx->type->irq_test_generate(efx); } /* Hook interrupt handler(s) diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h index 96944c3c9d14..73bee7ea332a 100644 --- a/drivers/net/ethernet/sfc/nic.h +++ b/drivers/net/ethernet/sfc/nic.h @@ -507,10 +507,13 @@ enum { * @stats: Hardware statistics * @workaround_35388: Flag: firmware supports workaround for bug 35388 * @workaround_26807: Flag: firmware supports workaround for bug 26807 + * @workaround_61265: Flag: firmware supports workaround for bug 61265 * @must_check_datapath_caps: Flag: @datapath_caps needs to be revalidated * after MC reboot * @datapath_caps: Capabilities of datapath firmware (FLAGS1 field of * %MC_CMD_GET_CAPABILITIES response) + * @datapath_caps2: Further Capabilities of datapath firmware (FLAGS2 field of + * %MC_CMD_GET_CAPABILITIES response) * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU * @vport_id: The function's vport ID, only relevant for PFs @@ -540,8 +543,10 @@ struct efx_ef10_nic_data { u64 stats[EF10_STAT_COUNT]; bool workaround_35388; bool workaround_26807; + bool workaround_61265; bool must_check_datapath_caps; u32 datapath_caps; + u32 datapath_caps2; unsigned int rx_dpcpu_fw_id; unsigned int tx_dpcpu_fw_id; unsigned int vport_id; @@ -741,12 +746,12 @@ static inline void efx_update_diff_stat(u64 *stat, u64 diff) /* Interrupts */ int efx_nic_init_interrupt(struct efx_nic *efx); -void efx_nic_irq_test_start(struct efx_nic *efx); +int efx_nic_irq_test_start(struct efx_nic *efx); void efx_nic_fini_interrupt(struct efx_nic *efx); /* Falcon/Siena interrupts */ void efx_farch_irq_enable_master(struct efx_nic *efx); -void efx_farch_irq_test_generate(struct efx_nic *efx); +int efx_farch_irq_test_generate(struct efx_nic *efx); void efx_farch_irq_disable_master(struct efx_nic *efx); irqreturn_t efx_farch_msi_interrupt(int irq, void *dev_id); irqreturn_t efx_farch_legacy_interrupt(int irq, void *dev_id); diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c index c771e0af4e06..77a5364f7a10 100644 --- a/drivers/net/ethernet/sfc/ptp.c +++ b/drivers/net/ethernet/sfc/ptp.c @@ -1269,13 +1269,13 @@ int efx_ptp_probe(struct efx_nic *efx, struct efx_channel *channel) if (IS_ERR(ptp->phc_clock)) { rc = PTR_ERR(ptp->phc_clock); goto fail3; - } - - INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); - ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); - if (!ptp->pps_workwq) { - rc = -ENOMEM; - goto fail4; + } else if (ptp->phc_clock) { + INIT_WORK(&ptp->pps_work, efx_ptp_pps_worker); + ptp->pps_workwq = create_singlethread_workqueue("sfc_pps"); + if (!ptp->pps_workwq) { + rc = -ENOMEM; + goto fail4; + } } } ptp->nic_ts_enabled = false; @@ -1306,7 +1306,7 @@ static int efx_ptp_probe_channel(struct efx_channel *channel) { struct efx_nic *efx = channel->efx; - channel->irq_moderation = 0; + channel->irq_moderation_us = 0; channel->rx_queue.core_index = 0; return efx_ptp_probe(efx, channel); diff --git a/drivers/net/ethernet/sfc/selftest.c b/drivers/net/ethernet/sfc/selftest.c index 9d78830da609..cd38b44ae23a 100644 --- a/drivers/net/ethernet/sfc/selftest.c +++ b/drivers/net/ethernet/sfc/selftest.c @@ -135,11 +135,19 @@ static int efx_test_interrupts(struct efx_nic *efx, { unsigned long timeout, wait; int cpu; + int rc; netif_dbg(efx, drv, efx->net_dev, "testing interrupts\n"); tests->interrupt = -1; - efx_nic_irq_test_start(efx); + rc = efx_nic_irq_test_start(efx); + if (rc == -ENOTSUPP) { + netif_dbg(efx, drv, efx->net_dev, + "direct interrupt testing not supported\n"); + tests->interrupt = 0; + return 0; + } + timeout = jiffies + IRQ_TIMEOUT; wait = 1; diff --git a/drivers/net/ethernet/sfc/selftest.h b/drivers/net/ethernet/sfc/selftest.h index 009dbe88f3be..32a427253a03 100644 --- a/drivers/net/ethernet/sfc/selftest.h +++ b/drivers/net/ethernet/sfc/selftest.h @@ -28,7 +28,7 @@ struct efx_loopback_self_tests { /* Efx self test results * For fields which are not counters, 1 indicates success and -1 - * indicates failure. + * indicates failure; 0 indicates test could not be run. */ struct efx_self_tests { /* online tests */ diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c index 2219b5424d2b..04ed1b4c7cd9 100644 --- a/drivers/net/ethernet/sfc/siena.c +++ b/drivers/net/ethernet/sfc/siena.c @@ -34,19 +34,24 @@ static void siena_init_wol(struct efx_nic *efx); static void siena_push_irq_moderation(struct efx_channel *channel) { + struct efx_nic *efx = channel->efx; efx_dword_t timer_cmd; - if (channel->irq_moderation) + if (channel->irq_moderation_us) { + unsigned int ticks; + + ticks = efx_usecs_to_ticks(efx, channel->irq_moderation_us); EFX_POPULATE_DWORD_2(timer_cmd, FRF_CZ_TC_TIMER_MODE, FFE_CZ_TIMER_MODE_INT_HLDOFF, FRF_CZ_TC_TIMER_VAL, - channel->irq_moderation - 1); - else + ticks - 1); + } else { EFX_POPULATE_DWORD_2(timer_cmd, FRF_CZ_TC_TIMER_MODE, FFE_CZ_TIMER_MODE_DIS, FRF_CZ_TC_TIMER_VAL, 0); + } efx_writed_page_locked(channel->efx, &timer_cmd, FR_BZ_TIMER_COMMAND_P0, channel->channel); } @@ -222,6 +227,9 @@ static int siena_probe_nvconfig(struct efx_nic *efx) efx->timer_quantum_ns = (caps & (1 << MC_CMD_CAPABILITIES_TURBO_ACTIVE_LBN)) ? 3072 : 6144; /* 768 cycles */ + efx->timer_max_ns = efx->type->timer_period_max * + efx->timer_quantum_ns; + return rc; } diff --git a/drivers/net/ethernet/sfc/sriov.c b/drivers/net/ethernet/sfc/sriov.c index 816c44689e67..9abcf4aded30 100644 --- a/drivers/net/ethernet/sfc/sriov.c +++ b/drivers/net/ethernet/sfc/sriov.c @@ -22,7 +22,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac) } int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, - u8 qos) + u8 qos, __be16 vlan_proto) { struct efx_nic *efx = netdev_priv(net_dev); @@ -31,6 +31,9 @@ int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, (qos & ~(VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT))) return -EINVAL; + if (vlan_proto != htons(ETH_P_8021Q)) + return -EPROTONOSUPPORT; + return efx->type->sriov_set_vf_vlan(efx, vf_i, vlan, qos); } else { return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/sfc/sriov.h b/drivers/net/ethernet/sfc/sriov.h index 400df526586d..ba1762e7f216 100644 --- a/drivers/net/ethernet/sfc/sriov.h +++ b/drivers/net/ethernet/sfc/sriov.h @@ -16,7 +16,7 @@ int efx_sriov_set_vf_mac(struct net_device *net_dev, int vf_i, u8 *mac); int efx_sriov_set_vf_vlan(struct net_device *net_dev, int vf_i, u16 vlan, - u8 qos); + u8 qos, __be16 vlan_proto); int efx_sriov_set_vf_spoofchk(struct net_device *net_dev, int vf_i, bool spoofchk); int efx_sriov_get_vf_config(struct net_device *net_dev, int vf_i, diff --git a/drivers/net/ethernet/sfc/workarounds.h b/drivers/net/ethernet/sfc/workarounds.h index 2310b75d4ec2..351cd14cb9f9 100644 --- a/drivers/net/ethernet/sfc/workarounds.h +++ b/drivers/net/ethernet/sfc/workarounds.h @@ -50,4 +50,8 @@ #define EFX_WORKAROUND_35388(efx) \ (efx_nic_rev(efx) == EFX_REV_HUNT_A0 && EFX_EF10_WORKAROUND_35388(efx)) +/* Moderation timer access must go through MCDI */ +#define EFX_EF10_WORKAROUND_61265(efx) \ + (((struct efx_ef10_nic_data *)efx->nic_data)->workaround_61265) + #endif /* EFX_WORKAROUNDS_H */ diff --git a/drivers/net/ethernet/sis/sis900.c b/drivers/net/ethernet/sis/sis900.c index 95001ee408ab..6f85276376e8 100644 --- a/drivers/net/ethernet/sis/sis900.c +++ b/drivers/net/ethernet/sis/sis900.c @@ -1426,7 +1426,7 @@ static void sis900_set_mode(struct sis900_private *sp, int speed, int duplex) rx_flags |= RxATX; } -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* Can accept Jumbo packet */ rx_flags |= RxAJAB; #endif @@ -1750,7 +1750,7 @@ static int sis900_rx(struct net_device *net_dev) data_size = rx_status & DSIZE; rx_size = data_size - CRC_SIZE; -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) /* ``TOOLONG'' flag means jumbo packet received. */ if ((rx_status & TOOLONG) && data_size <= MAX_FRAME_SIZE) rx_status &= (~ ((unsigned int)TOOLONG)); diff --git a/drivers/net/ethernet/sis/sis900.h b/drivers/net/ethernet/sis/sis900.h index 7d430d322931..f0da3dc52c01 100644 --- a/drivers/net/ethernet/sis/sis900.h +++ b/drivers/net/ethernet/sis/sis900.h @@ -310,7 +310,7 @@ enum sis630_revision_id { #define CRC_SIZE 4 #define MAC_HEADER_SIZE 14 -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define MAX_FRAME_SIZE (1518 + 4) #else #define MAX_FRAME_SIZE 1518 diff --git a/drivers/net/ethernet/smsc/smc91x.c b/drivers/net/ethernet/smsc/smc91x.c index 503a3b6dce91..73212590d04a 100644 --- a/drivers/net/ethernet/smsc/smc91x.c +++ b/drivers/net/ethernet/smsc/smc91x.c @@ -2323,6 +2323,9 @@ static int smc_drv_probe(struct platform_device *pdev) } else { lp->cfg.flags |= SMC91X_USE_16BIT; } + if (!device_property_read_u32(&pdev->dev, "reg-shift", + &val)) + lp->io_shift = val; } #endif diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 4f8910b7db2e..e9b8579e6241 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -62,6 +62,7 @@ #include #include #include +#include #include "smsc911x.h" @@ -147,6 +148,9 @@ struct smsc911x_data { /* regulators */ struct regulator_bulk_data supplies[SMSC911X_NUM_SUPPLIES]; + /* Reset GPIO */ + struct gpio_desc *reset_gpiod; + /* clock */ struct clk *clk; }; @@ -438,6 +442,11 @@ static int smsc911x_request_resources(struct platform_device *pdev) netdev_err(ndev, "couldn't get regulators %d\n", ret); + /* Request optional RESET GPIO */ + pdata->reset_gpiod = devm_gpiod_get_optional(&pdev->dev, + "reset", + GPIOD_OUT_LOW); + /* Request clock */ pdata->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(pdata->clk)) diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig index 8f06a6621ab1..c732b8ce2528 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Kconfig +++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig @@ -104,6 +104,18 @@ config DWMAC_STI device driver. This driver is used on for the STi series SOCs GMAC ethernet controller. +config DWMAC_STM32 + tristate "STM32 DWMAC support" + default ARCH_STM32 + depends on OF && HAS_IOMEM + select MFD_SYSCON + ---help--- + Support for ethernet controller on STM32 SOCs. + + This selects STM32 SoC glue layer support for the stmmac + device driver. This driver is used on for the STM32 series + SOCs GMAC ethernet controller. + config DWMAC_SUNXI tristate "Allwinner GMAC support" default ARCH_SUNXI diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile index 44b630cd1755..f0c9396fa28e 100644 --- a/drivers/net/ethernet/stmicro/stmmac/Makefile +++ b/drivers/net/ethernet/stmicro/stmmac/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_DWMAC_MESON) += dwmac-meson.o obj-$(CONFIG_DWMAC_ROCKCHIP) += dwmac-rk.o obj-$(CONFIG_DWMAC_SOCFPGA) += dwmac-altr-socfpga.o obj-$(CONFIG_DWMAC_STI) += dwmac-sti.o +obj-$(CONFIG_DWMAC_STM32) += dwmac-stm32.o obj-$(CONFIG_DWMAC_SUNXI) += dwmac-sunxi.o obj-$(CONFIG_DWMAC_GENERIC) += dwmac-generic.o stmmac-platform-objs:= stmmac_platform.o diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h index 2533b91f1421..d3292c4a6eda 100644 --- a/drivers/net/ethernet/stmicro/stmmac/common.h +++ b/drivers/net/ethernet/stmicro/stmmac/common.h @@ -30,7 +30,7 @@ #include #include #include -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) +#if IS_ENABLED(CONFIG_VLAN_8021Q) #define STMMAC_VLAN_TAG_USED #include #endif diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c index 92105916ef40..3740a4417fa0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c @@ -30,6 +30,7 @@ #include #include #include +#include #include "stmmac_platform.h" @@ -301,6 +302,118 @@ static const struct rk_gmac_ops rk3288_ops = { .set_rmii_speed = rk3288_set_rmii_speed, }; +#define RK3366_GRF_SOC_CON6 0x0418 +#define RK3366_GRF_SOC_CON7 0x041c + +/* RK3366_GRF_SOC_CON6 */ +#define RK3366_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3366_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3366_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3366_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3366_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3366_GMAC_SPEED_100M GRF_BIT(7) +#define RK3366_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3366_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3366_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3366_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3366_GMAC_RMII_MODE GRF_BIT(6) +#define RK3366_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3366_GRF_SOC_CON7 */ +#define RK3366_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3366_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3366_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3366_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3366_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3366_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3366_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RGMII | + RK3366_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON7, + RK3366_GMAC_RXCLK_DLY_ENABLE | + RK3366_GMAC_TXCLK_DLY_ENABLE | + RK3366_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3366_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3366_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_PHY_INTF_SEL_RMII | RK3366_GMAC_RMII_MODE); +} + +static void rk3366_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3366_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_2_5M | + RK3366_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3366_GRF_SOC_CON6, + RK3366_GMAC_RMII_CLK_25M | + RK3366_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3366_ops = { + .set_to_rgmii = rk3366_set_to_rgmii, + .set_to_rmii = rk3366_set_to_rmii, + .set_rgmii_speed = rk3366_set_rgmii_speed, + .set_rmii_speed = rk3366_set_rmii_speed, +}; + #define RK3368_GRF_SOC_CON15 0x043c #define RK3368_GRF_SOC_CON16 0x0440 @@ -413,6 +526,118 @@ static const struct rk_gmac_ops rk3368_ops = { .set_rmii_speed = rk3368_set_rmii_speed, }; +#define RK3399_GRF_SOC_CON5 0xc214 +#define RK3399_GRF_SOC_CON6 0xc218 + +/* RK3399_GRF_SOC_CON5 */ +#define RK3399_GMAC_PHY_INTF_SEL_RGMII (GRF_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_CLR_BIT(11)) +#define RK3399_GMAC_PHY_INTF_SEL_RMII (GRF_CLR_BIT(9) | GRF_CLR_BIT(10) | \ + GRF_BIT(11)) +#define RK3399_GMAC_FLOW_CTRL GRF_BIT(8) +#define RK3399_GMAC_FLOW_CTRL_CLR GRF_CLR_BIT(8) +#define RK3399_GMAC_SPEED_10M GRF_CLR_BIT(7) +#define RK3399_GMAC_SPEED_100M GRF_BIT(7) +#define RK3399_GMAC_RMII_CLK_25M GRF_BIT(3) +#define RK3399_GMAC_RMII_CLK_2_5M GRF_CLR_BIT(3) +#define RK3399_GMAC_CLK_125M (GRF_CLR_BIT(4) | GRF_CLR_BIT(5)) +#define RK3399_GMAC_CLK_25M (GRF_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_CLK_2_5M (GRF_CLR_BIT(4) | GRF_BIT(5)) +#define RK3399_GMAC_RMII_MODE GRF_BIT(6) +#define RK3399_GMAC_RMII_MODE_CLR GRF_CLR_BIT(6) + +/* RK3399_GRF_SOC_CON6 */ +#define RK3399_GMAC_TXCLK_DLY_ENABLE GRF_BIT(7) +#define RK3399_GMAC_TXCLK_DLY_DISABLE GRF_CLR_BIT(7) +#define RK3399_GMAC_RXCLK_DLY_ENABLE GRF_BIT(15) +#define RK3399_GMAC_RXCLK_DLY_DISABLE GRF_CLR_BIT(15) +#define RK3399_GMAC_CLK_RX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 8) +#define RK3399_GMAC_CLK_TX_DL_CFG(val) HIWORD_UPDATE(val, 0x7F, 0) + +static void rk3399_set_to_rgmii(struct rk_priv_data *bsp_priv, + int tx_delay, int rx_delay) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RGMII | + RK3399_GMAC_RMII_MODE_CLR); + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON6, + RK3399_GMAC_RXCLK_DLY_ENABLE | + RK3399_GMAC_TXCLK_DLY_ENABLE | + RK3399_GMAC_CLK_RX_DL_CFG(rx_delay) | + RK3399_GMAC_CLK_TX_DL_CFG(tx_delay)); +} + +static void rk3399_set_to_rmii(struct rk_priv_data *bsp_priv) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_PHY_INTF_SEL_RMII | RK3399_GMAC_RMII_MODE); +} + +static void rk3399_set_rgmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_2_5M); + else if (speed == 100) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_25M); + else if (speed == 1000) + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_CLK_125M); + else + dev_err(dev, "unknown speed value for RGMII! speed=%d", speed); +} + +static void rk3399_set_rmii_speed(struct rk_priv_data *bsp_priv, int speed) +{ + struct device *dev = &bsp_priv->pdev->dev; + + if (IS_ERR(bsp_priv->grf)) { + dev_err(dev, "%s: Missing rockchip,grf property\n", __func__); + return; + } + + if (speed == 10) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_2_5M | + RK3399_GMAC_SPEED_10M); + } else if (speed == 100) { + regmap_write(bsp_priv->grf, RK3399_GRF_SOC_CON5, + RK3399_GMAC_RMII_CLK_25M | + RK3399_GMAC_SPEED_100M); + } else { + dev_err(dev, "unknown speed value for RMII! speed=%d", speed); + } +} + +static const struct rk_gmac_ops rk3399_ops = { + .set_to_rgmii = rk3399_set_to_rgmii, + .set_to_rmii = rk3399_set_to_rmii, + .set_rgmii_speed = rk3399_set_rgmii_speed, + .set_rmii_speed = rk3399_set_rmii_speed, +}; + static int gmac_clk_init(struct rk_priv_data *bsp_priv) { struct device *dev = &bsp_priv->pdev->dev; @@ -629,6 +854,16 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, "rockchip,grf"); bsp_priv->pdev = pdev; + gmac_clk_init(bsp_priv); + + return bsp_priv; +} + +static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) +{ + int ret; + struct device *dev = &bsp_priv->pdev->dev; + /*rmii or rgmii*/ if (bsp_priv->phy_iface == PHY_INTERFACE_MODE_RGMII) { dev_info(dev, "init for RGMII\n"); @@ -641,15 +876,6 @@ static struct rk_priv_data *rk_gmac_setup(struct platform_device *pdev, dev_err(dev, "NO interface defined!\n"); } - gmac_clk_init(bsp_priv); - - return bsp_priv; -} - -static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) -{ - int ret; - ret = phy_power_on(bsp_priv, true); if (ret) return ret; @@ -658,11 +884,19 @@ static int rk_gmac_powerup(struct rk_priv_data *bsp_priv) if (ret) return ret; + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + return 0; } static void rk_gmac_powerdown(struct rk_priv_data *gmac) { + struct device *dev = &gmac->pdev->dev; + + pm_runtime_put_sync(dev); + pm_runtime_disable(dev); + phy_power_on(gmac, false); gmac_clk_enable(gmac, false); } @@ -760,7 +994,9 @@ static int rk_gmac_probe(struct platform_device *pdev) static const struct of_device_id rk_gmac_dwmac_match[] = { { .compatible = "rockchip,rk3228-gmac", .data = &rk3228_ops }, { .compatible = "rockchip,rk3288-gmac", .data = &rk3288_ops }, + { .compatible = "rockchip,rk3366-gmac", .data = &rk3366_ops }, { .compatible = "rockchip,rk3368-gmac", .data = &rk3368_ops }, + { .compatible = "rockchip,rk3399-gmac", .data = &rk3399_ops }, { } }; MODULE_DEVICE_TABLE(of, rk_gmac_dwmac_match); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c new file mode 100644 index 000000000000..e5a926b8bee7 --- /dev/null +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c @@ -0,0 +1,194 @@ +/* + * dwmac-stm32.c - DWMAC Specific Glue layer for STM32 MCU + * + * Copyright (C) Alexandre Torgue 2015 + * Author: Alexandre Torgue + * License terms: GNU General Public License (GPL), version 2 + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "stmmac_platform.h" + +#define MII_PHY_SEL_MASK BIT(23) + +struct stm32_dwmac { + struct clk *clk_tx; + struct clk *clk_rx; + u32 mode_reg; /* MAC glue-logic mode register */ + struct regmap *regmap; + u32 speed; +}; + +static int stm32_dwmac_init(struct plat_stmmacenet_data *plat_dat) +{ + struct stm32_dwmac *dwmac = plat_dat->bsp_priv; + u32 reg = dwmac->mode_reg; + u32 val; + int ret; + + val = (plat_dat->interface == PHY_INTERFACE_MODE_MII) ? 0 : 1; + ret = regmap_update_bits(dwmac->regmap, reg, MII_PHY_SEL_MASK, val); + if (ret) + return ret; + + ret = clk_prepare_enable(dwmac->clk_tx); + if (ret) + return ret; + + ret = clk_prepare_enable(dwmac->clk_rx); + if (ret) + clk_disable_unprepare(dwmac->clk_tx); + + return ret; +} + +static void stm32_dwmac_clk_disable(struct stm32_dwmac *dwmac) +{ + clk_disable_unprepare(dwmac->clk_tx); + clk_disable_unprepare(dwmac->clk_rx); +} + +static int stm32_dwmac_parse_data(struct stm32_dwmac *dwmac, + struct device *dev) +{ + struct device_node *np = dev->of_node; + int err; + + /* Get TX/RX clocks */ + dwmac->clk_tx = devm_clk_get(dev, "mac-clk-tx"); + if (IS_ERR(dwmac->clk_tx)) { + dev_err(dev, "No tx clock provided...\n"); + return PTR_ERR(dwmac->clk_tx); + } + dwmac->clk_rx = devm_clk_get(dev, "mac-clk-rx"); + if (IS_ERR(dwmac->clk_rx)) { + dev_err(dev, "No rx clock provided...\n"); + return PTR_ERR(dwmac->clk_rx); + } + + /* Get mode register */ + dwmac->regmap = syscon_regmap_lookup_by_phandle(np, "st,syscon"); + if (IS_ERR(dwmac->regmap)) + return PTR_ERR(dwmac->regmap); + + err = of_property_read_u32_index(np, "st,syscon", 1, &dwmac->mode_reg); + if (err) + dev_err(dev, "Can't get sysconfig mode offset (%d)\n", err); + + return err; +} + +static int stm32_dwmac_probe(struct platform_device *pdev) +{ + struct plat_stmmacenet_data *plat_dat; + struct stmmac_resources stmmac_res; + struct stm32_dwmac *dwmac; + int ret; + + ret = stmmac_get_platform_resources(pdev, &stmmac_res); + if (ret) + return ret; + + plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac); + if (IS_ERR(plat_dat)) + return PTR_ERR(plat_dat); + + dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL); + if (!dwmac) + return -ENOMEM; + + ret = stm32_dwmac_parse_data(dwmac, &pdev->dev); + if (ret) { + dev_err(&pdev->dev, "Unable to parse OF data\n"); + return ret; + } + + plat_dat->bsp_priv = dwmac; + + ret = stm32_dwmac_init(plat_dat); + if (ret) + return ret; + + ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res); + if (ret) + stm32_dwmac_clk_disable(dwmac); + + return ret; +} + +static int stm32_dwmac_remove(struct platform_device *pdev) +{ + struct net_device *ndev = platform_get_drvdata(pdev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret = stmmac_dvr_remove(&pdev->dev); + + stm32_dwmac_clk_disable(priv->plat->bsp_priv); + + return ret; +} + +#ifdef CONFIG_PM_SLEEP +static int stm32_dwmac_suspend(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + ret = stmmac_suspend(dev); + stm32_dwmac_clk_disable(priv->plat->bsp_priv); + + return ret; +} + +static int stm32_dwmac_resume(struct device *dev) +{ + struct net_device *ndev = dev_get_drvdata(dev); + struct stmmac_priv *priv = netdev_priv(ndev); + int ret; + + ret = stm32_dwmac_init(priv->plat); + if (ret) + return ret; + + ret = stmmac_resume(dev); + + return ret; +} +#endif /* CONFIG_PM_SLEEP */ + +static SIMPLE_DEV_PM_OPS(stm32_dwmac_pm_ops, + stm32_dwmac_suspend, stm32_dwmac_resume); + +static const struct of_device_id stm32_dwmac_match[] = { + { .compatible = "st,stm32-dwmac"}, + { } +}; +MODULE_DEVICE_TABLE(of, stm32_dwmac_match); + +static struct platform_driver stm32_dwmac_driver = { + .probe = stm32_dwmac_probe, + .remove = stm32_dwmac_remove, + .driver = { + .name = "stm32-dwmac", + .pm = &stm32_dwmac_pm_ops, + .of_match_table = stm32_dwmac_match, + }, +}; +module_platform_driver(stm32_dwmac_driver); + +MODULE_AUTHOR("Alexandre Torgue "); +MODULE_DESCRIPTION("STMicroelectronics MCU DWMAC Specific Glue layer"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c index 885a5e64519d..7df4ff158f3d 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac1000_core.c @@ -145,7 +145,7 @@ static void dwmac1000_set_mchash(void __iomem *ioaddr, u32 *mcfilterbits, numhashregs = 8; break; default: - pr_debug("STMMAC: err in setting mulitcast filter\n"); + pr_debug("STMMAC: err in setting multicast filter\n"); return; break; } diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c index 756bb548e81a..0a0d6a86f397 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c @@ -265,6 +265,7 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac) * once needed on other platforms. */ if (of_device_is_compatible(np, "st,spear600-gmac") || + of_device_is_compatible(np, "snps,dwmac-3.50a") || of_device_is_compatible(np, "snps,dwmac-3.70a") || of_device_is_compatible(np, "snps,dwmac")) { /* Note that the max-frame-size parameter as defined in the diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c index 170a18b61281..6e3b82972ce8 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_ptp.c @@ -187,7 +187,7 @@ int stmmac_ptp_register(struct stmmac_priv *priv) if (IS_ERR(priv->ptp_clock)) { priv->ptp_clock = NULL; pr_err("ptp_clock_register() failed on %s\n", priv->dev->name); - } else + } else if (priv->ptp_clock) pr_debug("Added PTP HW clock successfully on %s\n", priv->dev->name); diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index 4490ebaed127..0d0053128542 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -2743,7 +2743,7 @@ static void dwceqos_set_msglevel(struct net_device *ndev, u32 msglevel) lp->msg_enable = msglevel; } -static struct ethtool_ops dwceqos_ethtool_ops = { +static const struct ethtool_ops dwceqos_ethtool_ops = { .get_drvinfo = dwceqos_get_drvinfo, .get_link = ethtool_op_get_link, .get_pauseparam = dwceqos_get_pauseparam, @@ -2761,7 +2761,7 @@ static struct ethtool_ops dwceqos_ethtool_ops = { .set_link_ksettings = phy_ethtool_set_link_ksettings, }; -static struct net_device_ops netdev_ops = { +static const struct net_device_ops netdev_ops = { .ndo_open = dwceqos_open, .ndo_stop = dwceqos_stop, .ndo_start_xmit = dwceqos_start_xmit, diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index d300d536d06f..fa0cfda24fd9 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -546,7 +546,8 @@ fatal_error: static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) { - int queue, len; + int queue; + unsigned int len; struct cpmac_desc *desc; struct cpmac_priv *priv = netdev_priv(dev); @@ -556,7 +557,7 @@ static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_padto(skb, ETH_ZLEN))) return NETDEV_TX_OK; - len = max(skb->len, ETH_ZLEN); + len = max_t(unsigned int, skb->len, ETH_ZLEN); queue = skb_get_queue_mapping(skb); netif_stop_subqueue(dev, queue); diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index f85d605e4560..c6cff3d2ff05 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -124,7 +124,7 @@ do { \ #define RX_PRIORITY_MAPPING 0x76543210 #define TX_PRIORITY_MAPPING 0x33221100 -#define CPDMA_TX_PRIORITY_MAP 0x76543210 +#define CPDMA_TX_PRIORITY_MAP 0x01234567 #define CPSW_VLAN_AWARE BIT(1) #define CPSW_ALE_VLAN_AWARE 1 @@ -140,9 +140,11 @@ do { \ #define CPSW_CMINTMAX_INTVL (1000 / CPSW_CMINTMIN_CNT) #define CPSW_CMINTMIN_INTVL ((1000 / CPSW_CMINTMAX_CNT) + 1) -#define cpsw_slave_index(priv) \ - ((priv->data.dual_emac) ? priv->emac_port : \ - priv->data.active_slave) +#define cpsw_slave_index(cpsw, priv) \ + ((cpsw->data.dual_emac) ? priv->emac_port : \ + cpsw->data.active_slave) +#define IRQ_NUM 2 +#define CPSW_MAX_QUEUES 8 static int debug_level; module_param(debug_level, int, 0); @@ -363,38 +365,41 @@ static inline void slave_write(struct cpsw_slave *slave, u32 val, u32 offset) __raw_writel(val, slave->regs + offset); } -struct cpsw_priv { - struct platform_device *pdev; - struct net_device *ndev; - struct napi_struct napi_rx; - struct napi_struct napi_tx; +struct cpsw_common { struct device *dev; struct cpsw_platform_data data; + struct napi_struct napi_rx; + struct napi_struct napi_tx; struct cpsw_ss_regs __iomem *regs; struct cpsw_wr_regs __iomem *wr_regs; u8 __iomem *hw_stats; struct cpsw_host_regs __iomem *host_port_regs; - u32 msg_enable; u32 version; u32 coal_intvl; u32 bus_freq_mhz; int rx_packet_max; - struct clk *clk; - u8 mac_addr[ETH_ALEN]; struct cpsw_slave *slaves; struct cpdma_ctlr *dma; - struct cpdma_chan *txch, *rxch; + struct cpdma_chan *txch[CPSW_MAX_QUEUES]; + struct cpdma_chan *rxch[CPSW_MAX_QUEUES]; struct cpsw_ale *ale; - bool rx_pause; - bool tx_pause; bool quirk_irq; bool rx_irq_disabled; bool tx_irq_disabled; - /* snapshot of IRQ numbers */ - u32 irqs_table[4]; - u32 num_irqs; - struct cpts *cpts; + u32 irqs_table[IRQ_NUM]; + struct cpts *cpts; + int rx_ch_num, tx_ch_num; +}; + +struct cpsw_priv { + struct net_device *ndev; + struct device *dev; + u32 msg_enable; + u8 mac_addr[ETH_ALEN]; + bool rx_pause; + bool tx_pause; u32 emac_port; + struct cpsw_common *cpsw; }; struct cpsw_stats { @@ -455,108 +460,92 @@ static const struct cpsw_stats cpsw_gstrings_stats[] = { { "Rx Start of Frame Overruns", CPSW_STAT(rxsofoverruns) }, { "Rx Middle of Frame Overruns", CPSW_STAT(rxmofoverruns) }, { "Rx DMA Overruns", CPSW_STAT(rxdmaoverruns) }, - { "Rx DMA chan: head_enqueue", CPDMA_RX_STAT(head_enqueue) }, - { "Rx DMA chan: tail_enqueue", CPDMA_RX_STAT(tail_enqueue) }, - { "Rx DMA chan: pad_enqueue", CPDMA_RX_STAT(pad_enqueue) }, - { "Rx DMA chan: misqueued", CPDMA_RX_STAT(misqueued) }, - { "Rx DMA chan: desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) }, - { "Rx DMA chan: pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) }, - { "Rx DMA chan: runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) }, - { "Rx DMA chan: runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) }, - { "Rx DMA chan: empty_dequeue", CPDMA_RX_STAT(empty_dequeue) }, - { "Rx DMA chan: busy_dequeue", CPDMA_RX_STAT(busy_dequeue) }, - { "Rx DMA chan: good_dequeue", CPDMA_RX_STAT(good_dequeue) }, - { "Rx DMA chan: requeue", CPDMA_RX_STAT(requeue) }, - { "Rx DMA chan: teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) }, - { "Tx DMA chan: head_enqueue", CPDMA_TX_STAT(head_enqueue) }, - { "Tx DMA chan: tail_enqueue", CPDMA_TX_STAT(tail_enqueue) }, - { "Tx DMA chan: pad_enqueue", CPDMA_TX_STAT(pad_enqueue) }, - { "Tx DMA chan: misqueued", CPDMA_TX_STAT(misqueued) }, - { "Tx DMA chan: desc_alloc_fail", CPDMA_TX_STAT(desc_alloc_fail) }, - { "Tx DMA chan: pad_alloc_fail", CPDMA_TX_STAT(pad_alloc_fail) }, - { "Tx DMA chan: runt_receive_buf", CPDMA_TX_STAT(runt_receive_buff) }, - { "Tx DMA chan: runt_transmit_buf", CPDMA_TX_STAT(runt_transmit_buff) }, - { "Tx DMA chan: empty_dequeue", CPDMA_TX_STAT(empty_dequeue) }, - { "Tx DMA chan: busy_dequeue", CPDMA_TX_STAT(busy_dequeue) }, - { "Tx DMA chan: good_dequeue", CPDMA_TX_STAT(good_dequeue) }, - { "Tx DMA chan: requeue", CPDMA_TX_STAT(requeue) }, - { "Tx DMA chan: teardown_dequeue", CPDMA_TX_STAT(teardown_dequeue) }, }; -#define CPSW_STATS_LEN ARRAY_SIZE(cpsw_gstrings_stats) +static const struct cpsw_stats cpsw_gstrings_ch_stats[] = { + { "head_enqueue", CPDMA_RX_STAT(head_enqueue) }, + { "tail_enqueue", CPDMA_RX_STAT(tail_enqueue) }, + { "pad_enqueue", CPDMA_RX_STAT(pad_enqueue) }, + { "misqueued", CPDMA_RX_STAT(misqueued) }, + { "desc_alloc_fail", CPDMA_RX_STAT(desc_alloc_fail) }, + { "pad_alloc_fail", CPDMA_RX_STAT(pad_alloc_fail) }, + { "runt_receive_buf", CPDMA_RX_STAT(runt_receive_buff) }, + { "runt_transmit_buf", CPDMA_RX_STAT(runt_transmit_buff) }, + { "empty_dequeue", CPDMA_RX_STAT(empty_dequeue) }, + { "busy_dequeue", CPDMA_RX_STAT(busy_dequeue) }, + { "good_dequeue", CPDMA_RX_STAT(good_dequeue) }, + { "requeue", CPDMA_RX_STAT(requeue) }, + { "teardown_dequeue", CPDMA_RX_STAT(teardown_dequeue) }, +}; -#define napi_to_priv(napi) container_of(napi, struct cpsw_priv, napi) +#define CPSW_STATS_COMMON_LEN ARRAY_SIZE(cpsw_gstrings_stats) +#define CPSW_STATS_CH_LEN ARRAY_SIZE(cpsw_gstrings_ch_stats) + +#define ndev_to_cpsw(ndev) (((struct cpsw_priv *)netdev_priv(ndev))->cpsw) +#define napi_to_cpsw(napi) container_of(napi, struct cpsw_common, napi) #define for_each_slave(priv, func, arg...) \ do { \ struct cpsw_slave *slave; \ + struct cpsw_common *cpsw = (priv)->cpsw; \ int n; \ - if (priv->data.dual_emac) \ - (func)((priv)->slaves + priv->emac_port, ##arg);\ + if (cpsw->data.dual_emac) \ + (func)((cpsw)->slaves + priv->emac_port, ##arg);\ else \ - for (n = (priv)->data.slaves, \ - slave = (priv)->slaves; \ + for (n = cpsw->data.slaves, \ + slave = cpsw->slaves; \ n; n--) \ (func)(slave++, ##arg); \ } while (0) -#define cpsw_get_slave_ndev(priv, __slave_no__) \ - ((__slave_no__ < priv->data.slaves) ? \ - priv->slaves[__slave_no__].ndev : NULL) -#define cpsw_get_slave_priv(priv, __slave_no__) \ - (((__slave_no__ < priv->data.slaves) && \ - (priv->slaves[__slave_no__].ndev)) ? \ - netdev_priv(priv->slaves[__slave_no__].ndev) : NULL) \ -#define cpsw_dual_emac_src_port_detect(status, priv, ndev, skb) \ +#define cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb) \ do { \ - if (!priv->data.dual_emac) \ + if (!cpsw->data.dual_emac) \ break; \ if (CPDMA_RX_SOURCE_PORT(status) == 1) { \ - ndev = cpsw_get_slave_ndev(priv, 0); \ - priv = netdev_priv(ndev); \ + ndev = cpsw->slaves[0].ndev; \ skb->dev = ndev; \ } else if (CPDMA_RX_SOURCE_PORT(status) == 2) { \ - ndev = cpsw_get_slave_ndev(priv, 1); \ - priv = netdev_priv(ndev); \ + ndev = cpsw->slaves[1].ndev; \ skb->dev = ndev; \ } \ } while (0) -#define cpsw_add_mcast(priv, addr) \ +#define cpsw_add_mcast(cpsw, priv, addr) \ do { \ - if (priv->data.dual_emac) { \ - struct cpsw_slave *slave = priv->slaves + \ + if (cpsw->data.dual_emac) { \ + struct cpsw_slave *slave = cpsw->slaves + \ priv->emac_port; \ - int slave_port = cpsw_get_slave_port(priv, \ + int slave_port = cpsw_get_slave_port( \ slave->slave_num); \ - cpsw_ale_add_mcast(priv->ale, addr, \ + cpsw_ale_add_mcast(cpsw->ale, addr, \ 1 << slave_port | ALE_PORT_HOST, \ ALE_VLAN, slave->port_vlan, 0); \ } else { \ - cpsw_ale_add_mcast(priv->ale, addr, \ + cpsw_ale_add_mcast(cpsw->ale, addr, \ ALE_ALL_PORTS, \ 0, 0, 0); \ } \ } while (0) -static inline int cpsw_get_slave_port(struct cpsw_priv *priv, u32 slave_num) +static inline int cpsw_get_slave_port(u32 slave_num) { return slave_num + 1; } static void cpsw_set_promiscious(struct net_device *ndev, bool enable) { - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpsw_ale *ale = priv->ale; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpsw_ale *ale = cpsw->ale; int i; - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { bool flag = false; /* Enabling promiscuous mode for one interface will be * common for both the interface as the interface shares * the same hardware resource. */ - for (i = 0; i < priv->data.slaves; i++) - if (priv->slaves[i].ndev->flags & IFF_PROMISC) + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].ndev->flags & IFF_PROMISC) flag = true; if (!enable && flag) { @@ -579,7 +568,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) unsigned long timeout = jiffies + HZ; /* Disable Learn for all ports (host is port 0 and slaves are port 1 and up */ - for (i = 0; i <= priv->data.slaves; i++) { + for (i = 0; i <= cpsw->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 1); cpsw_ale_control_set(ale, i, @@ -606,7 +595,7 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) cpsw_ale_control_set(ale, 0, ALE_P0_UNI_FLOOD, 0); /* Enable Learn for all ports (host is port 0 and slaves are port 1 and up */ - for (i = 0; i <= priv->data.slaves; i++) { + for (i = 0; i <= cpsw->data.slaves; i++) { cpsw_ale_control_set(ale, i, ALE_PORT_NOLEARN, 0); cpsw_ale_control_set(ale, i, @@ -620,17 +609,18 @@ static void cpsw_set_promiscious(struct net_device *ndev, bool enable) static void cpsw_ndo_set_rx_mode(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int vid; - if (priv->data.dual_emac) - vid = priv->slaves[priv->emac_port].port_vlan; + if (cpsw->data.dual_emac) + vid = cpsw->slaves[priv->emac_port].port_vlan; else - vid = priv->data.default_vlan; + vid = cpsw->data.default_vlan; if (ndev->flags & IFF_PROMISC) { /* Enable promiscuous mode */ cpsw_set_promiscious(ndev, true); - cpsw_ale_set_allmulti(priv->ale, IFF_ALLMULTI); + cpsw_ale_set_allmulti(cpsw->ale, IFF_ALLMULTI); return; } else { /* Disable promiscuous mode */ @@ -638,51 +628,54 @@ static void cpsw_ndo_set_rx_mode(struct net_device *ndev) } /* Restore allmulti on vlans if necessary */ - cpsw_ale_set_allmulti(priv->ale, priv->ndev->flags & IFF_ALLMULTI); + cpsw_ale_set_allmulti(cpsw->ale, priv->ndev->flags & IFF_ALLMULTI); /* Clear all mcast from ALE */ - cpsw_ale_flush_multicast(priv->ale, ALE_ALL_PORTS, vid); + cpsw_ale_flush_multicast(cpsw->ale, ALE_ALL_PORTS, vid); if (!netdev_mc_empty(ndev)) { struct netdev_hw_addr *ha; /* program multicast address list into ALE register */ netdev_for_each_mc_addr(ha, ndev) { - cpsw_add_mcast(priv, (u8 *)ha->addr); + cpsw_add_mcast(cpsw, priv, (u8 *)ha->addr); } } } -static void cpsw_intr_enable(struct cpsw_priv *priv) +static void cpsw_intr_enable(struct cpsw_common *cpsw) { - __raw_writel(0xFF, &priv->wr_regs->tx_en); - __raw_writel(0xFF, &priv->wr_regs->rx_en); + __raw_writel(0xFF, &cpsw->wr_regs->tx_en); + __raw_writel(0xFF, &cpsw->wr_regs->rx_en); - cpdma_ctlr_int_ctrl(priv->dma, true); + cpdma_ctlr_int_ctrl(cpsw->dma, true); return; } -static void cpsw_intr_disable(struct cpsw_priv *priv) +static void cpsw_intr_disable(struct cpsw_common *cpsw) { - __raw_writel(0, &priv->wr_regs->tx_en); - __raw_writel(0, &priv->wr_regs->rx_en); + __raw_writel(0, &cpsw->wr_regs->tx_en); + __raw_writel(0, &cpsw->wr_regs->rx_en); - cpdma_ctlr_int_ctrl(priv->dma, false); + cpdma_ctlr_int_ctrl(cpsw->dma, false); return; } static void cpsw_tx_handler(void *token, int len, int status) { + struct netdev_queue *txq; struct sk_buff *skb = token; struct net_device *ndev = skb->dev; - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* Check whether the queue is stopped due to stalled tx dma, if the * queue is stopped then start the queue as we have free desc for tx */ - if (unlikely(netif_queue_stopped(ndev))) - netif_wake_queue(ndev); - cpts_tx_timestamp(priv->cpts, skb); + txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb)); + if (unlikely(netif_tx_queue_stopped(txq))) + netif_tx_wake_queue(txq); + + cpts_tx_timestamp(cpsw->cpts, skb); ndev->stats.tx_packets++; ndev->stats.tx_bytes += len; dev_kfree_skb_any(skb); @@ -690,22 +683,23 @@ static void cpsw_tx_handler(void *token, int len, int status) static void cpsw_rx_handler(void *token, int len, int status) { + struct cpdma_chan *ch; struct sk_buff *skb = token; struct sk_buff *new_skb; struct net_device *ndev = skb->dev; - struct cpsw_priv *priv = netdev_priv(ndev); int ret = 0; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - cpsw_dual_emac_src_port_detect(status, priv, ndev, skb); + cpsw_dual_emac_src_port_detect(cpsw, status, ndev, skb); if (unlikely(status < 0) || unlikely(!netif_running(ndev))) { bool ndev_status = false; - struct cpsw_slave *slave = priv->slaves; + struct cpsw_slave *slave = cpsw->slaves; int n; - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { /* In dual emac mode check for all interfaces */ - for (n = priv->data.slaves; n; n--, slave++) + for (n = cpsw->data.slaves; n; n--, slave++) if (netif_running(slave->ndev)) ndev_status = true; } @@ -726,10 +720,11 @@ static void cpsw_rx_handler(void *token, int len, int status) return; } - new_skb = netdev_alloc_skb_ip_align(ndev, priv->rx_packet_max); + new_skb = netdev_alloc_skb_ip_align(ndev, cpsw->rx_packet_max); if (new_skb) { + skb_copy_queue_mapping(new_skb, skb); skb_put(skb, len); - cpts_rx_timestamp(priv->cpts, skb); + cpts_rx_timestamp(cpsw->cpts, skb); skb->protocol = eth_type_trans(skb, ndev); netif_receive_skb(skb); ndev->stats.rx_bytes += len; @@ -741,82 +736,116 @@ static void cpsw_rx_handler(void *token, int len, int status) } requeue: - ret = cpdma_chan_submit(priv->rxch, new_skb, new_skb->data, - skb_tailroom(new_skb), 0); + if (netif_dormant(ndev)) { + dev_kfree_skb_any(new_skb); + return; + } + + ch = cpsw->rxch[skb_get_queue_mapping(new_skb)]; + ret = cpdma_chan_submit(ch, new_skb, new_skb->data, + skb_tailroom(new_skb), 0); if (WARN_ON(ret < 0)) dev_kfree_skb_any(new_skb); } static irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id) { - struct cpsw_priv *priv = dev_id; + struct cpsw_common *cpsw = dev_id; - writel(0, &priv->wr_regs->tx_en); - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_TX); + writel(0, &cpsw->wr_regs->tx_en); + cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_TX); - if (priv->quirk_irq) { - disable_irq_nosync(priv->irqs_table[1]); - priv->tx_irq_disabled = true; + if (cpsw->quirk_irq) { + disable_irq_nosync(cpsw->irqs_table[1]); + cpsw->tx_irq_disabled = true; } - napi_schedule(&priv->napi_tx); + napi_schedule(&cpsw->napi_tx); return IRQ_HANDLED; } static irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id) { - struct cpsw_priv *priv = dev_id; + struct cpsw_common *cpsw = dev_id; - cpdma_ctlr_eoi(priv->dma, CPDMA_EOI_RX); - writel(0, &priv->wr_regs->rx_en); + cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_RX); + writel(0, &cpsw->wr_regs->rx_en); - if (priv->quirk_irq) { - disable_irq_nosync(priv->irqs_table[0]); - priv->rx_irq_disabled = true; + if (cpsw->quirk_irq) { + disable_irq_nosync(cpsw->irqs_table[0]); + cpsw->rx_irq_disabled = true; } - napi_schedule(&priv->napi_rx); + napi_schedule(&cpsw->napi_rx); return IRQ_HANDLED; } static int cpsw_tx_poll(struct napi_struct *napi_tx, int budget) { - struct cpsw_priv *priv = napi_to_priv(napi_tx); - int num_tx; + u32 ch_map; + int num_tx, ch; + struct cpsw_common *cpsw = napi_to_cpsw(napi_tx); - num_tx = cpdma_chan_process(priv->txch, budget); - if (num_tx < budget) { - napi_complete(napi_tx); - writel(0xff, &priv->wr_regs->tx_en); - if (priv->quirk_irq && priv->tx_irq_disabled) { - priv->tx_irq_disabled = false; - enable_irq(priv->irqs_table[1]); + /* process every unprocessed channel */ + ch_map = cpdma_ctrl_txchs_state(cpsw->dma); + for (ch = 0, num_tx = 0; num_tx < budget; ch_map >>= 1, ch++) { + if (!ch_map) { + ch_map = cpdma_ctrl_txchs_state(cpsw->dma); + if (!ch_map) + break; + + ch = 0; } + + if (!(ch_map & 0x01)) + continue; + + num_tx += cpdma_chan_process(cpsw->txch[ch], budget - num_tx); } - if (num_tx) - cpsw_dbg(priv, intr, "poll %d tx pkts\n", num_tx); + if (num_tx < budget) { + napi_complete(napi_tx); + writel(0xff, &cpsw->wr_regs->tx_en); + if (cpsw->quirk_irq && cpsw->tx_irq_disabled) { + cpsw->tx_irq_disabled = false; + enable_irq(cpsw->irqs_table[1]); + } + } return num_tx; } static int cpsw_rx_poll(struct napi_struct *napi_rx, int budget) { - struct cpsw_priv *priv = napi_to_priv(napi_rx); - int num_rx; + u32 ch_map; + int num_rx, ch; + struct cpsw_common *cpsw = napi_to_cpsw(napi_rx); - num_rx = cpdma_chan_process(priv->rxch, budget); - if (num_rx < budget) { - napi_complete(napi_rx); - writel(0xff, &priv->wr_regs->rx_en); - if (priv->quirk_irq && priv->rx_irq_disabled) { - priv->rx_irq_disabled = false; - enable_irq(priv->irqs_table[0]); + /* process every unprocessed channel */ + ch_map = cpdma_ctrl_rxchs_state(cpsw->dma); + for (ch = 0, num_rx = 0; num_rx < budget; ch_map >>= 1, ch++) { + if (!ch_map) { + ch_map = cpdma_ctrl_rxchs_state(cpsw->dma); + if (!ch_map) + break; + + ch = 0; } + + if (!(ch_map & 0x01)) + continue; + + num_rx += cpdma_chan_process(cpsw->rxch[ch], budget - num_rx); } - if (num_rx) - cpsw_dbg(priv, intr, "poll %d rx pkts\n", num_rx); + if (num_rx < budget) { + napi_complete(napi_rx); + writel(0xff, &cpsw->wr_regs->rx_en); + if (cpsw->quirk_irq && cpsw->rx_irq_disabled) { + cpsw->rx_irq_disabled = false; + enable_irq(cpsw->irqs_table[0]); + } + } return num_rx; } @@ -850,17 +879,18 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, struct phy_device *phy = slave->phy; u32 mac_control = 0; u32 slave_port; + struct cpsw_common *cpsw = priv->cpsw; if (!phy) return; - slave_port = cpsw_get_slave_port(priv, slave->slave_num); + slave_port = cpsw_get_slave_port(slave->slave_num); if (phy->link) { - mac_control = priv->data.mac_control; + mac_control = cpsw->data.mac_control; /* enable forwarding */ - cpsw_ale_control_set(priv->ale, slave_port, + cpsw_ale_control_set(cpsw->ale, slave_port, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); if (phy->speed == 1000) @@ -884,7 +914,7 @@ static void _cpsw_adjust_link(struct cpsw_slave *slave, } else { mac_control = 0; /* disable forwarding */ - cpsw_ale_control_set(priv->ale, slave_port, + cpsw_ale_control_set(cpsw->ale, slave_port, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); } @@ -906,19 +936,19 @@ static void cpsw_adjust_link(struct net_device *ndev) if (link) { netif_carrier_on(ndev); if (netif_running(ndev)) - netif_wake_queue(ndev); + netif_tx_wake_all_queues(ndev); } else { netif_carrier_off(ndev); - netif_stop_queue(ndev); + netif_tx_stop_all_queues(ndev); } } static int cpsw_get_coalesce(struct net_device *ndev, struct ethtool_coalesce *coal) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - coal->rx_coalesce_usecs = priv->coal_intvl; + coal->rx_coalesce_usecs = cpsw->coal_intvl; return 0; } @@ -931,11 +961,12 @@ static int cpsw_set_coalesce(struct net_device *ndev, u32 prescale = 0; u32 addnl_dvdr = 1; u32 coal_intvl = 0; + struct cpsw_common *cpsw = priv->cpsw; coal_intvl = coal->rx_coalesce_usecs; - int_ctrl = readl(&priv->wr_regs->int_control); - prescale = priv->bus_freq_mhz * 4; + int_ctrl = readl(&cpsw->wr_regs->int_control); + prescale = cpsw->bus_freq_mhz * 4; if (!coal->rx_coalesce_usecs) { int_ctrl &= ~(CPSW_INTPRESCALE_MASK | CPSW_INTPACEEN); @@ -963,53 +994,69 @@ static int cpsw_set_coalesce(struct net_device *ndev, } num_interrupts = (1000 * addnl_dvdr) / coal_intvl; - writel(num_interrupts, &priv->wr_regs->rx_imax); - writel(num_interrupts, &priv->wr_regs->tx_imax); + writel(num_interrupts, &cpsw->wr_regs->rx_imax); + writel(num_interrupts, &cpsw->wr_regs->tx_imax); int_ctrl |= CPSW_INTPACEEN; int_ctrl &= (~CPSW_INTPRESCALE_MASK); int_ctrl |= (prescale & CPSW_INTPRESCALE_MASK); update_return: - writel(int_ctrl, &priv->wr_regs->int_control); + writel(int_ctrl, &cpsw->wr_regs->int_control); cpsw_notice(priv, timer, "Set coalesce to %d usecs.\n", coal_intvl); - if (priv->data.dual_emac) { - int i; - - for (i = 0; i < priv->data.slaves; i++) { - priv = netdev_priv(priv->slaves[i].ndev); - priv->coal_intvl = coal_intvl; - } - } else { - priv->coal_intvl = coal_intvl; - } + cpsw->coal_intvl = coal_intvl; return 0; } static int cpsw_get_sset_count(struct net_device *ndev, int sset) { + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + switch (sset) { case ETH_SS_STATS: - return CPSW_STATS_LEN; + return (CPSW_STATS_COMMON_LEN + + (cpsw->rx_ch_num + cpsw->tx_ch_num) * + CPSW_STATS_CH_LEN); default: return -EOPNOTSUPP; } } +static void cpsw_add_ch_strings(u8 **p, int ch_num, int rx_dir) +{ + int ch_stats_len; + int line; + int i; + + ch_stats_len = CPSW_STATS_CH_LEN * ch_num; + for (i = 0; i < ch_stats_len; i++) { + line = i % CPSW_STATS_CH_LEN; + snprintf(*p, ETH_GSTRING_LEN, + "%s DMA chan %d: %s", rx_dir ? "Rx" : "Tx", + i / CPSW_STATS_CH_LEN, + cpsw_gstrings_ch_stats[line].stat_string); + *p += ETH_GSTRING_LEN; + } +} + static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data) { + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); u8 *p = data; int i; switch (stringset) { case ETH_SS_STATS: - for (i = 0; i < CPSW_STATS_LEN; i++) { + for (i = 0; i < CPSW_STATS_COMMON_LEN; i++) { memcpy(p, cpsw_gstrings_stats[i].stat_string, ETH_GSTRING_LEN); p += ETH_GSTRING_LEN; } + + cpsw_add_ch_strings(&p, cpsw->rx_ch_num, 1); + cpsw_add_ch_strings(&p, cpsw->tx_ch_num, 0); break; } } @@ -1017,86 +1064,78 @@ static void cpsw_get_strings(struct net_device *ndev, u32 stringset, u8 *data) static void cpsw_get_ethtool_stats(struct net_device *ndev, struct ethtool_stats *stats, u64 *data) { - struct cpsw_priv *priv = netdev_priv(ndev); - struct cpdma_chan_stats rx_stats; - struct cpdma_chan_stats tx_stats; - u32 val; u8 *p; - int i; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct cpdma_chan_stats ch_stats; + int i, l, ch; /* Collect Davinci CPDMA stats for Rx and Tx Channel */ - cpdma_chan_get_stats(priv->rxch, &rx_stats); - cpdma_chan_get_stats(priv->txch, &tx_stats); + for (l = 0; l < CPSW_STATS_COMMON_LEN; l++) + data[l] = readl(cpsw->hw_stats + + cpsw_gstrings_stats[l].stat_offset); - for (i = 0; i < CPSW_STATS_LEN; i++) { - switch (cpsw_gstrings_stats[i].type) { - case CPSW_STATS: - val = readl(priv->hw_stats + - cpsw_gstrings_stats[i].stat_offset); - data[i] = val; - break; + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + cpdma_chan_get_stats(cpsw->rxch[ch], &ch_stats); + for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) { + p = (u8 *)&ch_stats + + cpsw_gstrings_ch_stats[i].stat_offset; + data[l] = *(u32 *)p; + } + } - case CPDMA_RX_STATS: - p = (u8 *)&rx_stats + - cpsw_gstrings_stats[i].stat_offset; - data[i] = *(u32 *)p; - break; - - case CPDMA_TX_STATS: - p = (u8 *)&tx_stats + - cpsw_gstrings_stats[i].stat_offset; - data[i] = *(u32 *)p; - break; + for (ch = 0; ch < cpsw->tx_ch_num; ch++) { + cpdma_chan_get_stats(cpsw->txch[ch], &ch_stats); + for (i = 0; i < CPSW_STATS_CH_LEN; i++, l++) { + p = (u8 *)&ch_stats + + cpsw_gstrings_ch_stats[i].stat_offset; + data[l] = *(u32 *)p; } } } -static int cpsw_common_res_usage_state(struct cpsw_priv *priv) +static int cpsw_common_res_usage_state(struct cpsw_common *cpsw) { u32 i; u32 usage_count = 0; - if (!priv->data.dual_emac) + if (!cpsw->data.dual_emac) return 0; - for (i = 0; i < priv->data.slaves; i++) - if (priv->slaves[i].open_stat) + for (i = 0; i < cpsw->data.slaves; i++) + if (cpsw->slaves[i].open_stat) usage_count++; return usage_count; } -static inline int cpsw_tx_packet_submit(struct net_device *ndev, - struct cpsw_priv *priv, struct sk_buff *skb) +static inline int cpsw_tx_packet_submit(struct cpsw_priv *priv, + struct sk_buff *skb, + struct cpdma_chan *txch) { - if (!priv->data.dual_emac) - return cpdma_chan_submit(priv->txch, skb, skb->data, - skb->len, 0); + struct cpsw_common *cpsw = priv->cpsw; - if (ndev == cpsw_get_slave_ndev(priv, 0)) - return cpdma_chan_submit(priv->txch, skb, skb->data, - skb->len, 1); - else - return cpdma_chan_submit(priv->txch, skb, skb->data, - skb->len, 2); + return cpdma_chan_submit(txch, skb, skb->data, skb->len, + priv->emac_port + cpsw->data.dual_emac); } static inline void cpsw_add_dual_emac_def_ale_entries( struct cpsw_priv *priv, struct cpsw_slave *slave, u32 slave_port) { + struct cpsw_common *cpsw = priv->cpsw; u32 port_mask = 1 << slave_port | ALE_PORT_HOST; - if (priv->version == CPSW_VERSION_1) + if (cpsw->version == CPSW_VERSION_1) slave_write(slave, slave->port_vlan, CPSW1_PORT_VLAN); else slave_write(slave, slave->port_vlan, CPSW2_PORT_VLAN); - cpsw_ale_add_vlan(priv->ale, slave->port_vlan, port_mask, + cpsw_ale_add_vlan(cpsw->ale, slave->port_vlan, port_mask, port_mask, port_mask, 0); - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, port_mask, ALE_VLAN, slave->port_vlan, 0); - cpsw_ale_add_ucast(priv->ale, priv->mac_addr, - HOST_PORT_NUM, ALE_VLAN | ALE_SECURE, slave->port_vlan); + cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, + HOST_PORT_NUM, ALE_VLAN | + ALE_SECURE, slave->port_vlan); } static void soft_reset_slave(struct cpsw_slave *slave) @@ -1110,13 +1149,14 @@ static void soft_reset_slave(struct cpsw_slave *slave) static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) { u32 slave_port; + struct cpsw_common *cpsw = priv->cpsw; soft_reset_slave(slave); /* setup priority mapping */ __raw_writel(RX_PRIORITY_MAPPING, &slave->sliver->rx_pri_map); - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_1: slave_write(slave, TX_PRIORITY_MAPPING, CPSW1_TX_PRI_MAP); break; @@ -1128,17 +1168,17 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) } /* setup max packet size, and mac address */ - __raw_writel(priv->rx_packet_max, &slave->sliver->rx_maxlen); + __raw_writel(cpsw->rx_packet_max, &slave->sliver->rx_maxlen); cpsw_set_slave_mac(slave, priv); slave->mac_control = 0; /* no link yet */ - slave_port = cpsw_get_slave_port(priv, slave->slave_num); + slave_port = cpsw_get_slave_port(slave->slave_num); - if (priv->data.dual_emac) + if (cpsw->data.dual_emac) cpsw_add_dual_emac_def_ale_entries(priv, slave, slave_port); else - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, 1 << slave_port, 0, 0, ALE_MCAST_FWD_2); if (slave->data->phy_node) { @@ -1168,81 +1208,121 @@ static void cpsw_slave_open(struct cpsw_slave *slave, struct cpsw_priv *priv) phy_start(slave->phy); /* Configure GMII_SEL register */ - cpsw_phy_sel(&priv->pdev->dev, slave->phy->interface, slave->slave_num); + cpsw_phy_sel(cpsw->dev, slave->phy->interface, slave->slave_num); } static inline void cpsw_add_default_vlan(struct cpsw_priv *priv) { - const int vlan = priv->data.default_vlan; + struct cpsw_common *cpsw = priv->cpsw; + const int vlan = cpsw->data.default_vlan; u32 reg; int i; int unreg_mcast_mask; - reg = (priv->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : + reg = (cpsw->version == CPSW_VERSION_1) ? CPSW1_PORT_VLAN : CPSW2_PORT_VLAN; - writel(vlan, &priv->host_port_regs->port_vlan); + writel(vlan, &cpsw->host_port_regs->port_vlan); - for (i = 0; i < priv->data.slaves; i++) - slave_write(priv->slaves + i, vlan, reg); + for (i = 0; i < cpsw->data.slaves; i++) + slave_write(cpsw->slaves + i, vlan, reg); if (priv->ndev->flags & IFF_ALLMULTI) unreg_mcast_mask = ALE_ALL_PORTS; else unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2; - cpsw_ale_add_vlan(priv->ale, vlan, ALE_ALL_PORTS, + cpsw_ale_add_vlan(cpsw->ale, vlan, ALE_ALL_PORTS, ALE_ALL_PORTS, ALE_ALL_PORTS, unreg_mcast_mask); } static void cpsw_init_host_port(struct cpsw_priv *priv) { - u32 control_reg; u32 fifo_mode; + u32 control_reg; + struct cpsw_common *cpsw = priv->cpsw; /* soft reset the controller and initialize ale */ - soft_reset("cpsw", &priv->regs->soft_reset); - cpsw_ale_start(priv->ale); + soft_reset("cpsw", &cpsw->regs->soft_reset); + cpsw_ale_start(cpsw->ale); /* switch to vlan unaware mode */ - cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, ALE_VLAN_AWARE, + cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_VLAN_AWARE, CPSW_ALE_VLAN_AWARE); - control_reg = readl(&priv->regs->control); + control_reg = readl(&cpsw->regs->control); control_reg |= CPSW_VLAN_AWARE; - writel(control_reg, &priv->regs->control); - fifo_mode = (priv->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE : + writel(control_reg, &cpsw->regs->control); + fifo_mode = (cpsw->data.dual_emac) ? CPSW_FIFO_DUAL_MAC_MODE : CPSW_FIFO_NORMAL_MODE; - writel(fifo_mode, &priv->host_port_regs->tx_in_ctl); + writel(fifo_mode, &cpsw->host_port_regs->tx_in_ctl); /* setup host port priority mapping */ __raw_writel(CPDMA_TX_PRIORITY_MAP, - &priv->host_port_regs->cpdma_tx_pri_map); - __raw_writel(0, &priv->host_port_regs->cpdma_rx_chan_map); + &cpsw->host_port_regs->cpdma_tx_pri_map); + __raw_writel(0, &cpsw->host_port_regs->cpdma_rx_chan_map); - cpsw_ale_control_set(priv->ale, HOST_PORT_NUM, + cpsw_ale_control_set(cpsw->ale, HOST_PORT_NUM, ALE_PORT_STATE, ALE_PORT_STATE_FORWARD); - if (!priv->data.dual_emac) { - cpsw_ale_add_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM, + if (!cpsw->data.dual_emac) { + cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, 0, 0); - cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, ALE_PORT_HOST, 0, 0, ALE_MCAST_FWD_2); } } -static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv) +static int cpsw_fill_rx_channels(struct cpsw_priv *priv) +{ + struct cpsw_common *cpsw = priv->cpsw; + struct sk_buff *skb; + int ch_buf_num; + int ch, i, ret; + + for (ch = 0; ch < cpsw->rx_ch_num; ch++) { + ch_buf_num = cpdma_chan_get_rx_buf_num(cpsw->rxch[ch]); + for (i = 0; i < ch_buf_num; i++) { + skb = __netdev_alloc_skb_ip_align(priv->ndev, + cpsw->rx_packet_max, + GFP_KERNEL); + if (!skb) { + cpsw_err(priv, ifup, "cannot allocate skb\n"); + return -ENOMEM; + } + + skb_set_queue_mapping(skb, ch); + ret = cpdma_chan_submit(cpsw->rxch[ch], skb, skb->data, + skb_tailroom(skb), 0); + if (ret < 0) { + cpsw_err(priv, ifup, + "cannot submit skb to channel %d rx, error %d\n", + ch, ret); + kfree_skb(skb); + return ret; + } + kmemleak_not_leak(skb); + } + + cpsw_info(priv, ifup, "ch %d rx, submitted %d descriptors\n", + ch, ch_buf_num); + } + + return 0; +} + +static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_common *cpsw) { u32 slave_port; - slave_port = cpsw_get_slave_port(priv, slave->slave_num); + slave_port = cpsw_get_slave_port(slave->slave_num); if (!slave->phy) return; phy_stop(slave->phy); phy_disconnect(slave->phy); slave->phy = NULL; - cpsw_ale_control_set(priv->ale, slave_port, + cpsw_ale_control_set(cpsw->ale, slave_port, ALE_PORT_STATE, ALE_PORT_STATE_DISABLE); soft_reset_slave(slave); } @@ -1250,115 +1330,111 @@ static void cpsw_slave_stop(struct cpsw_slave *slave, struct cpsw_priv *priv) static int cpsw_ndo_open(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); - int i, ret; + struct cpsw_common *cpsw = priv->cpsw; + int ret; u32 reg; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } - if (!cpsw_common_res_usage_state(priv)) - cpsw_intr_disable(priv); + if (!cpsw_common_res_usage_state(cpsw)) + cpsw_intr_disable(cpsw); netif_carrier_off(ndev); - reg = priv->version; + /* Notify the stack of the actual queue counts. */ + ret = netif_set_real_num_tx_queues(ndev, cpsw->tx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of tx queues\n"); + goto err_cleanup; + } + + ret = netif_set_real_num_rx_queues(ndev, cpsw->rx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of rx queues\n"); + goto err_cleanup; + } + + reg = cpsw->version; dev_info(priv->dev, "initializing cpsw version %d.%d (%d)\n", CPSW_MAJOR_VERSION(reg), CPSW_MINOR_VERSION(reg), CPSW_RTL_VERSION(reg)); /* initialize host and slave ports */ - if (!cpsw_common_res_usage_state(priv)) + if (!cpsw_common_res_usage_state(cpsw)) cpsw_init_host_port(priv); for_each_slave(priv, cpsw_slave_open, priv); /* Add default VLAN */ - if (!priv->data.dual_emac) + if (!cpsw->data.dual_emac) cpsw_add_default_vlan(priv); else - cpsw_ale_add_vlan(priv->ale, priv->data.default_vlan, + cpsw_ale_add_vlan(cpsw->ale, cpsw->data.default_vlan, ALE_ALL_PORTS, ALE_ALL_PORTS, 0, 0); - if (!cpsw_common_res_usage_state(priv)) { - struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0); - int buf_num; - + if (!cpsw_common_res_usage_state(cpsw)) { /* setup tx dma to fixed prio and zero offset */ - cpdma_control_set(priv->dma, CPDMA_TX_PRIO_FIXED, 1); - cpdma_control_set(priv->dma, CPDMA_RX_BUFFER_OFFSET, 0); + cpdma_control_set(cpsw->dma, CPDMA_TX_PRIO_FIXED, 1); + cpdma_control_set(cpsw->dma, CPDMA_RX_BUFFER_OFFSET, 0); /* disable priority elevation */ - __raw_writel(0, &priv->regs->ptype); + __raw_writel(0, &cpsw->regs->ptype); /* enable statistics collection only on all ports */ - __raw_writel(0x7, &priv->regs->stat_port_en); + __raw_writel(0x7, &cpsw->regs->stat_port_en); /* Enable internal fifo flow control */ - writel(0x7, &priv->regs->flow_control); + writel(0x7, &cpsw->regs->flow_control); - napi_enable(&priv_sl0->napi_rx); - napi_enable(&priv_sl0->napi_tx); + napi_enable(&cpsw->napi_rx); + napi_enable(&cpsw->napi_tx); - if (priv_sl0->tx_irq_disabled) { - priv_sl0->tx_irq_disabled = false; - enable_irq(priv->irqs_table[1]); + if (cpsw->tx_irq_disabled) { + cpsw->tx_irq_disabled = false; + enable_irq(cpsw->irqs_table[1]); } - if (priv_sl0->rx_irq_disabled) { - priv_sl0->rx_irq_disabled = false; - enable_irq(priv->irqs_table[0]); + if (cpsw->rx_irq_disabled) { + cpsw->rx_irq_disabled = false; + enable_irq(cpsw->irqs_table[0]); } - buf_num = cpdma_chan_get_rx_buf_num(priv->dma); - for (i = 0; i < buf_num; i++) { - struct sk_buff *skb; + ret = cpsw_fill_rx_channels(priv); + if (ret < 0) + goto err_cleanup; - ret = -ENOMEM; - skb = __netdev_alloc_skb_ip_align(priv->ndev, - priv->rx_packet_max, GFP_KERNEL); - if (!skb) - goto err_cleanup; - ret = cpdma_chan_submit(priv->rxch, skb, skb->data, - skb_tailroom(skb), 0); - if (ret < 0) { - kfree_skb(skb); - goto err_cleanup; - } - kmemleak_not_leak(skb); - } - /* continue even if we didn't manage to submit all - * receive descs - */ - cpsw_info(priv, ifup, "submitted %d rx descriptors\n", i); - - if (cpts_register(&priv->pdev->dev, priv->cpts, - priv->data.cpts_clock_mult, - priv->data.cpts_clock_shift)) + if (cpts_register(cpsw->dev, cpsw->cpts, + cpsw->data.cpts_clock_mult, + cpsw->data.cpts_clock_shift)) dev_err(priv->dev, "error registering cpts device\n"); } /* Enable Interrupt pacing if configured */ - if (priv->coal_intvl != 0) { + if (cpsw->coal_intvl != 0) { struct ethtool_coalesce coal; - coal.rx_coalesce_usecs = priv->coal_intvl; + coal.rx_coalesce_usecs = cpsw->coal_intvl; cpsw_set_coalesce(ndev, &coal); } - cpdma_ctlr_start(priv->dma); - cpsw_intr_enable(priv); + cpdma_ctlr_start(cpsw->dma); + cpsw_intr_enable(cpsw); + + if (cpsw->data.dual_emac) + cpsw->slaves[priv->emac_port].open_stat = true; + + netif_tx_start_all_queues(ndev); - if (priv->data.dual_emac) - priv->slaves[priv->emac_port].open_stat = true; return 0; err_cleanup: - cpdma_ctlr_stop(priv->dma); - for_each_slave(priv, cpsw_slave_stop, priv); - pm_runtime_put_sync(&priv->pdev->dev); + cpdma_ctlr_stop(cpsw->dma); + for_each_slave(priv, cpsw_slave_stop, cpsw); + pm_runtime_put_sync(cpsw->dev); netif_carrier_off(priv->ndev); return ret; } @@ -1366,25 +1442,24 @@ err_cleanup: static int cpsw_ndo_stop(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; cpsw_info(priv, ifdown, "shutting down cpsw device\n"); - netif_stop_queue(priv->ndev); + netif_tx_stop_all_queues(priv->ndev); netif_carrier_off(priv->ndev); - if (cpsw_common_res_usage_state(priv) <= 1) { - struct cpsw_priv *priv_sl0 = cpsw_get_slave_priv(priv, 0); - - napi_disable(&priv_sl0->napi_rx); - napi_disable(&priv_sl0->napi_tx); - cpts_unregister(priv->cpts); - cpsw_intr_disable(priv); - cpdma_ctlr_stop(priv->dma); - cpsw_ale_stop(priv->ale); + if (cpsw_common_res_usage_state(cpsw) <= 1) { + napi_disable(&cpsw->napi_rx); + napi_disable(&cpsw->napi_tx); + cpts_unregister(cpsw->cpts); + cpsw_intr_disable(cpsw); + cpdma_ctlr_stop(cpsw->dma); + cpsw_ale_stop(cpsw->ale); } - for_each_slave(priv, cpsw_slave_stop, priv); - pm_runtime_put_sync(&priv->pdev->dev); - if (priv->data.dual_emac) - priv->slaves[priv->emac_port].open_stat = false; + for_each_slave(priv, cpsw_slave_stop, cpsw); + pm_runtime_put_sync(cpsw->dev); + if (cpsw->data.dual_emac) + cpsw->slaves[priv->emac_port].open_stat = false; return 0; } @@ -1392,7 +1467,10 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); - int ret; + struct cpsw_common *cpsw = priv->cpsw; + struct netdev_queue *txq; + struct cpdma_chan *txch; + int ret, q_idx; netif_trans_update(ndev); @@ -1403,12 +1481,17 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, } if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && - priv->cpts->tx_enable) + cpsw->cpts->tx_enable) skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; skb_tx_timestamp(skb); - ret = cpsw_tx_packet_submit(ndev, priv, skb); + q_idx = skb_get_queue_mapping(skb); + if (q_idx >= cpsw->tx_ch_num) + q_idx = q_idx % cpsw->tx_ch_num; + + txch = cpsw->txch[q_idx]; + ret = cpsw_tx_packet_submit(priv, skb, txch); if (unlikely(ret != 0)) { cpsw_err(priv, tx_err, "desc submit failed\n"); goto fail; @@ -1417,24 +1500,27 @@ static netdev_tx_t cpsw_ndo_start_xmit(struct sk_buff *skb, /* If there is no more tx desc left free then we need to * tell the kernel to stop sending us tx frames. */ - if (unlikely(!cpdma_check_free_tx_desc(priv->txch))) - netif_stop_queue(ndev); + if (unlikely(!cpdma_check_free_tx_desc(txch))) { + txq = netdev_get_tx_queue(ndev, q_idx); + netif_tx_stop_queue(txq); + } return NETDEV_TX_OK; fail: ndev->stats.tx_dropped++; - netif_stop_queue(ndev); + txq = netdev_get_tx_queue(ndev, skb_get_queue_mapping(skb)); + netif_tx_stop_queue(txq); return NETDEV_TX_BUSY; } #ifdef CONFIG_TI_CPTS -static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) +static void cpsw_hwtstamp_v1(struct cpsw_common *cpsw) { - struct cpsw_slave *slave = &priv->slaves[priv->data.active_slave]; + struct cpsw_slave *slave = &cpsw->slaves[cpsw->data.active_slave]; u32 ts_en, seq_id; - if (!priv->cpts->tx_enable && !priv->cpts->rx_enable) { + if (!cpsw->cpts->tx_enable && !cpsw->cpts->rx_enable) { slave_write(slave, 0, CPSW1_TS_CTL); return; } @@ -1442,10 +1528,10 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) seq_id = (30 << CPSW_V1_SEQ_ID_OFS_SHIFT) | ETH_P_1588; ts_en = EVENT_MSG_BITS << CPSW_V1_MSG_TYPE_OFS; - if (priv->cpts->tx_enable) + if (cpsw->cpts->tx_enable) ts_en |= CPSW_V1_TS_TX_EN; - if (priv->cpts->rx_enable) + if (cpsw->cpts->rx_enable) ts_en |= CPSW_V1_TS_RX_EN; slave_write(slave, ts_en, CPSW1_TS_CTL); @@ -1455,32 +1541,33 @@ static void cpsw_hwtstamp_v1(struct cpsw_priv *priv) static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) { struct cpsw_slave *slave; + struct cpsw_common *cpsw = priv->cpsw; u32 ctrl, mtype; - if (priv->data.dual_emac) - slave = &priv->slaves[priv->emac_port]; + if (cpsw->data.dual_emac) + slave = &cpsw->slaves[priv->emac_port]; else - slave = &priv->slaves[priv->data.active_slave]; + slave = &cpsw->slaves[cpsw->data.active_slave]; ctrl = slave_read(slave, CPSW2_CONTROL); - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_2: ctrl &= ~CTRL_V2_ALL_TS_MASK; - if (priv->cpts->tx_enable) + if (cpsw->cpts->tx_enable) ctrl |= CTRL_V2_TX_TS_BITS; - if (priv->cpts->rx_enable) + if (cpsw->cpts->rx_enable) ctrl |= CTRL_V2_RX_TS_BITS; break; case CPSW_VERSION_3: default: ctrl &= ~CTRL_V3_ALL_TS_MASK; - if (priv->cpts->tx_enable) + if (cpsw->cpts->tx_enable) ctrl |= CTRL_V3_TX_TS_BITS; - if (priv->cpts->rx_enable) + if (cpsw->cpts->rx_enable) ctrl |= CTRL_V3_RX_TS_BITS; break; } @@ -1489,18 +1576,19 @@ static void cpsw_hwtstamp_v2(struct cpsw_priv *priv) slave_write(slave, mtype, CPSW2_TS_SEQ_MTYPE); slave_write(slave, ctrl, CPSW2_CONTROL); - __raw_writel(ETH_P_1588, &priv->regs->ts_ltype); + __raw_writel(ETH_P_1588, &cpsw->regs->ts_ltype); } static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) { struct cpsw_priv *priv = netdev_priv(dev); - struct cpts *cpts = priv->cpts; struct hwtstamp_config cfg; + struct cpsw_common *cpsw = priv->cpsw; + struct cpts *cpts = cpsw->cpts; - if (priv->version != CPSW_VERSION_1 && - priv->version != CPSW_VERSION_2 && - priv->version != CPSW_VERSION_3) + if (cpsw->version != CPSW_VERSION_1 && + cpsw->version != CPSW_VERSION_2 && + cpsw->version != CPSW_VERSION_3) return -EOPNOTSUPP; if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg))) @@ -1540,9 +1628,9 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) cpts->tx_enable = cfg.tx_type == HWTSTAMP_TX_ON; - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_1: - cpsw_hwtstamp_v1(priv); + cpsw_hwtstamp_v1(cpsw); break; case CPSW_VERSION_2: case CPSW_VERSION_3: @@ -1557,13 +1645,13 @@ static int cpsw_hwtstamp_set(struct net_device *dev, struct ifreq *ifr) static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) { - struct cpsw_priv *priv = netdev_priv(dev); - struct cpts *cpts = priv->cpts; + struct cpsw_common *cpsw = ndev_to_cpsw(dev); + struct cpts *cpts = cpsw->cpts; struct hwtstamp_config cfg; - if (priv->version != CPSW_VERSION_1 && - priv->version != CPSW_VERSION_2 && - priv->version != CPSW_VERSION_3) + if (cpsw->version != CPSW_VERSION_1 && + cpsw->version != CPSW_VERSION_2 && + cpsw->version != CPSW_VERSION_3) return -EOPNOTSUPP; cfg.flags = 0; @@ -1579,7 +1667,8 @@ static int cpsw_hwtstamp_get(struct net_device *dev, struct ifreq *ifr) static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) { struct cpsw_priv *priv = netdev_priv(dev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); if (!netif_running(dev)) return -EINVAL; @@ -1593,27 +1682,33 @@ static int cpsw_ndo_ioctl(struct net_device *dev, struct ifreq *req, int cmd) #endif } - if (!priv->slaves[slave_no].phy) + if (!cpsw->slaves[slave_no].phy) return -EOPNOTSUPP; - return phy_mii_ioctl(priv->slaves[slave_no].phy, req, cmd); + return phy_mii_ioctl(cpsw->slaves[slave_no].phy, req, cmd); } static void cpsw_ndo_tx_timeout(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; + int ch; cpsw_err(priv, tx_err, "transmit timeout, restarting dma\n"); ndev->stats.tx_errors++; - cpsw_intr_disable(priv); - cpdma_chan_stop(priv->txch); - cpdma_chan_start(priv->txch); - cpsw_intr_enable(priv); + cpsw_intr_disable(cpsw); + for (ch = 0; ch < cpsw->tx_ch_num; ch++) { + cpdma_chan_stop(cpsw->txch[ch]); + cpdma_chan_start(cpsw->txch[ch]); + } + + cpsw_intr_enable(cpsw); } static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) { struct cpsw_priv *priv = netdev_priv(ndev); struct sockaddr *addr = (struct sockaddr *)p; + struct cpsw_common *cpsw = priv->cpsw; int flags = 0; u16 vid = 0; int ret; @@ -1621,27 +1716,27 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } - if (priv->data.dual_emac) { - vid = priv->slaves[priv->emac_port].port_vlan; + if (cpsw->data.dual_emac) { + vid = cpsw->slaves[priv->emac_port].port_vlan; flags = ALE_VLAN; } - cpsw_ale_del_ucast(priv->ale, priv->mac_addr, HOST_PORT_NUM, + cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, flags, vid); - cpsw_ale_add_ucast(priv->ale, addr->sa_data, HOST_PORT_NUM, + cpsw_ale_add_ucast(cpsw->ale, addr->sa_data, HOST_PORT_NUM, flags, vid); memcpy(priv->mac_addr, addr->sa_data, ETH_ALEN); memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN); for_each_slave(priv, cpsw_set_slave_mac, priv); - pm_runtime_put(&priv->pdev->dev); + pm_runtime_put(cpsw->dev); return 0; } @@ -1649,12 +1744,12 @@ static int cpsw_ndo_set_mac_address(struct net_device *ndev, void *p) #ifdef CONFIG_NET_POLL_CONTROLLER static void cpsw_ndo_poll_controller(struct net_device *ndev) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - cpsw_intr_disable(priv); - cpsw_rx_interrupt(priv->irqs_table[0], priv); - cpsw_tx_interrupt(priv->irqs_table[1], priv); - cpsw_intr_enable(priv); + cpsw_intr_disable(cpsw); + cpsw_rx_interrupt(cpsw->irqs_table[0], cpsw); + cpsw_tx_interrupt(cpsw->irqs_table[1], cpsw); + cpsw_intr_enable(cpsw); } #endif @@ -1664,8 +1759,9 @@ static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv, int ret; int unreg_mcast_mask = 0; u32 port_mask; + struct cpsw_common *cpsw = priv->cpsw; - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { port_mask = (1 << (priv->emac_port + 1)) | ALE_PORT_HOST; if (priv->ndev->flags & IFF_ALLMULTI) @@ -1679,27 +1775,27 @@ static inline int cpsw_add_vlan_ale_entry(struct cpsw_priv *priv, unreg_mcast_mask = ALE_PORT_1 | ALE_PORT_2; } - ret = cpsw_ale_add_vlan(priv->ale, vid, port_mask, 0, port_mask, + ret = cpsw_ale_add_vlan(cpsw->ale, vid, port_mask, 0, port_mask, unreg_mcast_mask); if (ret != 0) return ret; - ret = cpsw_ale_add_ucast(priv->ale, priv->mac_addr, + ret = cpsw_ale_add_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN, vid); if (ret != 0) goto clean_vid; - ret = cpsw_ale_add_mcast(priv->ale, priv->ndev->broadcast, + ret = cpsw_ale_add_mcast(cpsw->ale, priv->ndev->broadcast, port_mask, ALE_VLAN, vid, 0); if (ret != 0) goto clean_vlan_ucast; return 0; clean_vlan_ucast: - cpsw_ale_del_ucast(priv->ale, priv->mac_addr, + cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN, vid); clean_vid: - cpsw_ale_del_vlan(priv->ale, vid, 0); + cpsw_ale_del_vlan(cpsw->ale, vid, 0); return ret; } @@ -1707,26 +1803,27 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int ret; - if (vid == priv->data.default_vlan) + if (vid == cpsw->data.default_vlan) return 0; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { /* In dual EMAC, reserved VLAN id should not be used for * creating VLAN interfaces as this can break the dual * EMAC port separation */ int i; - for (i = 0; i < priv->data.slaves; i++) { - if (vid == priv->slaves[i].port_vlan) + for (i = 0; i < cpsw->data.slaves; i++) { + if (vid == cpsw->slaves[i].port_vlan) return -EINVAL; } } @@ -1734,7 +1831,7 @@ static int cpsw_ndo_vlan_rx_add_vid(struct net_device *ndev, dev_info(priv->dev, "Adding vlanid %d to vlan filter\n", vid); ret = cpsw_add_vlan_ale_entry(priv, vid); - pm_runtime_put(&priv->pdev->dev); + pm_runtime_put(cpsw->dev); return ret; } @@ -1742,39 +1839,40 @@ static int cpsw_ndo_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto, u16 vid) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int ret; - if (vid == priv->data.default_vlan) + if (vid == cpsw->data.default_vlan) return 0; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); return ret; } - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { int i; - for (i = 0; i < priv->data.slaves; i++) { - if (vid == priv->slaves[i].port_vlan) + for (i = 0; i < cpsw->data.slaves; i++) { + if (vid == cpsw->slaves[i].port_vlan) return -EINVAL; } } dev_info(priv->dev, "removing vlanid %d from vlan filter\n", vid); - ret = cpsw_ale_del_vlan(priv->ale, vid, 0); + ret = cpsw_ale_del_vlan(cpsw->ale, vid, 0); if (ret != 0) return ret; - ret = cpsw_ale_del_ucast(priv->ale, priv->mac_addr, + ret = cpsw_ale_del_ucast(cpsw->ale, priv->mac_addr, HOST_PORT_NUM, ALE_VLAN, vid); if (ret != 0) return ret; - ret = cpsw_ale_del_mcast(priv->ale, priv->ndev->broadcast, + ret = cpsw_ale_del_mcast(cpsw->ale, priv->ndev->broadcast, 0, ALE_VLAN, vid); - pm_runtime_put(&priv->pdev->dev); + pm_runtime_put(cpsw->dev); return ret; } @@ -1797,31 +1895,32 @@ static const struct net_device_ops cpsw_netdev_ops = { static int cpsw_get_regs_len(struct net_device *ndev) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - return priv->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32); + return cpsw->data.ale_entries * ALE_ENTRY_WORDS * sizeof(u32); } static void cpsw_get_regs(struct net_device *ndev, struct ethtool_regs *regs, void *p) { - struct cpsw_priv *priv = netdev_priv(ndev); u32 *reg = p; + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); /* update CPSW IP version */ - regs->version = priv->version; + regs->version = cpsw->version; - cpsw_ale_dump(priv->ale, reg); + cpsw_ale_dump(cpsw->ale, reg); } static void cpsw_get_drvinfo(struct net_device *ndev, struct ethtool_drvinfo *info) { - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + struct platform_device *pdev = to_platform_device(cpsw->dev); strlcpy(info->driver, "cpsw", sizeof(info->driver)); strlcpy(info->version, "1.0", sizeof(info->version)); - strlcpy(info->bus_info, priv->pdev->name, sizeof(info->bus_info)); + strlcpy(info->bus_info, pdev->name, sizeof(info->bus_info)); } static u32 cpsw_get_msglevel(struct net_device *ndev) @@ -1840,7 +1939,7 @@ static int cpsw_get_ts_info(struct net_device *ndev, struct ethtool_ts_info *info) { #ifdef CONFIG_TI_CPTS - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); info->so_timestamping = SOF_TIMESTAMPING_TX_HARDWARE | @@ -1849,7 +1948,7 @@ static int cpsw_get_ts_info(struct net_device *ndev, SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RAW_HARDWARE; - info->phc_index = priv->cpts->phc_index; + info->phc_index = cpsw->cpts->phc_index; info->tx_types = (1 << HWTSTAMP_TX_OFF) | (1 << HWTSTAMP_TX_ON); @@ -1872,10 +1971,11 @@ static int cpsw_get_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); - if (priv->slaves[slave_no].phy) - return phy_ethtool_gset(priv->slaves[slave_no].phy, ecmd); + if (cpsw->slaves[slave_no].phy) + return phy_ethtool_gset(cpsw->slaves[slave_no].phy, ecmd); else return -EOPNOTSUPP; } @@ -1883,10 +1983,11 @@ static int cpsw_get_settings(struct net_device *ndev, static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); - if (priv->slaves[slave_no].phy) - return phy_ethtool_sset(priv->slaves[slave_no].phy, ecmd); + if (cpsw->slaves[slave_no].phy) + return phy_ethtool_sset(cpsw->slaves[slave_no].phy, ecmd); else return -EOPNOTSUPP; } @@ -1894,22 +1995,24 @@ static int cpsw_set_settings(struct net_device *ndev, struct ethtool_cmd *ecmd) static void cpsw_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); wol->supported = 0; wol->wolopts = 0; - if (priv->slaves[slave_no].phy) - phy_ethtool_get_wol(priv->slaves[slave_no].phy, wol); + if (cpsw->slaves[slave_no].phy) + phy_ethtool_get_wol(cpsw->slaves[slave_no].phy, wol); } static int cpsw_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) { struct cpsw_priv *priv = netdev_priv(ndev); - int slave_no = cpsw_slave_index(priv); + struct cpsw_common *cpsw = priv->cpsw; + int slave_no = cpsw_slave_index(cpsw, priv); - if (priv->slaves[slave_no].phy) - return phy_ethtool_set_wol(priv->slaves[slave_no].phy, wol); + if (cpsw->slaves[slave_no].phy) + return phy_ethtool_set_wol(cpsw->slaves[slave_no].phy, wol); else return -EOPNOTSUPP; } @@ -1940,12 +2043,13 @@ static int cpsw_set_pauseparam(struct net_device *ndev, static int cpsw_ethtool_op_begin(struct net_device *ndev) { struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; int ret; - ret = pm_runtime_get_sync(&priv->pdev->dev); + ret = pm_runtime_get_sync(cpsw->dev); if (ret < 0) { cpsw_err(priv, drv, "ethtool begin failed %d\n", ret); - pm_runtime_put_noidle(&priv->pdev->dev); + pm_runtime_put_noidle(cpsw->dev); } return ret; @@ -1956,11 +2060,185 @@ static void cpsw_ethtool_op_complete(struct net_device *ndev) struct cpsw_priv *priv = netdev_priv(ndev); int ret; - ret = pm_runtime_put(&priv->pdev->dev); + ret = pm_runtime_put(priv->cpsw->dev); if (ret < 0) cpsw_err(priv, drv, "ethtool complete failed %d\n", ret); } +static void cpsw_get_channels(struct net_device *ndev, + struct ethtool_channels *ch) +{ + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); + + ch->max_combined = 0; + ch->max_rx = CPSW_MAX_QUEUES; + ch->max_tx = CPSW_MAX_QUEUES; + ch->max_other = 0; + ch->other_count = 0; + ch->rx_count = cpsw->rx_ch_num; + ch->tx_count = cpsw->tx_ch_num; + ch->combined_count = 0; +} + +static int cpsw_check_ch_settings(struct cpsw_common *cpsw, + struct ethtool_channels *ch) +{ + if (ch->combined_count) + return -EINVAL; + + /* verify we have at least one channel in each direction */ + if (!ch->rx_count || !ch->tx_count) + return -EINVAL; + + if (ch->rx_count > cpsw->data.channels || + ch->tx_count > cpsw->data.channels) + return -EINVAL; + + return 0; +} + +static int cpsw_update_channels_res(struct cpsw_priv *priv, int ch_num, int rx) +{ + int (*poll)(struct napi_struct *, int); + struct cpsw_common *cpsw = priv->cpsw; + void (*handler)(void *, int, int); + struct cpdma_chan **chan; + int ret, *ch; + + if (rx) { + ch = &cpsw->rx_ch_num; + chan = cpsw->rxch; + handler = cpsw_rx_handler; + poll = cpsw_rx_poll; + } else { + ch = &cpsw->tx_ch_num; + chan = cpsw->txch; + handler = cpsw_tx_handler; + poll = cpsw_tx_poll; + } + + while (*ch < ch_num) { + chan[*ch] = cpdma_chan_create(cpsw->dma, *ch, handler, rx); + + if (IS_ERR(chan[*ch])) + return PTR_ERR(chan[*ch]); + + if (!chan[*ch]) + return -EINVAL; + + cpsw_info(priv, ifup, "created new %d %s channel\n", *ch, + (rx ? "rx" : "tx")); + (*ch)++; + } + + while (*ch > ch_num) { + (*ch)--; + + ret = cpdma_chan_destroy(chan[*ch]); + if (ret) + return ret; + + cpsw_info(priv, ifup, "destroyed %d %s channel\n", *ch, + (rx ? "rx" : "tx")); + } + + return 0; +} + +static int cpsw_update_channels(struct cpsw_priv *priv, + struct ethtool_channels *ch) +{ + int ret; + + ret = cpsw_update_channels_res(priv, ch->rx_count, 1); + if (ret) + return ret; + + ret = cpsw_update_channels_res(priv, ch->tx_count, 0); + if (ret) + return ret; + + return 0; +} + +static int cpsw_set_channels(struct net_device *ndev, + struct ethtool_channels *chs) +{ + struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_slave *slave; + int i, ret; + + ret = cpsw_check_ch_settings(cpsw, chs); + if (ret < 0) + return ret; + + /* Disable NAPI scheduling */ + cpsw_intr_disable(cpsw); + + /* Stop all transmit queues for every network device. + * Disable re-using rx descriptors with dormant_on. + */ + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + + netif_tx_stop_all_queues(slave->ndev); + netif_dormant_on(slave->ndev); + } + + /* Handle rest of tx packets and stop cpdma channels */ + cpdma_ctlr_stop(cpsw->dma); + ret = cpsw_update_channels(priv, chs); + if (ret) + goto err; + + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + + /* Inform stack about new count of queues */ + ret = netif_set_real_num_tx_queues(slave->ndev, + cpsw->tx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of tx queues\n"); + goto err; + } + + ret = netif_set_real_num_rx_queues(slave->ndev, + cpsw->rx_ch_num); + if (ret) { + dev_err(priv->dev, "cannot set real number of rx queues\n"); + goto err; + } + + /* Enable rx packets handling */ + netif_dormant_off(slave->ndev); + } + + if (cpsw_common_res_usage_state(cpsw)) { + ret = cpsw_fill_rx_channels(priv); + if (ret) + goto err; + + /* After this receive is started */ + cpdma_ctlr_start(cpsw->dma); + cpsw_intr_enable(cpsw); + } + + /* Resume transmit for every affected interface */ + for (i = cpsw->data.slaves, slave = cpsw->slaves; i; i--, slave++) { + if (!(slave->ndev && netif_running(slave->ndev))) + continue; + netif_tx_start_all_queues(slave->ndev); + } + return 0; +err: + dev_err(priv->dev, "cannot update channels number, closing device\n"); + dev_close(ndev); + return ret; +} + static const struct ethtool_ops cpsw_ethtool_ops = { .get_drvinfo = cpsw_get_drvinfo, .get_msglevel = cpsw_get_msglevel, @@ -1982,14 +2260,16 @@ static const struct ethtool_ops cpsw_ethtool_ops = { .get_regs = cpsw_get_regs, .begin = cpsw_ethtool_op_begin, .complete = cpsw_ethtool_op_complete, + .get_channels = cpsw_get_channels, + .set_channels = cpsw_set_channels, }; -static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_priv *priv, +static void cpsw_slave_init(struct cpsw_slave *slave, struct cpsw_common *cpsw, u32 slave_reg_ofs, u32 sliver_reg_ofs) { - void __iomem *regs = priv->regs; + void __iomem *regs = cpsw->regs; int slave_num = slave->slave_num; - struct cpsw_slave_data *data = priv->data.slave_data + slave_num; + struct cpsw_slave_data *data = cpsw->data.slave_data + slave_num; slave->data = data; slave->regs = regs + slave_reg_ofs; @@ -2160,71 +2440,50 @@ no_phy_slave: return 0; } -static int cpsw_probe_dual_emac(struct platform_device *pdev, - struct cpsw_priv *priv) +static int cpsw_probe_dual_emac(struct cpsw_priv *priv) { - struct cpsw_platform_data *data = &priv->data; + struct cpsw_common *cpsw = priv->cpsw; + struct cpsw_platform_data *data = &cpsw->data; struct net_device *ndev; struct cpsw_priv *priv_sl2; - int ret = 0, i; + int ret = 0; - ndev = alloc_etherdev(sizeof(struct cpsw_priv)); + ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES); if (!ndev) { - dev_err(&pdev->dev, "cpsw: error allocating net_device\n"); + dev_err(cpsw->dev, "cpsw: error allocating net_device\n"); return -ENOMEM; } priv_sl2 = netdev_priv(ndev); - priv_sl2->data = *data; - priv_sl2->pdev = pdev; + priv_sl2->cpsw = cpsw; priv_sl2->ndev = ndev; priv_sl2->dev = &ndev->dev; priv_sl2->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); - priv_sl2->rx_packet_max = max(rx_packet_max, 128); if (is_valid_ether_addr(data->slave_data[1].mac_addr)) { memcpy(priv_sl2->mac_addr, data->slave_data[1].mac_addr, ETH_ALEN); - dev_info(&pdev->dev, "cpsw: Detected MACID = %pM\n", priv_sl2->mac_addr); + dev_info(cpsw->dev, "cpsw: Detected MACID = %pM\n", + priv_sl2->mac_addr); } else { random_ether_addr(priv_sl2->mac_addr); - dev_info(&pdev->dev, "cpsw: Random MACID = %pM\n", priv_sl2->mac_addr); + dev_info(cpsw->dev, "cpsw: Random MACID = %pM\n", + priv_sl2->mac_addr); } memcpy(ndev->dev_addr, priv_sl2->mac_addr, ETH_ALEN); - priv_sl2->slaves = priv->slaves; - priv_sl2->clk = priv->clk; - - priv_sl2->coal_intvl = 0; - priv_sl2->bus_freq_mhz = priv->bus_freq_mhz; - - priv_sl2->regs = priv->regs; - priv_sl2->host_port_regs = priv->host_port_regs; - priv_sl2->wr_regs = priv->wr_regs; - priv_sl2->hw_stats = priv->hw_stats; - priv_sl2->dma = priv->dma; - priv_sl2->txch = priv->txch; - priv_sl2->rxch = priv->rxch; - priv_sl2->ale = priv->ale; priv_sl2->emac_port = 1; - priv->slaves[1].ndev = ndev; - priv_sl2->cpts = priv->cpts; - priv_sl2->version = priv->version; - - for (i = 0; i < priv->num_irqs; i++) { - priv_sl2->irqs_table[i] = priv->irqs_table[i]; - priv_sl2->num_irqs = priv->num_irqs; - } + cpsw->slaves[1].ndev = ndev; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; /* register the network device */ - SET_NETDEV_DEV(ndev, &pdev->dev); + SET_NETDEV_DEV(ndev, cpsw->dev); ret = register_netdev(ndev); if (ret) { - dev_err(&pdev->dev, "cpsw: error registering net device\n"); + dev_err(cpsw->dev, "cpsw: error registering net device\n"); free_netdev(ndev); ret = -ENODEV; } @@ -2272,6 +2531,7 @@ MODULE_DEVICE_TABLE(of, cpsw_of_mtable); static int cpsw_probe(struct platform_device *pdev) { + struct clk *clk; struct cpsw_platform_data *data; struct net_device *ndev; struct cpsw_priv *priv; @@ -2282,10 +2542,14 @@ static int cpsw_probe(struct platform_device *pdev) const struct of_device_id *of_id; struct gpio_descs *mode; u32 slave_offset, sliver_offset, slave_size; + struct cpsw_common *cpsw; int ret = 0, i; int irq; - ndev = alloc_etherdev(sizeof(struct cpsw_priv)); + cpsw = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_common), GFP_KERNEL); + cpsw->dev = &pdev->dev; + + ndev = alloc_etherdev_mq(sizeof(struct cpsw_priv), CPSW_MAX_QUEUES); if (!ndev) { dev_err(&pdev->dev, "error allocating net_device\n"); return -ENOMEM; @@ -2293,13 +2557,13 @@ static int cpsw_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); priv = netdev_priv(ndev); - priv->pdev = pdev; + priv->cpsw = cpsw; priv->ndev = ndev; priv->dev = &ndev->dev; priv->msg_enable = netif_msg_init(debug_level, CPSW_DEBUG); - priv->rx_packet_max = max(rx_packet_max, 128); - priv->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL); - if (!priv->cpts) { + cpsw->rx_packet_max = max(rx_packet_max, 128); + cpsw->cpts = devm_kzalloc(&pdev->dev, sizeof(struct cpts), GFP_KERNEL); + if (!cpsw->cpts) { dev_err(&pdev->dev, "error allocating cpts\n"); ret = -ENOMEM; goto clean_ndev_ret; @@ -2320,12 +2584,14 @@ static int cpsw_probe(struct platform_device *pdev) /* Select default pin state */ pinctrl_pm_select_default_state(&pdev->dev); - if (cpsw_probe_dt(&priv->data, pdev)) { + if (cpsw_probe_dt(&cpsw->data, pdev)) { dev_err(&pdev->dev, "cpsw: platform data missing\n"); ret = -ENODEV; goto clean_runtime_disable_ret; } - data = &priv->data; + data = &cpsw->data; + cpsw->rx_ch_num = 1; + cpsw->tx_ch_num = 1; if (is_valid_ether_addr(data->slave_data[0].mac_addr)) { memcpy(priv->mac_addr, data->slave_data[0].mac_addr, ETH_ALEN); @@ -2337,27 +2603,26 @@ static int cpsw_probe(struct platform_device *pdev) memcpy(ndev->dev_addr, priv->mac_addr, ETH_ALEN); - priv->slaves = devm_kzalloc(&pdev->dev, + cpsw->slaves = devm_kzalloc(&pdev->dev, sizeof(struct cpsw_slave) * data->slaves, GFP_KERNEL); - if (!priv->slaves) { + if (!cpsw->slaves) { ret = -ENOMEM; goto clean_runtime_disable_ret; } for (i = 0; i < data->slaves; i++) - priv->slaves[i].slave_num = i; + cpsw->slaves[i].slave_num = i; - priv->slaves[0].ndev = ndev; + cpsw->slaves[0].ndev = ndev; priv->emac_port = 0; - priv->clk = devm_clk_get(&pdev->dev, "fck"); - if (IS_ERR(priv->clk)) { + clk = devm_clk_get(&pdev->dev, "fck"); + if (IS_ERR(clk)) { dev_err(priv->dev, "fck is not found\n"); ret = -ENODEV; goto clean_runtime_disable_ret; } - priv->coal_intvl = 0; - priv->bus_freq_mhz = clk_get_rate(priv->clk) / 1000000; + cpsw->bus_freq_mhz = clk_get_rate(clk) / 1000000; ss_res = platform_get_resource(pdev, IORESOURCE_MEM, 0); ss_regs = devm_ioremap_resource(&pdev->dev, ss_res); @@ -2365,7 +2630,7 @@ static int cpsw_probe(struct platform_device *pdev) ret = PTR_ERR(ss_regs); goto clean_runtime_disable_ret; } - priv->regs = ss_regs; + cpsw->regs = ss_regs; /* Need to enable clocks with runtime PM api to access module * registers @@ -2375,24 +2640,24 @@ static int cpsw_probe(struct platform_device *pdev) pm_runtime_put_noidle(&pdev->dev); goto clean_runtime_disable_ret; } - priv->version = readl(&priv->regs->id_ver); + cpsw->version = readl(&cpsw->regs->id_ver); pm_runtime_put_sync(&pdev->dev); res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - priv->wr_regs = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(priv->wr_regs)) { - ret = PTR_ERR(priv->wr_regs); + cpsw->wr_regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(cpsw->wr_regs)) { + ret = PTR_ERR(cpsw->wr_regs); goto clean_runtime_disable_ret; } memset(&dma_params, 0, sizeof(dma_params)); memset(&ale_params, 0, sizeof(ale_params)); - switch (priv->version) { + switch (cpsw->version) { case CPSW_VERSION_1: - priv->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET; - priv->cpts->reg = ss_regs + CPSW1_CPTS_OFFSET; - priv->hw_stats = ss_regs + CPSW1_HW_STATS; + cpsw->host_port_regs = ss_regs + CPSW1_HOST_PORT_OFFSET; + cpsw->cpts->reg = ss_regs + CPSW1_CPTS_OFFSET; + cpsw->hw_stats = ss_regs + CPSW1_HW_STATS; dma_params.dmaregs = ss_regs + CPSW1_CPDMA_OFFSET; dma_params.txhdp = ss_regs + CPSW1_STATERAM_OFFSET; ale_params.ale_regs = ss_regs + CPSW1_ALE_OFFSET; @@ -2404,9 +2669,9 @@ static int cpsw_probe(struct platform_device *pdev) case CPSW_VERSION_2: case CPSW_VERSION_3: case CPSW_VERSION_4: - priv->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET; - priv->cpts->reg = ss_regs + CPSW2_CPTS_OFFSET; - priv->hw_stats = ss_regs + CPSW2_HW_STATS; + cpsw->host_port_regs = ss_regs + CPSW2_HOST_PORT_OFFSET; + cpsw->cpts->reg = ss_regs + CPSW2_CPTS_OFFSET; + cpsw->hw_stats = ss_regs + CPSW2_HW_STATS; dma_params.dmaregs = ss_regs + CPSW2_CPDMA_OFFSET; dma_params.txhdp = ss_regs + CPSW2_STATERAM_OFFSET; ale_params.ale_regs = ss_regs + CPSW2_ALE_OFFSET; @@ -2417,13 +2682,14 @@ static int cpsw_probe(struct platform_device *pdev) (u32 __force) ss_res->start + CPSW2_BD_OFFSET; break; default: - dev_err(priv->dev, "unknown version 0x%08x\n", priv->version); + dev_err(priv->dev, "unknown version 0x%08x\n", cpsw->version); ret = -ENODEV; goto clean_runtime_disable_ret; } - for (i = 0; i < priv->data.slaves; i++) { - struct cpsw_slave *slave = &priv->slaves[i]; - cpsw_slave_init(slave, priv, slave_offset, sliver_offset); + for (i = 0; i < cpsw->data.slaves; i++) { + struct cpsw_slave *slave = &cpsw->slaves[i]; + + cpsw_slave_init(slave, cpsw, slave_offset, sliver_offset); slave_offset += slave_size; sliver_offset += SLIVER_SIZE; } @@ -2443,19 +2709,16 @@ static int cpsw_probe(struct platform_device *pdev) dma_params.has_ext_regs = true; dma_params.desc_hw_addr = dma_params.desc_mem_phys; - priv->dma = cpdma_ctlr_create(&dma_params); - if (!priv->dma) { + cpsw->dma = cpdma_ctlr_create(&dma_params); + if (!cpsw->dma) { dev_err(priv->dev, "error initializing dma\n"); ret = -ENOMEM; goto clean_runtime_disable_ret; } - priv->txch = cpdma_chan_create(priv->dma, tx_chan_num(0), - cpsw_tx_handler); - priv->rxch = cpdma_chan_create(priv->dma, rx_chan_num(0), - cpsw_rx_handler); - - if (WARN_ON(!priv->txch || !priv->rxch)) { + cpsw->txch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_tx_handler, 0); + cpsw->rxch[0] = cpdma_chan_create(cpsw->dma, 0, cpsw_rx_handler, 1); + if (WARN_ON(!cpsw->rxch[0] || !cpsw->txch[0])) { dev_err(priv->dev, "error initializing dma channels\n"); ret = -ENOMEM; goto clean_dma_ret; @@ -2466,8 +2729,8 @@ static int cpsw_probe(struct platform_device *pdev) ale_params.ale_entries = data->ale_entries; ale_params.ale_ports = data->slaves; - priv->ale = cpsw_ale_create(&ale_params); - if (!priv->ale) { + cpsw->ale = cpsw_ale_create(&ale_params); + if (!cpsw->ale) { dev_err(priv->dev, "error initializing ale engine\n"); ret = -ENODEV; goto clean_dma_ret; @@ -2484,7 +2747,7 @@ static int cpsw_probe(struct platform_device *pdev) if (of_id) { pdev->id_entry = of_id->data; if (pdev->id_entry->driver_data) - priv->quirk_irq = true; + cpsw->quirk_irq = true; } /* Grab RX and TX IRQs. Note that we also have RX_THRESHOLD and @@ -2502,9 +2765,9 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_ale_ret; } - priv->irqs_table[0] = irq; + cpsw->irqs_table[0] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_rx_interrupt, - 0, dev_name(&pdev->dev), priv); + 0, dev_name(&pdev->dev), cpsw); if (ret < 0) { dev_err(priv->dev, "error attaching irq (%d)\n", ret); goto clean_ale_ret; @@ -2517,21 +2780,20 @@ static int cpsw_probe(struct platform_device *pdev) goto clean_ale_ret; } - priv->irqs_table[1] = irq; + cpsw->irqs_table[1] = irq; ret = devm_request_irq(&pdev->dev, irq, cpsw_tx_interrupt, - 0, dev_name(&pdev->dev), priv); + 0, dev_name(&pdev->dev), cpsw); if (ret < 0) { dev_err(priv->dev, "error attaching irq (%d)\n", ret); goto clean_ale_ret; } - priv->num_irqs = 2; ndev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; ndev->netdev_ops = &cpsw_netdev_ops; ndev->ethtool_ops = &cpsw_ethtool_ops; - netif_napi_add(ndev, &priv->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); - netif_tx_napi_add(ndev, &priv->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); + netif_napi_add(ndev, &cpsw->napi_rx, cpsw_rx_poll, CPSW_POLL_WEIGHT); + netif_tx_napi_add(ndev, &cpsw->napi_tx, cpsw_tx_poll, CPSW_POLL_WEIGHT); /* register the network device */ SET_NETDEV_DEV(ndev, &pdev->dev); @@ -2545,8 +2807,8 @@ static int cpsw_probe(struct platform_device *pdev) cpsw_notice(priv, probe, "initialized device (regs %pa, irq %d)\n", &ss_res->start, ndev->irq); - if (priv->data.dual_emac) { - ret = cpsw_probe_dual_emac(pdev, priv); + if (cpsw->data.dual_emac) { + ret = cpsw_probe_dual_emac(priv); if (ret) { cpsw_err(priv, probe, "error probe slave 2 emac interface\n"); goto clean_ale_ret; @@ -2556,9 +2818,9 @@ static int cpsw_probe(struct platform_device *pdev) return 0; clean_ale_ret: - cpsw_ale_destroy(priv->ale); + cpsw_ale_destroy(cpsw->ale); clean_dma_ret: - cpdma_ctlr_destroy(priv->dma); + cpdma_ctlr_destroy(cpsw->dma); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); clean_ndev_ret: @@ -2569,7 +2831,7 @@ clean_ndev_ret: static int cpsw_remove(struct platform_device *pdev) { struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); int ret; ret = pm_runtime_get_sync(&pdev->dev); @@ -2578,17 +2840,17 @@ static int cpsw_remove(struct platform_device *pdev) return ret; } - if (priv->data.dual_emac) - unregister_netdev(cpsw_get_slave_ndev(priv, 1)); + if (cpsw->data.dual_emac) + unregister_netdev(cpsw->slaves[1].ndev); unregister_netdev(ndev); - cpsw_ale_destroy(priv->ale); - cpdma_ctlr_destroy(priv->dma); + cpsw_ale_destroy(cpsw->ale); + cpdma_ctlr_destroy(cpsw->dma); of_platform_depopulate(&pdev->dev); pm_runtime_put_sync(&pdev->dev); pm_runtime_disable(&pdev->dev); - if (priv->data.dual_emac) - free_netdev(cpsw_get_slave_ndev(priv, 1)); + if (cpsw->data.dual_emac) + free_netdev(cpsw->slaves[1].ndev); free_netdev(ndev); return 0; } @@ -2598,14 +2860,14 @@ static int cpsw_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = ndev_to_cpsw(ndev); - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { int i; - for (i = 0; i < priv->data.slaves; i++) { - if (netif_running(priv->slaves[i].ndev)) - cpsw_ndo_stop(priv->slaves[i].ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + if (netif_running(cpsw->slaves[i].ndev)) + cpsw_ndo_stop(cpsw->slaves[i].ndev); } } else { if (netif_running(ndev)) @@ -2613,7 +2875,7 @@ static int cpsw_suspend(struct device *dev) } /* Select sleep pin state */ - pinctrl_pm_select_sleep_state(&pdev->dev); + pinctrl_pm_select_sleep_state(dev); return 0; } @@ -2622,17 +2884,17 @@ static int cpsw_resume(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); struct net_device *ndev = platform_get_drvdata(pdev); - struct cpsw_priv *priv = netdev_priv(ndev); + struct cpsw_common *cpsw = netdev_priv(ndev); /* Select default pin state */ - pinctrl_pm_select_default_state(&pdev->dev); + pinctrl_pm_select_default_state(dev); - if (priv->data.dual_emac) { + if (cpsw->data.dual_emac) { int i; - for (i = 0; i < priv->data.slaves; i++) { - if (netif_running(priv->slaves[i].ndev)) - cpsw_ndo_open(priv->slaves[i].ndev); + for (i = 0; i < cpsw->data.slaves; i++) { + if (netif_running(cpsw->slaves[i].ndev)) + cpsw_ndo_open(cpsw->slaves[i].ndev); } } else { if (netif_running(ndev)) diff --git a/drivers/net/ethernet/ti/davinci_cpdma.c b/drivers/net/ethernet/ti/davinci_cpdma.c index 19e5f32a8a64..c3f35f11a8fd 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.c +++ b/drivers/net/ethernet/ti/davinci_cpdma.c @@ -86,7 +86,7 @@ struct cpdma_desc_pool { void __iomem *iomap; /* ioremap map */ void *cpumap; /* dma_alloc map */ int desc_size, mem_size; - int num_desc, used_desc; + int num_desc; struct device *dev; struct gen_pool *gen_pool; }; @@ -104,6 +104,7 @@ struct cpdma_ctlr { struct cpdma_desc_pool *pool; spinlock_t lock; struct cpdma_chan *channels[2 * CPDMA_MAX_CHANNELS]; + int chan_num; }; struct cpdma_chan { @@ -123,6 +124,13 @@ struct cpdma_chan { int int_set, int_clear, td; }; +#define tx_chan_num(chan) (chan) +#define rx_chan_num(chan) ((chan) + CPDMA_MAX_CHANNELS) +#define is_rx_chan(chan) ((chan)->chan_num >= CPDMA_MAX_CHANNELS) +#define is_tx_chan(chan) (!is_rx_chan(chan)) +#define __chan_linear(chan_num) ((chan_num) & (CPDMA_MAX_CHANNELS - 1)) +#define chan_linear(chan) __chan_linear((chan)->chan_num) + /* The following make access to common cpdma_ctlr params more readable */ #define dmaregs params.dmaregs #define num_chan params.num_chan @@ -148,7 +156,10 @@ static void cpdma_desc_pool_destroy(struct cpdma_desc_pool *pool) if (!pool) return; - WARN_ON(pool->used_desc); + WARN(gen_pool_size(pool->gen_pool) != gen_pool_avail(pool->gen_pool), + "cpdma_desc_pool size %d != avail %d", + gen_pool_size(pool->gen_pool), + gen_pool_avail(pool->gen_pool)); if (pool->cpumap) dma_free_coherent(pool->dev, pool->mem_size, pool->cpumap, pool->phys); @@ -232,21 +243,14 @@ desc_from_phys(struct cpdma_desc_pool *pool, dma_addr_t dma) static struct cpdma_desc __iomem * cpdma_desc_alloc(struct cpdma_desc_pool *pool) { - struct cpdma_desc __iomem *desc = NULL; - - desc = (struct cpdma_desc __iomem *)gen_pool_alloc(pool->gen_pool, - pool->desc_size); - if (desc) - pool->used_desc++; - - return desc; + return (struct cpdma_desc __iomem *) + gen_pool_alloc(pool->gen_pool, pool->desc_size); } static void cpdma_desc_free(struct cpdma_desc_pool *pool, struct cpdma_desc __iomem *desc, int num_desc) { gen_pool_free(pool->gen_pool, (unsigned long)desc, pool->desc_size); - pool->used_desc--; } struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) @@ -260,6 +264,7 @@ struct cpdma_ctlr *cpdma_ctlr_create(struct cpdma_params *params) ctlr->state = CPDMA_STATE_IDLE; ctlr->params = *params; ctlr->dev = params->dev; + ctlr->chan_num = 0; spin_lock_init(&ctlr->lock); ctlr->pool = cpdma_desc_pool_create(ctlr->dev, @@ -336,12 +341,14 @@ int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr) } ctlr->state = CPDMA_STATE_TEARDOWN; + spin_unlock_irqrestore(&ctlr->lock, flags); for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) { if (ctlr->channels[i]) cpdma_chan_stop(ctlr->channels[i]); } + spin_lock_irqsave(&ctlr->lock, flags); dma_reg_write(ctlr, CPDMA_RXINTMASKCLEAR, 0xffffffff); dma_reg_write(ctlr, CPDMA_TXINTMASKCLEAR, 0xffffffff); @@ -403,13 +410,52 @@ void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value) } EXPORT_SYMBOL_GPL(cpdma_ctlr_eoi); -struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, - cpdma_handler_fn handler) +u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr) { + return dma_reg_read(ctlr, CPDMA_RXINTSTATMASKED); +} +EXPORT_SYMBOL_GPL(cpdma_ctrl_rxchs_state); + +u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr) +{ + return dma_reg_read(ctlr, CPDMA_TXINTSTATMASKED); +} +EXPORT_SYMBOL_GPL(cpdma_ctrl_txchs_state); + +/** + * cpdma_chan_split_pool - Splits ctrl pool between all channels. + * Has to be called under ctlr lock + */ +static void cpdma_chan_split_pool(struct cpdma_ctlr *ctlr) +{ + struct cpdma_desc_pool *pool = ctlr->pool; + struct cpdma_chan *chan; + int ch_desc_num; + int i; + + if (!ctlr->chan_num) + return; + + /* calculate average size of pool slice */ + ch_desc_num = pool->num_desc / ctlr->chan_num; + + /* split ctlr pool */ + for (i = 0; i < ARRAY_SIZE(ctlr->channels); i++) { + chan = ctlr->channels[i]; + if (chan) + chan->desc_num = ch_desc_num; + } +} + +struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, + cpdma_handler_fn handler, int rx_type) +{ + int offset = chan_num * 4; struct cpdma_chan *chan; - int offset = (chan_num % CPDMA_MAX_CHANNELS) * 4; unsigned long flags; + chan_num = rx_type ? rx_chan_num(chan_num) : tx_chan_num(chan_num); + if (__chan_linear(chan_num) >= ctlr->num_chan) return NULL; @@ -451,14 +497,25 @@ struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, spin_lock_init(&chan->lock); ctlr->channels[chan_num] = chan; + ctlr->chan_num++; + + cpdma_chan_split_pool(ctlr); + spin_unlock_irqrestore(&ctlr->lock, flags); return chan; } EXPORT_SYMBOL_GPL(cpdma_chan_create); -int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr) +int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan) { - return ctlr->pool->num_desc / 2; + unsigned long flags; + int desc_num; + + spin_lock_irqsave(&chan->lock, flags); + desc_num = chan->desc_num; + spin_unlock_irqrestore(&chan->lock, flags); + + return desc_num; } EXPORT_SYMBOL_GPL(cpdma_chan_get_rx_buf_num); @@ -475,6 +532,10 @@ int cpdma_chan_destroy(struct cpdma_chan *chan) if (chan->state != CPDMA_STATE_IDLE) cpdma_chan_stop(chan); ctlr->channels[chan->chan_num] = NULL; + ctlr->chan_num--; + + cpdma_chan_split_pool(ctlr); + spin_unlock_irqrestore(&ctlr->lock, flags); return 0; } diff --git a/drivers/net/ethernet/ti/davinci_cpdma.h b/drivers/net/ethernet/ti/davinci_cpdma.h index 4b46cd6e9a3f..a07b22b12bc1 100644 --- a/drivers/net/ethernet/ti/davinci_cpdma.h +++ b/drivers/net/ethernet/ti/davinci_cpdma.h @@ -17,13 +17,6 @@ #define CPDMA_MAX_CHANNELS BITS_PER_LONG -#define tx_chan_num(chan) (chan) -#define rx_chan_num(chan) ((chan) + CPDMA_MAX_CHANNELS) -#define is_rx_chan(chan) ((chan)->chan_num >= CPDMA_MAX_CHANNELS) -#define is_tx_chan(chan) (!is_rx_chan(chan)) -#define __chan_linear(chan_num) ((chan_num) & (CPDMA_MAX_CHANNELS - 1)) -#define chan_linear(chan) __chan_linear((chan)->chan_num) - #define CPDMA_RX_SOURCE_PORT(__status__) ((__status__ >> 16) & 0x7) #define CPDMA_EOI_RX_THRESH 0x0 @@ -79,8 +72,8 @@ int cpdma_ctlr_start(struct cpdma_ctlr *ctlr); int cpdma_ctlr_stop(struct cpdma_ctlr *ctlr); struct cpdma_chan *cpdma_chan_create(struct cpdma_ctlr *ctlr, int chan_num, - cpdma_handler_fn handler); -int cpdma_chan_get_rx_buf_num(struct cpdma_ctlr *ctlr); + cpdma_handler_fn handler, int rx_type); +int cpdma_chan_get_rx_buf_num(struct cpdma_chan *chan); int cpdma_chan_destroy(struct cpdma_chan *chan); int cpdma_chan_start(struct cpdma_chan *chan); int cpdma_chan_stop(struct cpdma_chan *chan); @@ -94,6 +87,8 @@ int cpdma_chan_process(struct cpdma_chan *chan, int quota); int cpdma_ctlr_int_ctrl(struct cpdma_ctlr *ctlr, bool enable); void cpdma_ctlr_eoi(struct cpdma_ctlr *ctlr, u32 value); int cpdma_chan_int_ctrl(struct cpdma_chan *chan, bool enable); +u32 cpdma_ctrl_rxchs_state(struct cpdma_ctlr *ctlr); +u32 cpdma_ctrl_txchs_state(struct cpdma_ctlr *ctlr); bool cpdma_check_free_tx_desc(struct cpdma_chan *chan); enum cpdma_control { diff --git a/drivers/net/ethernet/ti/davinci_emac.c b/drivers/net/ethernet/ti/davinci_emac.c index 727a79f3c7dd..2fd94a5bc1f3 100644 --- a/drivers/net/ethernet/ti/davinci_emac.c +++ b/drivers/net/ethernet/ti/davinci_emac.c @@ -597,14 +597,14 @@ static u32 hash_get(u8 *addr) } /** - * hash_add - Hash function to add mac addr from hash table + * emac_hash_add - Hash function to add mac addr from hash table * @priv: The DaVinci EMAC private adapter structure * @mac_addr: mac address to delete from hash table * * Adds mac address to the internal hash table * */ -static int hash_add(struct emac_priv *priv, u8 *mac_addr) +static int emac_hash_add(struct emac_priv *priv, u8 *mac_addr) { struct device *emac_dev = &priv->ndev->dev; u32 rc = 0; @@ -613,7 +613,7 @@ static int hash_add(struct emac_priv *priv, u8 *mac_addr) if (hash_value >= EMAC_NUM_MULTICAST_BITS) { if (netif_msg_drv(priv)) { - dev_err(emac_dev, "DaVinci EMAC: hash_add(): Invalid "\ + dev_err(emac_dev, "DaVinci EMAC: emac_hash_add(): Invalid "\ "Hash %08x, should not be greater than %08x", hash_value, (EMAC_NUM_MULTICAST_BITS - 1)); } @@ -639,14 +639,14 @@ static int hash_add(struct emac_priv *priv, u8 *mac_addr) } /** - * hash_del - Hash function to delete mac addr from hash table + * emac_hash_del - Hash function to delete mac addr from hash table * @priv: The DaVinci EMAC private adapter structure * @mac_addr: mac address to delete from hash table * * Removes mac address from the internal hash table * */ -static int hash_del(struct emac_priv *priv, u8 *mac_addr) +static int emac_hash_del(struct emac_priv *priv, u8 *mac_addr) { u32 hash_value; u32 hash_bit; @@ -696,10 +696,10 @@ static void emac_add_mcast(struct emac_priv *priv, u32 action, u8 *mac_addr) switch (action) { case EMAC_MULTICAST_ADD: - update = hash_add(priv, mac_addr); + update = emac_hash_add(priv, mac_addr); break; case EMAC_MULTICAST_DEL: - update = hash_del(priv, mac_addr); + update = emac_hash_del(priv, mac_addr); break; case EMAC_ALL_MULTI_SET: update = 1; @@ -1870,10 +1870,10 @@ static int davinci_emac_probe(struct platform_device *pdev) goto no_pdata; } - priv->txchan = cpdma_chan_create(priv->dma, tx_chan_num(EMAC_DEF_TX_CH), - emac_tx_handler); - priv->rxchan = cpdma_chan_create(priv->dma, rx_chan_num(EMAC_DEF_RX_CH), - emac_rx_handler); + priv->txchan = cpdma_chan_create(priv->dma, EMAC_DEF_TX_CH, + emac_tx_handler, 0); + priv->rxchan = cpdma_chan_create(priv->dma, EMAC_DEF_RX_CH, + emac_rx_handler, 1); if (WARN_ON(!priv->txchan || !priv->rxchan)) { rc = -ENOMEM; goto no_cpdma_chan; diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c index 79f0ec4e51ac..bc258d7e41df 100644 --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c @@ -1791,7 +1791,7 @@ fail_alloc_rx: gelic_card_free_chain(card, card->tx_chain.head); fail_alloc_tx: free_irq(card->irq, card); - netdev->irq = NO_IRQ; + netdev->irq = 0; fail_request_irq: ps3_sb_event_receive_port_destroy(dev, card->irq); fail_alloc_irq: @@ -1843,7 +1843,7 @@ static int ps3_gelic_driver_remove(struct ps3_system_bus_device *dev) netdev0 = card->netdev[GELIC_PORT_ETHERNET_0]; /* disconnect event port */ free_irq(card->irq, card); - netdev0->irq = NO_IRQ; + netdev0->irq = 0; ps3_sb_event_receive_port_destroy(card->dev, card->irq); wait_event(card->waitq, diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index f38696ceee74..908e72e18ef7 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -1724,24 +1724,21 @@ static void velocity_free_tx_buf(struct velocity_info *vptr, struct velocity_td_info *tdinfo, struct tx_desc *td) { struct sk_buff *skb = tdinfo->skb; + int i; /* * Don't unmap the pre-allocated tx_bufs */ - if (tdinfo->skb_dma) { - int i; + for (i = 0; i < tdinfo->nskb_dma; i++) { + size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN); - for (i = 0; i < tdinfo->nskb_dma; i++) { - size_t pktlen = max_t(size_t, skb->len, ETH_ZLEN); + /* For scatter-gather */ + if (skb_shinfo(skb)->nr_frags > 0) + pktlen = max_t(size_t, pktlen, + td->td_buf[i].size & ~TD_QUEUE); - /* For scatter-gather */ - if (skb_shinfo(skb)->nr_frags > 0) - pktlen = max_t(size_t, pktlen, - td->td_buf[i].size & ~TD_QUEUE); - - dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], - le16_to_cpu(pktlen), DMA_TO_DEVICE); - } + dma_unmap_single(vptr->dev, tdinfo->skb_dma[i], + le16_to_cpu(pktlen), DMA_TO_DEVICE); } dev_kfree_skb_irq(skb); tdinfo->skb = NULL; diff --git a/drivers/net/ethernet/xilinx/Kconfig b/drivers/net/ethernet/xilinx/Kconfig index 4f5c024c6192..6d68c8a8f4f2 100644 --- a/drivers/net/ethernet/xilinx/Kconfig +++ b/drivers/net/ethernet/xilinx/Kconfig @@ -5,7 +5,7 @@ config NET_VENDOR_XILINX bool "Xilinx devices" default y - depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ + depends on PPC || PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS ---help--- If you have a network (Ethernet) card belonging to this class, say Y. @@ -18,7 +18,7 @@ if NET_VENDOR_XILINX config XILINX_EMACLITE tristate "Xilinx 10/100 Ethernet Lite support" - depends on (PPC32 || MICROBLAZE || ARCH_ZYNQ) + depends on PPC32 || MICROBLAZE || ARCH_ZYNQ || MIPS select PHYLIB ---help--- This driver supports the 10/100 Ethernet Lite from Xilinx. diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c index 36ee7ab300ae..69e2a833a84f 100644 --- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c +++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c @@ -1297,7 +1297,7 @@ static int axienet_ethtools_set_coalesce(struct net_device *ndev, return 0; } -static struct ethtool_ops axienet_ethtool_ops = { +static const struct ethtool_ops axienet_ethtool_ops = { .get_drvinfo = axienet_ethtools_get_drvinfo, .get_regs_len = axienet_ethtools_get_regs_len, .get_regs = axienet_ethtools_get_regs, diff --git a/drivers/net/fjes/fjes_main.c b/drivers/net/fjes/fjes_main.c index 9006877c53f2..e46b1ebbbff4 100644 --- a/drivers/net/fjes/fjes_main.c +++ b/drivers/net/fjes/fjes_main.c @@ -97,7 +97,6 @@ static struct acpi_driver fjes_acpi_driver = { static struct platform_driver fjes_driver = { .driver = { .name = DRV_NAME, - .owner = THIS_MODULE, }, .probe = fjes_probe, .remove = fjes_remove, diff --git a/drivers/net/hamradio/6pack.c b/drivers/net/hamradio/6pack.c index 5a1e98547031..470b3dcd54e5 100644 --- a/drivers/net/hamradio/6pack.c +++ b/drivers/net/hamradio/6pack.c @@ -127,7 +127,7 @@ struct sixpack { #define AX25_6PACK_HEADER_LEN 0 -static void sixpack_decode(struct sixpack *, unsigned char[], int); +static void sixpack_decode(struct sixpack *, const unsigned char[], int); static int encode_sixpack(unsigned char *, unsigned char *, int, unsigned char); /* @@ -428,7 +428,7 @@ out: /* * Handle the 'receiver data ready' interrupt. - * This function is called by the 'tty_io' module in the kernel when + * This function is called by the tty module in the kernel when * a block of 6pack data has been received, which can now be decapsulated * and sent on to some IP layer for further processing. */ @@ -436,7 +436,6 @@ static void sixpack_receive_buf(struct tty_struct *tty, const unsigned char *cp, char *fp, int count) { struct sixpack *sp; - unsigned char buf[512]; int count1; if (!count) @@ -446,10 +445,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, if (!sp) return; - memcpy(buf, cp, count < sizeof(buf) ? count : sizeof(buf)); - /* Read the characters out of the buffer */ - count1 = count; while (count) { count--; @@ -459,7 +455,7 @@ static void sixpack_receive_buf(struct tty_struct *tty, continue; } } - sixpack_decode(sp, buf, count1); + sixpack_decode(sp, cp, count1); sp_put(sp); tty_unthrottle(tty); @@ -992,7 +988,7 @@ static void decode_std_command(struct sixpack *sp, unsigned char cmd) /* decode a 6pack packet */ static void -sixpack_decode(struct sixpack *sp, unsigned char *pre_rbuff, int count) +sixpack_decode(struct sixpack *sp, const unsigned char *pre_rbuff, int count) { unsigned char inbyte; int count1; diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c index d95a50ae996d..622ab3ab9e93 100644 --- a/drivers/net/hamradio/bpqether.c +++ b/drivers/net/hamradio/bpqether.c @@ -484,7 +484,7 @@ static void bpq_setup(struct net_device *dev) dev->flags = 0; dev->features = NETIF_F_LLTX; /* Allow recursion */ -#if defined(CONFIG_AX25) || defined(CONFIG_AX25_MODULE) +#if IS_ENABLED(CONFIG_AX25) dev->header_ops = &ax25_header_ops; #endif diff --git a/drivers/net/hyperv/hyperv_net.h b/drivers/net/hyperv/hyperv_net.h index 591af71eae56..f4fbcb5aa24a 100644 --- a/drivers/net/hyperv/hyperv_net.h +++ b/drivers/net/hyperv/hyperv_net.h @@ -84,8 +84,6 @@ struct ndis_recv_scale_cap { /* NDIS_RECEIVE_SCALE_CAPABILITIES */ #define NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 40 #define ITAB_NUM 128 -#define HASH_KEYLEN NDIS_RSS_HASH_SECRET_KEY_MAX_SIZE_REVISION_2 -extern u8 netvsc_hash_key[]; struct ndis_recv_scale_param { /* NDIS_RECEIVE_SCALE_PARAMETERS */ struct ndis_obj_header hdr; @@ -175,7 +173,7 @@ struct rndis_device { struct rndis_message; struct netvsc_device; int netvsc_device_add(struct hv_device *device, void *additional_info); -int netvsc_device_remove(struct hv_device *device); +void netvsc_device_remove(struct hv_device *device); int netvsc_send(struct hv_device *device, struct hv_netvsc_packet *packet, struct rndis_message *rndis_msg, @@ -435,7 +433,7 @@ struct nvsp_1_message_revoke_send_buffer { */ struct nvsp_1_message_send_rndis_packet { /* - * This field is specified by RNIDS. They assume there's two different + * This field is specified by RNDIS. They assume there's two different * channels of communication. However, the Network VSP only has one. * Therefore, the channel travels with the RNDIS packet. */ @@ -490,6 +488,7 @@ struct nvsp_2_vsc_capability { u64 sriov:1; u64 ieee8021q:1; u64 correlation_id:1; + u64 teaming:1; }; }; } __packed; @@ -579,7 +578,7 @@ struct nvsp_5_send_indirect_table { /* The number of entries in the send indirection table */ u32 count; - /* The offset of the send indireciton table from top of this struct. + /* The offset of the send indirection table from top of this struct. * The send indirection table tells which channel to put the send * traffic on. Each entry is a channel number. */ @@ -633,12 +632,36 @@ struct multi_send_data { u32 count; /* counter of batched packets */ }; +struct recv_comp_data { + u64 tid; /* transaction id */ + u32 status; +}; + +/* Netvsc Receive Slots Max */ +#define NETVSC_RECVSLOT_MAX (NETVSC_RECEIVE_BUFFER_SIZE / ETH_DATA_LEN + 1) + +struct multi_recv_comp { + void *buf; /* queued receive completions */ + u32 first; /* first data entry */ + u32 next; /* next entry for writing */ +}; + struct netvsc_stats { u64 packets; u64 bytes; + u64 broadcast; + u64 multicast; struct u64_stats_sync syncp; }; +struct netvsc_ethtool_stats { + unsigned long tx_scattered; + unsigned long tx_no_memory; + unsigned long tx_no_space; + unsigned long tx_too_big; + unsigned long tx_busy; +}; + struct netvsc_reconfig { struct list_head list; u32 event; @@ -668,14 +691,14 @@ struct net_device_context { /* Ethtool settings */ u8 duplex; u32 speed; + struct netvsc_ethtool_stats eth_stats; /* the device is going away */ bool start_remove; /* State to manage the associated VF interface. */ - struct net_device *vf_netdev; - bool vf_inject; - atomic_t vf_use_cnt; + struct net_device __rcu *vf_netdev; + /* 1: allocated, serial number is valid. 0: not allocated */ u32 vf_alloc; /* Serial number of the VF to team with */ @@ -711,7 +734,6 @@ struct netvsc_device { struct nvsp_message channel_init_pkt; struct nvsp_message revoke_packet; - /* unsigned char HwMacAddr[HW_MACADDR_LEN]; */ struct vmbus_channel *chn_table[VRSS_CHANNEL_MAX]; u32 send_table[VRSS_SEND_TAB_SIZE]; @@ -735,6 +757,9 @@ struct netvsc_device { u32 max_pkt; /* max number of pkt in one send, e.g. 8 */ u32 pkt_align; /* alignment bytes, e.g. 8 */ + struct multi_recv_comp mrc[VRSS_CHANNEL_MAX]; + atomic_t num_outstanding_recvs; + atomic_t open_cnt; }; @@ -1213,7 +1238,7 @@ struct rndis_message { u32 ndis_msg_type; /* Total length of this message, from the beginning */ - /* of the sruct rndis_message, in bytes. */ + /* of the struct rndis_message, in bytes. */ u32 msg_len; /* Actual message */ diff --git a/drivers/net/hyperv/netvsc.c b/drivers/net/hyperv/netvsc.c index 410fb8e81376..720b5fa9e625 100644 --- a/drivers/net/hyperv/netvsc.c +++ b/drivers/net/hyperv/netvsc.c @@ -59,7 +59,6 @@ void netvsc_switch_datapath(struct net_device *ndev, bool vf) VM_PKT_DATA_INBAND, 0); } - static struct netvsc_device *alloc_net_device(void) { struct netvsc_device *net_device; @@ -74,17 +73,26 @@ static struct netvsc_device *alloc_net_device(void) return NULL; } + net_device->mrc[0].buf = vzalloc(NETVSC_RECVSLOT_MAX * + sizeof(struct recv_comp_data)); + init_waitqueue_head(&net_device->wait_drain); net_device->destroy = false; atomic_set(&net_device->open_cnt, 0); net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT; net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT; + init_completion(&net_device->channel_init_wait); return net_device; } static void free_netvsc_device(struct netvsc_device *nvdev) { + int i; + + for (i = 0; i < VRSS_CHANNEL_MAX; i++) + vfree(nvdev->mrc[i].buf); + kfree(nvdev->cb_buffer); kfree(nvdev); } @@ -107,20 +115,20 @@ static struct netvsc_device *get_inbound_net_device(struct hv_device *device) goto get_in_err; if (net_device->destroy && - atomic_read(&net_device->num_outstanding_sends) == 0) + atomic_read(&net_device->num_outstanding_sends) == 0 && + atomic_read(&net_device->num_outstanding_recvs) == 0) net_device = NULL; get_in_err: return net_device; } - -static int netvsc_destroy_buf(struct hv_device *device) +static void netvsc_destroy_buf(struct hv_device *device) { struct nvsp_message *revoke_packet; - int ret = 0; struct net_device *ndev = hv_get_drvdata(device); struct netvsc_device *net_device = net_device_to_netvsc_device(ndev); + int ret; /* * If we got a section count, it means we received a @@ -150,7 +158,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to send " "revoke receive buffer to netvsp\n"); - return ret; + return; } } @@ -165,7 +173,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to teardown receive buffer's gpadl\n"); - return ret; + return; } net_device->recv_buf_gpadl_handle = 0; } @@ -209,7 +217,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to send " "revoke send buffer to netvsp\n"); - return ret; + return; } } /* Teardown the gpadl on the vsp end */ @@ -223,7 +231,7 @@ static int netvsc_destroy_buf(struct hv_device *device) if (ret != 0) { netdev_err(ndev, "unable to teardown send buffer's gpadl\n"); - return ret; + return; } net_device->send_buf_gpadl_handle = 0; } @@ -233,8 +241,6 @@ static int netvsc_destroy_buf(struct hv_device *device) net_device->send_buf = NULL; } kfree(net_device->send_section_map); - - return ret; } static int netvsc_init_buf(struct hv_device *device) @@ -276,7 +282,6 @@ static int netvsc_init_buf(struct hv_device *device) goto cleanup; } - /* Notify the NetVsp of the gpadl handle */ init_packet = &net_device->channel_init_pkt; @@ -403,7 +408,7 @@ static int netvsc_init_buf(struct hv_device *device) /* Section count is simply the size divided by the section size. */ net_device->send_section_cnt = - net_device->send_buf_size/net_device->send_section_size; + net_device->send_buf_size / net_device->send_section_size; dev_info(&device->device, "Send section size: %d, Section count:%d\n", net_device->send_section_size, net_device->send_section_cnt); @@ -412,8 +417,8 @@ static int netvsc_init_buf(struct hv_device *device) net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt, BITS_PER_LONG); - net_device->send_section_map = - kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL); + net_device->send_section_map = kcalloc(net_device->map_words, + sizeof(ulong), GFP_KERNEL); if (net_device->send_section_map == NULL) { ret = -ENOMEM; goto cleanup; @@ -428,7 +433,6 @@ exit: return ret; } - /* Negotiate NVSP protocol version */ static int negotiate_nvsp_ver(struct hv_device *device, struct netvsc_device *net_device, @@ -468,9 +472,13 @@ static int negotiate_nvsp_ver(struct hv_device *device, init_packet->msg.v2_msg.send_ndis_config.mtu = ndev->mtu + ETH_HLEN; init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1; - if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) + if (nvsp_ver >= NVSP_PROTOCOL_VERSION_5) { init_packet->msg.v2_msg.send_ndis_config.capability.sriov = 1; + /* Teaming bit is needed to receive link speed updates */ + init_packet->msg.v2_msg.send_ndis_config.capability.teaming = 1; + } + ret = vmbus_sendpacket(device->channel, init_packet, sizeof(struct nvsp_message), (unsigned long)init_packet, @@ -485,9 +493,10 @@ static int netvsc_connect_vsp(struct hv_device *device) struct netvsc_device *net_device; struct nvsp_message *init_packet; int ndis_version; - u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, + const u32 ver_list[] = { + NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2, NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 }; - int i, num_ver = 4; /* number of different NVSP versions */ + int i; net_device = get_outbound_net_device(device); if (!net_device) @@ -496,7 +505,7 @@ static int netvsc_connect_vsp(struct hv_device *device) init_packet = &net_device->channel_init_pkt; /* Negotiate the latest NVSP protocol supported */ - for (i = num_ver - 1; i >= 0; i--) + for (i = ARRAY_SIZE(ver_list) - 1; i >= 0; i--) if (negotiate_nvsp_ver(device, net_device, init_packet, ver_list[i]) == 0) { net_device->nvsp_version = ver_list[i]; @@ -555,7 +564,7 @@ static void netvsc_disconnect_vsp(struct hv_device *device) /* * netvsc_device_remove - Callback when the root bus device is removed */ -int netvsc_device_remove(struct hv_device *device) +void netvsc_device_remove(struct hv_device *device) { struct net_device *ndev = hv_get_drvdata(device); struct net_device_context *net_device_ctx = netdev_priv(ndev); @@ -577,10 +586,8 @@ int netvsc_device_remove(struct hv_device *device) /* Release all resources */ vfree(net_device->sub_cb_buf); free_netvsc_device(net_device); - return 0; } - #define RING_AVAIL_PERCENT_HIWATER 20 #define RING_AVAIL_PERCENT_LOWATER 10 @@ -604,72 +611,79 @@ static inline void netvsc_free_send_slot(struct netvsc_device *net_device, sync_change_bit(index, net_device->send_section_map); } +static void netvsc_send_tx_complete(struct netvsc_device *net_device, + struct vmbus_channel *incoming_channel, + struct hv_device *device, + struct vmpacket_descriptor *packet) +{ + struct sk_buff *skb = (struct sk_buff *)(unsigned long)packet->trans_id; + struct net_device *ndev = hv_get_drvdata(device); + struct net_device_context *net_device_ctx = netdev_priv(ndev); + struct vmbus_channel *channel = device->channel; + int num_outstanding_sends; + u16 q_idx = 0; + int queue_sends; + + /* Notify the layer above us */ + if (likely(skb)) { + struct hv_netvsc_packet *nvsc_packet + = (struct hv_netvsc_packet *)skb->cb; + u32 send_index = nvsc_packet->send_buf_index; + + if (send_index != NETVSC_INVALID_INDEX) + netvsc_free_send_slot(net_device, send_index); + q_idx = nvsc_packet->q_idx; + channel = incoming_channel; + + dev_consume_skb_any(skb); + } + + num_outstanding_sends = + atomic_dec_return(&net_device->num_outstanding_sends); + queue_sends = atomic_dec_return(&net_device->queue_sends[q_idx]); + + if (net_device->destroy && num_outstanding_sends == 0) + wake_up(&net_device->wait_drain); + + if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && + !net_device_ctx->start_remove && + (hv_ringbuf_avail_percent(&channel->outbound) > RING_AVAIL_PERCENT_HIWATER || + queue_sends < 1)) + netif_tx_wake_queue(netdev_get_tx_queue(ndev, q_idx)); +} + static void netvsc_send_completion(struct netvsc_device *net_device, struct vmbus_channel *incoming_channel, struct hv_device *device, struct vmpacket_descriptor *packet) { struct nvsp_message *nvsp_packet; - struct hv_netvsc_packet *nvsc_packet; struct net_device *ndev = hv_get_drvdata(device); - struct net_device_context *net_device_ctx = netdev_priv(ndev); - u32 send_index; - struct sk_buff *skb; nvsp_packet = (struct nvsp_message *)((unsigned long)packet + - (packet->offset8 << 3)); + (packet->offset8 << 3)); - if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) || - (nvsp_packet->hdr.msg_type == - NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) || - (nvsp_packet->hdr.msg_type == - NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) || - (nvsp_packet->hdr.msg_type == - NVSP_MSG5_TYPE_SUBCHANNEL)) { + switch (nvsp_packet->hdr.msg_type) { + case NVSP_MSG_TYPE_INIT_COMPLETE: + case NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE: + case NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE: + case NVSP_MSG5_TYPE_SUBCHANNEL: /* Copy the response back */ memcpy(&net_device->channel_init_pkt, nvsp_packet, sizeof(struct nvsp_message)); complete(&net_device->channel_init_wait); - } else if (nvsp_packet->hdr.msg_type == - NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) { - int num_outstanding_sends; - u16 q_idx = 0; - struct vmbus_channel *channel = device->channel; - int queue_sends; + break; - /* Get the send context */ - skb = (struct sk_buff *)(unsigned long)packet->trans_id; + case NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE: + netvsc_send_tx_complete(net_device, incoming_channel, + device, packet); + break; - /* Notify the layer above us */ - if (skb) { - nvsc_packet = (struct hv_netvsc_packet *) skb->cb; - send_index = nvsc_packet->send_buf_index; - if (send_index != NETVSC_INVALID_INDEX) - netvsc_free_send_slot(net_device, send_index); - q_idx = nvsc_packet->q_idx; - channel = incoming_channel; - dev_kfree_skb_any(skb); - } - - num_outstanding_sends = - atomic_dec_return(&net_device->num_outstanding_sends); - queue_sends = atomic_dec_return(&net_device-> - queue_sends[q_idx]); - - if (net_device->destroy && num_outstanding_sends == 0) - wake_up(&net_device->wait_drain); - - if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) && - !net_device_ctx->start_remove && - (hv_ringbuf_avail_percent(&channel->outbound) > - RING_AVAIL_PERCENT_HIWATER || queue_sends < 1)) - netif_tx_wake_queue(netdev_get_tx_queue( - ndev, q_idx)); - } else { - netdev_err(ndev, "Unknown send completion packet type- " - "%d received!!\n", nvsp_packet->hdr.msg_type); + default: + netdev_err(ndev, + "Unknown send completion type %d received!!\n", + nvsp_packet->hdr.msg_type); } - } static u32 netvsc_get_next_send_section(struct netvsc_device *net_device) @@ -859,7 +873,7 @@ int netvsc_send(struct hv_device *device, struct sk_buff *skb) { struct netvsc_device *net_device; - int ret = 0, m_ret = 0; + int ret = 0; struct vmbus_channel *out_channel; u16 q_idx = packet->q_idx; u32 pktlen = packet->total_data_buflen, msd_len = 0; @@ -930,7 +944,7 @@ int netvsc_send(struct hv_device *device, } if (msdp->skb) - dev_kfree_skb_any(msdp->skb); + dev_consume_skb_any(msdp->skb); if (xmit_more && !packet->cp_partial) { msdp->skb = skb; @@ -948,8 +962,8 @@ int netvsc_send(struct hv_device *device, } if (msd_send) { - m_ret = netvsc_send_pkt(device, msd_send, net_device, - NULL, msd_skb); + int m_ret = netvsc_send_pkt(device, msd_send, net_device, + NULL, msd_skb); if (m_ret != 0) { netvsc_free_send_slot(net_device, @@ -968,49 +982,121 @@ send_now: return ret; } -static void netvsc_send_recv_completion(struct hv_device *device, - struct vmbus_channel *channel, - struct netvsc_device *net_device, - u64 transaction_id, u32 status) +static int netvsc_send_recv_completion(struct vmbus_channel *channel, + u64 transaction_id, u32 status) { struct nvsp_message recvcompMessage; - int retries = 0; int ret; - struct net_device *ndev = hv_get_drvdata(device); recvcompMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE; recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status; -retry_send_cmplt: /* Send the completion */ ret = vmbus_sendpacket(channel, &recvcompMessage, - sizeof(struct nvsp_message), transaction_id, - VM_PKT_COMP, 0); - if (ret == 0) { - /* success */ - /* no-op */ - } else if (ret == -EAGAIN) { - /* no more room...wait a bit and attempt to retry 3 times */ - retries++; - netdev_err(ndev, "unable to send receive completion pkt" - " (tid %llx)...retrying %d\n", transaction_id, retries); + sizeof(struct nvsp_message_header) + sizeof(u32), + transaction_id, VM_PKT_COMP, 0); - if (retries < 4) { - udelay(100); - goto retry_send_cmplt; - } else { - netdev_err(ndev, "unable to send receive " - "completion pkt (tid %llx)...give up retrying\n", - transaction_id); - } - } else { - netdev_err(ndev, "unable to send receive " - "completion pkt - %llx\n", transaction_id); + return ret; +} + +static inline void count_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx, + u32 *filled, u32 *avail) +{ + u32 first = nvdev->mrc[q_idx].first; + u32 next = nvdev->mrc[q_idx].next; + + *filled = (first > next) ? NETVSC_RECVSLOT_MAX - first + next : + next - first; + + *avail = NETVSC_RECVSLOT_MAX - *filled - 1; +} + +/* Read the first filled slot, no change to index */ +static inline struct recv_comp_data *read_recv_comp_slot(struct netvsc_device + *nvdev, u16 q_idx) +{ + u32 filled, avail; + + if (!nvdev->mrc[q_idx].buf) + return NULL; + + count_recv_comp_slot(nvdev, q_idx, &filled, &avail); + if (!filled) + return NULL; + + return nvdev->mrc[q_idx].buf + nvdev->mrc[q_idx].first * + sizeof(struct recv_comp_data); +} + +/* Put the first filled slot back to available pool */ +static inline void put_recv_comp_slot(struct netvsc_device *nvdev, u16 q_idx) +{ + int num_recv; + + nvdev->mrc[q_idx].first = (nvdev->mrc[q_idx].first + 1) % + NETVSC_RECVSLOT_MAX; + + num_recv = atomic_dec_return(&nvdev->num_outstanding_recvs); + + if (nvdev->destroy && num_recv == 0) + wake_up(&nvdev->wait_drain); +} + +/* Check and send pending recv completions */ +static void netvsc_chk_recv_comp(struct netvsc_device *nvdev, + struct vmbus_channel *channel, u16 q_idx) +{ + struct recv_comp_data *rcd; + int ret; + + while (true) { + rcd = read_recv_comp_slot(nvdev, q_idx); + if (!rcd) + break; + + ret = netvsc_send_recv_completion(channel, rcd->tid, + rcd->status); + if (ret) + break; + + put_recv_comp_slot(nvdev, q_idx); } } +#define NETVSC_RCD_WATERMARK 80 + +/* Get next available slot */ +static inline struct recv_comp_data *get_recv_comp_slot( + struct netvsc_device *nvdev, struct vmbus_channel *channel, u16 q_idx) +{ + u32 filled, avail, next; + struct recv_comp_data *rcd; + + if (!nvdev->recv_section) + return NULL; + + if (!nvdev->mrc[q_idx].buf) + return NULL; + + if (atomic_read(&nvdev->num_outstanding_recvs) > + nvdev->recv_section->num_sub_allocs * NETVSC_RCD_WATERMARK / 100) + netvsc_chk_recv_comp(nvdev, channel, q_idx); + + count_recv_comp_slot(nvdev, q_idx, &filled, &avail); + if (!avail) + return NULL; + + next = nvdev->mrc[q_idx].next; + rcd = nvdev->mrc[q_idx].buf + next * sizeof(struct recv_comp_data); + nvdev->mrc[q_idx].next = (next + 1) % NETVSC_RECVSLOT_MAX; + + atomic_inc(&nvdev->num_outstanding_recvs); + + return rcd; +} + static void netvsc_receive(struct netvsc_device *net_device, struct vmbus_channel *channel, struct hv_device *device, @@ -1025,6 +1111,9 @@ static void netvsc_receive(struct netvsc_device *net_device, int count = 0; struct net_device *ndev = hv_get_drvdata(device); void *data; + int ret; + struct recv_comp_data *rcd; + u16 q_idx = channel->offermsg.offer.sub_channel_index; /* * All inbound packets other than send completion should be xfer page @@ -1069,13 +1158,29 @@ static void netvsc_receive(struct netvsc_device *net_device, /* Pass it to the upper layer */ status = rndis_filter_receive(device, netvsc_packet, &data, channel); - } - netvsc_send_recv_completion(device, channel, net_device, - vmxferpage_packet->d.trans_id, status); -} + if (!net_device->mrc[q_idx].buf) { + ret = netvsc_send_recv_completion(channel, + vmxferpage_packet->d.trans_id, + status); + if (ret) + netdev_err(ndev, "Recv_comp q:%hd, tid:%llx, err:%d\n", + q_idx, vmxferpage_packet->d.trans_id, ret); + return; + } + rcd = get_recv_comp_slot(net_device, channel, q_idx); + + if (!rcd) { + netdev_err(ndev, "Recv_comp full buf q:%hd, tid:%llx\n", + q_idx, vmxferpage_packet->d.trans_id); + return; + } + + rcd->tid = vmxferpage_packet->d.trans_id; + rcd->status = status; +} static void netvsc_send_table(struct hv_device *hdev, struct nvsp_message *nvmsg) @@ -1157,11 +1262,11 @@ static void netvsc_process_raw_pkt(struct hv_device *device, } } - void netvsc_channel_cb(void *context) { int ret; struct vmbus_channel *channel = (struct vmbus_channel *)context; + u16 q_idx = channel->offermsg.offer.sub_channel_index; struct hv_device *device; struct netvsc_device *net_device; u32 bytes_recvd; @@ -1213,8 +1318,6 @@ void netvsc_channel_cb(void *context) ndev, request_id, desc); - - } else { /* * We are done for this pass. @@ -1241,7 +1344,8 @@ void netvsc_channel_cb(void *context) if (bufferlen > NETVSC_PACKET_SIZE) kfree(buffer); - return; + + netvsc_chk_recv_comp(net_device, channel, q_idx); } /* @@ -1263,9 +1367,6 @@ int netvsc_device_add(struct hv_device *device, void *additional_info) net_device->ring_size = ring_size; - /* Initialize the NetVSC channel extension */ - init_completion(&net_device->channel_init_wait); - set_per_channel_state(device->channel, net_device->cb_buffer); /* Open the channel */ diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c index 3ba29fc80d05..52eeb2f67276 100644 --- a/drivers/net/hyperv/netvsc_drv.c +++ b/drivers/net/hyperv/netvsc_drv.c @@ -40,7 +40,6 @@ #include "hyperv_net.h" - #define RING_SIZE_MIN 64 #define LINKCHANGE_INT (2 * HZ) #define NETVSC_HW_FEATURES (NETIF_F_RXCSUM | \ @@ -358,18 +357,14 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) struct rndis_message *rndis_msg; struct rndis_packet *rndis_pkt; u32 rndis_msg_size; - bool isvlan; - bool linear = false; struct rndis_per_packet_info *ppi; struct ndis_tcp_ip_checksum_info *csum_info; - struct ndis_tcp_lso_info *lso_info; int hdr_offset; u32 net_trans_info; u32 hash; u32 skb_length; struct hv_page_buffer page_buf[MAX_PAGE_BUFFER_COUNT]; struct hv_page_buffer *pb = page_buf; - struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); /* We will atmost need two pages to describe the rndis * header. We can only transmit MAX_PAGE_BUFFER_COUNT number @@ -377,22 +372,20 @@ static int netvsc_start_xmit(struct sk_buff *skb, struct net_device *net) * more pages we try linearizing it. */ -check_size: skb_length = skb->len; num_data_pgs = netvsc_get_slots(skb) + 2; - if (num_data_pgs > MAX_PAGE_BUFFER_COUNT && linear) { - net_alert_ratelimited("packet too big: %u pages (%u bytes)\n", - num_data_pgs, skb->len); - ret = -EFAULT; - goto drop; - } else if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { - if (skb_linearize(skb)) { - net_alert_ratelimited("failed to linearize skb\n"); - ret = -ENOMEM; + + if (unlikely(num_data_pgs > MAX_PAGE_BUFFER_COUNT)) { + ++net_device_ctx->eth_stats.tx_scattered; + + if (skb_linearize(skb)) + goto no_memory; + + num_data_pgs = netvsc_get_slots(skb) + 2; + if (num_data_pgs > MAX_PAGE_BUFFER_COUNT) { + ++net_device_ctx->eth_stats.tx_too_big; goto drop; } - linear = true; - goto check_size; } /* @@ -401,17 +394,14 @@ check_size: * structure. */ ret = skb_cow_head(skb, RNDIS_AND_PPI_SIZE); - if (ret) { - netdev_err(net, "unable to alloc hv_netvsc_packet\n"); - ret = -ENOMEM; - goto drop; - } + if (ret) + goto no_memory; + /* Use the skb control buffer for building up the packet */ BUILD_BUG_ON(sizeof(struct hv_netvsc_packet) > FIELD_SIZEOF(struct sk_buff, cb)); packet = (struct hv_netvsc_packet *)skb->cb; - packet->q_idx = skb_get_queue_mapping(skb); packet->total_data_buflen = skb->len; @@ -420,8 +410,6 @@ check_size: memset(rndis_msg, 0, RNDIS_AND_PPI_SIZE); - isvlan = skb->vlan_tci & VLAN_TAG_PRESENT; - /* Add the rndis header */ rndis_msg->ndis_msg_type = RNDIS_MSG_PACKET; rndis_msg->msg_len = packet->total_data_buflen; @@ -440,7 +428,7 @@ check_size: *(u32 *)((void *)ppi + ppi->ppi_offset) = hash; } - if (isvlan) { + if (skb_vlan_tag_present(skb)) { struct ndis_pkt_8021q_info *vlan; rndis_msg_size += NDIS_VLAN_PPI_SIZE; @@ -461,8 +449,37 @@ check_size: * Setup the sendside checksum offload only if this is not a * GSO packet. */ - if (skb_is_gso(skb)) - goto do_lso; + if (skb_is_gso(skb)) { + struct ndis_tcp_lso_info *lso_info; + + rndis_msg_size += NDIS_LSO_PPI_SIZE; + ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, + TCP_LARGESEND_PKTINFO); + + lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + + ppi->ppi_offset); + + lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; + if (net_trans_info & (INFO_IPV4 << 16)) { + lso_info->lso_v2_transmit.ip_version = + NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; + ip_hdr(skb)->tot_len = 0; + ip_hdr(skb)->check = 0; + tcp_hdr(skb)->check = + ~csum_tcpudp_magic(ip_hdr(skb)->saddr, + ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + } else { + lso_info->lso_v2_transmit.ip_version = + NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = + ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); + } + lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; + lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; + goto do_send; + } if ((skb->ip_summed == CHECKSUM_NONE) || (skb->ip_summed == CHECKSUM_UNNECESSARY)) @@ -495,7 +512,7 @@ check_size: ret = skb_cow_head(skb, 0); if (ret) - goto drop; + goto no_memory; uh = udp_hdr(skb); udp_len = ntohs(uh->len); @@ -509,35 +526,6 @@ check_size: csum_info->transmit.udp_checksum = 0; } - goto do_send; - -do_lso: - rndis_msg_size += NDIS_LSO_PPI_SIZE; - ppi = init_ppi_data(rndis_msg, NDIS_LSO_PPI_SIZE, - TCP_LARGESEND_PKTINFO); - - lso_info = (struct ndis_tcp_lso_info *)((void *)ppi + - ppi->ppi_offset); - - lso_info->lso_v2_transmit.type = NDIS_TCP_LARGE_SEND_OFFLOAD_V2_TYPE; - if (net_trans_info & (INFO_IPV4 << 16)) { - lso_info->lso_v2_transmit.ip_version = - NDIS_TCP_LARGE_SEND_OFFLOAD_IPV4; - ip_hdr(skb)->tot_len = 0; - ip_hdr(skb)->check = 0; - tcp_hdr(skb)->check = - ~csum_tcpudp_magic(ip_hdr(skb)->saddr, - ip_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); - } else { - lso_info->lso_v2_transmit.ip_version = - NDIS_TCP_LARGE_SEND_OFFLOAD_IPV6; - ipv6_hdr(skb)->payload_len = 0; - tcp_hdr(skb)->check = - ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0, IPPROTO_TCP, 0); - } - lso_info->lso_v2_transmit.tcp_header_offset = hdr_offset; - lso_info->lso_v2_transmit.mss = skb_shinfo(skb)->gso_size; do_send: /* Start filling in the page buffers with the rndis hdr */ @@ -550,21 +538,33 @@ do_send: skb_tx_timestamp(skb); ret = netvsc_send(net_device_ctx->device_ctx, packet, rndis_msg, &pb, skb); + if (likely(ret == 0)) { + struct netvsc_stats *tx_stats = this_cpu_ptr(net_device_ctx->tx_stats); -drop: - if (ret == 0) { u64_stats_update_begin(&tx_stats->syncp); tx_stats->packets++; tx_stats->bytes += skb_length; u64_stats_update_end(&tx_stats->syncp); - } else { - if (ret != -EAGAIN) { - dev_kfree_skb_any(skb); - net->stats.tx_dropped++; - } + return NETDEV_TX_OK; } - return (ret == -EAGAIN) ? NETDEV_TX_BUSY : NETDEV_TX_OK; + if (ret == -EAGAIN) { + ++net_device_ctx->eth_stats.tx_busy; + return NETDEV_TX_BUSY; + } + + if (ret == -ENOSPC) + ++net_device_ctx->eth_stats.tx_no_space; + +drop: + dev_kfree_skb_any(skb); + net->stats.tx_dropped++; + + return NETDEV_TX_OK; + +no_memory: + ++net_device_ctx->eth_stats.tx_no_memory; + goto drop; } /* @@ -579,19 +579,32 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, struct netvsc_reconfig *event; unsigned long flags; - /* Handle link change statuses only */ + net = hv_get_drvdata(device_obj); + + if (!net) + return; + + ndev_ctx = netdev_priv(net); + + /* Update the physical link speed when changing to another vSwitch */ + if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) { + u32 speed; + + speed = *(u32 *)((void *)indicate + indicate-> + status_buf_offset) / 10000; + ndev_ctx->speed = speed; + return; + } + + /* Handle these link change statuses below */ if (indicate->status != RNDIS_STATUS_NETWORK_CHANGE && indicate->status != RNDIS_STATUS_MEDIA_CONNECT && indicate->status != RNDIS_STATUS_MEDIA_DISCONNECT) return; - net = hv_get_drvdata(device_obj); - - if (!net || net->reg_state != NETREG_REGISTERED) + if (net->reg_state != NETREG_REGISTERED) return; - ndev_ctx = netdev_priv(net); - event = kzalloc(sizeof(*event), GFP_ATOMIC); if (!event) return; @@ -604,7 +617,6 @@ void netvsc_linkstatus_callback(struct hv_device *device_obj, schedule_delayed_work(&ndev_ctx->dwork, 0); } - static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net, struct hv_netvsc_packet *packet, struct ndis_tcp_ip_checksum_info *csum_info, @@ -655,51 +667,23 @@ int netvsc_recv_callback(struct hv_device *device_obj, { struct net_device *net = hv_get_drvdata(device_obj); struct net_device_context *net_device_ctx = netdev_priv(net); + struct net_device *vf_netdev; struct sk_buff *skb; - struct sk_buff *vf_skb; struct netvsc_stats *rx_stats; - u32 bytes_recvd = packet->total_data_buflen; - int ret = 0; - if (!net || net->reg_state != NETREG_REGISTERED) + if (net->reg_state != NETREG_REGISTERED) return NVSP_STAT_FAIL; - if (READ_ONCE(net_device_ctx->vf_inject)) { - atomic_inc(&net_device_ctx->vf_use_cnt); - if (!READ_ONCE(net_device_ctx->vf_inject)) { - /* - * We raced; just move on. - */ - atomic_dec(&net_device_ctx->vf_use_cnt); - goto vf_injection_done; - } - - /* - * Inject this packet into the VF inerface. - * On Hyper-V, multicast and brodcast packets - * are only delivered on the synthetic interface - * (after subjecting these to policy filters on - * the host). Deliver these via the VF interface - * in the guest. - */ - vf_skb = netvsc_alloc_recv_skb(net_device_ctx->vf_netdev, - packet, csum_info, *data, - vlan_tci); - if (vf_skb != NULL) { - ++net_device_ctx->vf_netdev->stats.rx_packets; - net_device_ctx->vf_netdev->stats.rx_bytes += - bytes_recvd; - netif_receive_skb(vf_skb); - } else { - ++net->stats.rx_dropped; - ret = NVSP_STAT_FAIL; - } - atomic_dec(&net_device_ctx->vf_use_cnt); - return ret; - } - -vf_injection_done: - rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); + /* + * If necessary, inject this packet into the VF interface. + * On Hyper-V, multicast and brodcast packets are only delivered + * to the synthetic interface (after subjecting these to + * policy filters on the host). Deliver these via the VF + * interface in the guest. + */ + vf_netdev = rcu_dereference(net_device_ctx->vf_netdev); + if (vf_netdev && (vf_netdev->flags & IFF_UP)) + net = vf_netdev; /* Allocate a skb - TODO direct I/O to pages? */ skb = netvsc_alloc_recv_skb(net, packet, csum_info, *data, vlan_tci); @@ -707,12 +691,25 @@ vf_injection_done: ++net->stats.rx_dropped; return NVSP_STAT_FAIL; } - skb_record_rx_queue(skb, channel-> - offermsg.offer.sub_channel_index); + if (net != vf_netdev) + skb_record_rx_queue(skb, + channel->offermsg.offer.sub_channel_index); + + /* + * Even if injecting the packet, record the statistics + * on the synthetic device because modifying the VF device + * statistics will not work correctly. + */ + rx_stats = this_cpu_ptr(net_device_ctx->rx_stats); u64_stats_update_begin(&rx_stats->syncp); rx_stats->packets++; rx_stats->bytes += packet->total_data_buflen; + + if (skb->pkt_type == PACKET_BROADCAST) + ++rx_stats->broadcast; + else if (skb->pkt_type == PACKET_MULTICAST) + ++rx_stats->multicast; u64_stats_update_end(&rx_stats->syncp); /* @@ -728,8 +725,12 @@ vf_injection_done: static void netvsc_get_drvinfo(struct net_device *net, struct ethtool_drvinfo *info) { + struct net_device_context *net_device_ctx = netdev_priv(net); + struct hv_device *dev = net_device_ctx->device_ctx; + strlcpy(info->driver, KBUILD_MODNAME, sizeof(info->driver)); strlcpy(info->fw_version, "N/A", sizeof(info->fw_version)); + strlcpy(info->bus_info, vmbus_dev_name(dev), sizeof(info->bus_info)); } static void netvsc_get_channels(struct net_device *net, @@ -951,7 +952,7 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, cpu); struct netvsc_stats *rx_stats = per_cpu_ptr(ndev_ctx->rx_stats, cpu); - u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + u64 tx_packets, tx_bytes, rx_packets, rx_bytes, rx_multicast; unsigned int start; do { @@ -964,12 +965,14 @@ static struct rtnl_link_stats64 *netvsc_get_stats64(struct net_device *net, start = u64_stats_fetch_begin_irq(&rx_stats->syncp); rx_packets = rx_stats->packets; rx_bytes = rx_stats->bytes; + rx_multicast = rx_stats->multicast + rx_stats->broadcast; } while (u64_stats_fetch_retry_irq(&rx_stats->syncp, start)); t->tx_bytes += tx_bytes; t->tx_packets += tx_packets; t->rx_bytes += rx_bytes; t->rx_packets += rx_packets; + t->multicast += rx_multicast; } t->tx_dropped = net->stats.tx_dropped; @@ -1005,6 +1008,51 @@ static int netvsc_set_mac_addr(struct net_device *ndev, void *p) return err; } +static const struct { + char name[ETH_GSTRING_LEN]; + u16 offset; +} netvsc_stats[] = { + { "tx_scattered", offsetof(struct netvsc_ethtool_stats, tx_scattered) }, + { "tx_no_memory", offsetof(struct netvsc_ethtool_stats, tx_no_memory) }, + { "tx_no_space", offsetof(struct netvsc_ethtool_stats, tx_no_space) }, + { "tx_too_big", offsetof(struct netvsc_ethtool_stats, tx_too_big) }, + { "tx_busy", offsetof(struct netvsc_ethtool_stats, tx_busy) }, +}; + +static int netvsc_get_sset_count(struct net_device *dev, int string_set) +{ + switch (string_set) { + case ETH_SS_STATS: + return ARRAY_SIZE(netvsc_stats); + default: + return -EINVAL; + } +} + +static void netvsc_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, u64 *data) +{ + struct net_device_context *ndc = netdev_priv(dev); + const void *nds = &ndc->eth_stats; + int i; + + for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) + data[i] = *(unsigned long *)(nds + netvsc_stats[i].offset); +} + +static void netvsc_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + int i; + + switch (stringset) { + case ETH_SS_STATS: + for (i = 0; i < ARRAY_SIZE(netvsc_stats); i++) + memcpy(data + i * ETH_GSTRING_LEN, + netvsc_stats[i].name, ETH_GSTRING_LEN); + break; + } +} + #ifdef CONFIG_NET_POLL_CONTROLLER static void netvsc_poll_controller(struct net_device *net) { @@ -1017,6 +1065,9 @@ static void netvsc_poll_controller(struct net_device *net) static const struct ethtool_ops ethtool_ops = { .get_drvinfo = netvsc_get_drvinfo, .get_link = ethtool_op_get_link, + .get_ethtool_stats = netvsc_get_ethtool_stats, + .get_sset_count = netvsc_get_sset_count, + .get_strings = netvsc_get_strings, .get_channels = netvsc_get_channels, .set_channels = netvsc_set_channels, .get_ts_info = ethtool_op_get_ts_info, @@ -1151,25 +1202,44 @@ static void netvsc_free_netdev(struct net_device *netdev) free_netdev(netdev); } -static struct net_device *get_netvsc_net_device(char *mac) +static struct net_device *get_netvsc_bymac(const u8 *mac) { - struct net_device *dev, *found = NULL; - int rtnl_locked; + struct net_device *dev; - rtnl_locked = rtnl_trylock(); + ASSERT_RTNL(); for_each_netdev(&init_net, dev) { - if (memcmp(dev->dev_addr, mac, ETH_ALEN) == 0) { - if (dev->netdev_ops != &device_ops) - continue; - found = dev; - break; - } - } - if (rtnl_locked) - rtnl_unlock(); + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ - return found; + if (ether_addr_equal(mac, dev->perm_addr)) + return dev; + } + + return NULL; +} + +static struct net_device *get_netvsc_byref(struct net_device *vf_netdev) +{ + struct net_device *dev; + + ASSERT_RTNL(); + + for_each_netdev(&init_net, dev) { + struct net_device_context *net_device_ctx; + + if (dev->netdev_ops != &device_ops) + continue; /* not a netvsc device */ + + net_device_ctx = netdev_priv(dev); + if (net_device_ctx->nvdev == NULL) + continue; /* device is removed */ + + if (rtnl_dereference(net_device_ctx->vf_netdev) == vf_netdev) + return dev; /* a match */ + } + + return NULL; } static int netvsc_register_vf(struct net_device *vf_netdev) @@ -1177,9 +1247,8 @@ static int netvsc_register_vf(struct net_device *vf_netdev) struct net_device *ndev; struct net_device_context *net_device_ctx; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - if (eth_ops == NULL || eth_ops == ðtool_ops) + if (vf_netdev->addr_len != ETH_ALEN) return NOTIFY_DONE; /* @@ -1187,13 +1256,13 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * associate with the VF interface. If we don't find a matching * synthetic interface, move on. */ - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_bymac(vf_netdev->perm_addr); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || net_device_ctx->vf_netdev) + if (!netvsc_dev || rtnl_dereference(net_device_ctx->vf_netdev)) return NOTIFY_DONE; netdev_info(ndev, "VF registering: %s\n", vf_netdev->name); @@ -1201,46 +1270,26 @@ static int netvsc_register_vf(struct net_device *vf_netdev) * Take a reference on the module. */ try_module_get(THIS_MODULE); - net_device_ctx->vf_netdev = vf_netdev; + + dev_hold(vf_netdev); + rcu_assign_pointer(net_device_ctx->vf_netdev, vf_netdev); return NOTIFY_OK; } -static void netvsc_inject_enable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = true; -} - -static void netvsc_inject_disable(struct net_device_context *net_device_ctx) -{ - net_device_ctx->vf_inject = false; - - /* Wait for currently active users to drain out. */ - while (atomic_read(&net_device_ctx->vf_use_cnt) != 0) - udelay(50); -} - static int netvsc_vf_up(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; - - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF up: %s\n", vf_netdev->name); - netvsc_inject_enable(net_device_ctx); /* * Open the device before switching data path. @@ -1261,29 +1310,20 @@ static int netvsc_vf_up(struct net_device *vf_netdev) return NOTIFY_OK; } - static int netvsc_vf_down(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; struct net_device_context *net_device_ctx; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; - - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; - netdev_info(ndev, "VF down: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); netvsc_switch_datapath(ndev, false); netdev_info(ndev, "Data path switched from VF: %s\n", vf_netdev->name); rndis_filter_close(netvsc_dev); @@ -1295,28 +1335,23 @@ static int netvsc_vf_down(struct net_device *vf_netdev) return NOTIFY_OK; } - static int netvsc_unregister_vf(struct net_device *vf_netdev) { struct net_device *ndev; struct netvsc_device *netvsc_dev; - const struct ethtool_ops *eth_ops = vf_netdev->ethtool_ops; struct net_device_context *net_device_ctx; - if (eth_ops == ðtool_ops) - return NOTIFY_DONE; - - ndev = get_netvsc_net_device(vf_netdev->dev_addr); + ndev = get_netvsc_byref(vf_netdev); if (!ndev) return NOTIFY_DONE; net_device_ctx = netdev_priv(ndev); netvsc_dev = net_device_ctx->nvdev; - if (!netvsc_dev || !net_device_ctx->vf_netdev) - return NOTIFY_DONE; + netdev_info(ndev, "VF unregistering: %s\n", vf_netdev->name); - netvsc_inject_disable(net_device_ctx); - net_device_ctx->vf_netdev = NULL; + + RCU_INIT_POINTER(net_device_ctx->vf_netdev, NULL); + dev_put(vf_netdev); module_put(THIS_MODULE); return NOTIFY_OK; } @@ -1337,6 +1372,8 @@ static int netvsc_probe(struct hv_device *dev, netif_carrier_off(net); + netvsc_init_settings(net); + net_device_ctx = netdev_priv(net); net_device_ctx->device_ctx = dev; net_device_ctx->msg_enable = netif_msg_init(debug, default_msg); @@ -1366,10 +1403,6 @@ static int netvsc_probe(struct hv_device *dev, spin_lock_init(&net_device_ctx->lock); INIT_LIST_HEAD(&net_device_ctx->reconfig_events); - atomic_set(&net_device_ctx->vf_use_cnt, 0); - net_device_ctx->vf_netdev = NULL; - net_device_ctx->vf_inject = false; - net->netdev_ops = &device_ops; net->hw_features = NETVSC_HW_FEATURES; @@ -1398,8 +1431,6 @@ static int netvsc_probe(struct hv_device *dev, netif_set_real_num_tx_queues(net, nvdev->num_chn); netif_set_real_num_rx_queues(net, nvdev->num_chn); - netvsc_init_settings(net); - ret = register_netdev(net); if (ret != 0) { pr_err("Unable to register netdev.\n"); @@ -1423,7 +1454,6 @@ static int netvsc_remove(struct hv_device *dev) return 0; } - ndev_ctx = netdev_priv(net); net_device = ndev_ctx->nvdev; @@ -1470,7 +1500,6 @@ static struct hv_driver netvsc_drv = { .remove = netvsc_remove, }; - /* * On Hyper-V, every VF interface is matched with a corresponding * synthetic interface. The synthetic interface is presented first @@ -1482,13 +1511,21 @@ static int netvsc_netdev_event(struct notifier_block *this, { struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); + /* Skip our own events */ + if (event_dev->netdev_ops == &device_ops) + return NOTIFY_DONE; + + /* Avoid non-Ethernet type devices */ + if (event_dev->type != ARPHRD_ETHER) + return NOTIFY_DONE; + /* Avoid Vlan dev with same MAC registering as VF */ if (event_dev->priv_flags & IFF_802_1Q_VLAN) return NOTIFY_DONE; /* Avoid Bonding master dev with same MAC registering as VF */ - if (event_dev->priv_flags & IFF_BONDING && - event_dev->flags & IFF_MASTER) + if ((event_dev->priv_flags & IFF_BONDING) && + (event_dev->flags & IFF_MASTER)) return NOTIFY_DONE; switch (event) { diff --git a/drivers/net/hyperv/rndis_filter.c b/drivers/net/hyperv/rndis_filter.c index 8e830f741d47..9195d5da8485 100644 --- a/drivers/net/hyperv/rndis_filter.c +++ b/drivers/net/hyperv/rndis_filter.c @@ -663,13 +663,14 @@ cleanup: return ret; } -u8 netvsc_hash_key[HASH_KEYLEN] = { +static const u8 netvsc_hash_key[] = { 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa }; +#define HASH_KEYLEN ARRAY_SIZE(netvsc_hash_key) static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) { @@ -720,7 +721,6 @@ static int rndis_filter_set_rss_param(struct rndis_device *rdev, int num_queue) for (i = 0; i < HASH_KEYLEN; i++) keyp[i] = netvsc_hash_key[i]; - ret = rndis_filter_send_request(rdev, request); if (ret != 0) goto cleanup; @@ -738,7 +738,6 @@ cleanup: return ret; } - static int rndis_filter_query_device_link_status(struct rndis_device *dev) { u32 size = sizeof(u32); @@ -752,6 +751,28 @@ static int rndis_filter_query_device_link_status(struct rndis_device *dev) return ret; } +static int rndis_filter_query_link_speed(struct rndis_device *dev) +{ + u32 size = sizeof(u32); + u32 link_speed; + struct net_device_context *ndc; + int ret; + + ret = rndis_filter_query_device(dev, RNDIS_OID_GEN_LINK_SPEED, + &link_speed, &size); + + if (!ret) { + ndc = netdev_priv(dev->ndev); + + /* The link speed reported from host is in 100bps unit, so + * we convert it to Mbps here. + */ + ndc->speed = link_speed / 10000; + } + + return ret; +} + int rndis_filter_set_packet_filter(struct rndis_device *dev, u32 new_filter) { struct rndis_request *request; @@ -792,7 +813,6 @@ cleanup: return ret; } - static int rndis_filter_init_device(struct rndis_device *dev) { struct rndis_request *request; @@ -875,11 +895,11 @@ cleanup: /* Wait for all send completions */ wait_event(nvdev->wait_drain, - atomic_read(&nvdev->num_outstanding_sends) == 0); + atomic_read(&nvdev->num_outstanding_sends) == 0 && + atomic_read(&nvdev->num_outstanding_recvs) == 0); if (request) put_rndis_request(dev, request); - return; } static int rndis_filter_open_device(struct rndis_device *dev) @@ -931,6 +951,9 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) set_per_channel_state(new_sc, nvscdev->sub_cb_buf + (chn_index - 1) * NETVSC_PACKET_SIZE); + nvscdev->mrc[chn_index].buf = vzalloc(NETVSC_RECVSLOT_MAX * + sizeof(struct recv_comp_data)); + ret = vmbus_open(new_sc, nvscdev->ring_size * PAGE_SIZE, nvscdev->ring_size * PAGE_SIZE, NULL, 0, netvsc_channel_cb, new_sc); @@ -946,7 +969,7 @@ static void netvsc_sc_open(struct vmbus_channel *new_sc) } int rndis_filter_device_add(struct hv_device *dev, - void *additional_info) + void *additional_info) { int ret; struct net_device *net = hv_get_drvdata(dev); @@ -1028,7 +1051,6 @@ int rndis_filter_device_add(struct hv_device *dev, offloads.udp_ip_v6_csum = NDIS_OFFLOAD_PARAMETERS_TX_RX_ENABLED; offloads.lso_v2_ipv4 = NDIS_OFFLOAD_PARAMETERS_LSOV2_ENABLED; - ret = rndis_filter_set_offload_params(net, &offloads); if (ret) goto err_dev_remv; @@ -1044,6 +1066,8 @@ int rndis_filter_device_add(struct hv_device *dev, if (net_device->nvsp_version < NVSP_PROTOCOL_VERSION_5) return 0; + rndis_filter_query_link_speed(rndis_device); + /* vRSS setup */ memset(&rsscap, 0, rsscap_size); ret = rndis_filter_query_device(rndis_device, @@ -1152,7 +1176,6 @@ void rndis_filter_device_remove(struct hv_device *dev) netvsc_device_remove(dev); } - int rndis_filter_open(struct netvsc_device *nvdev) { if (!nvdev) diff --git a/drivers/net/ieee802154/fakelb.c b/drivers/net/ieee802154/fakelb.c index 0becf0ac3926..ec387efb61d0 100644 --- a/drivers/net/ieee802154/fakelb.c +++ b/drivers/net/ieee802154/fakelb.c @@ -30,7 +30,7 @@ static int numlbs = 2; static LIST_HEAD(fakelb_phys); -static DEFINE_SPINLOCK(fakelb_phys_lock); +static DEFINE_MUTEX(fakelb_phys_lock); static LIST_HEAD(fakelb_ifup_phys); static DEFINE_RWLOCK(fakelb_ifup_phys_lock); @@ -188,9 +188,9 @@ static int fakelb_add_one(struct device *dev) if (err) goto err_reg; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_add_tail(&phy->list, &fakelb_phys); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; @@ -222,10 +222,10 @@ static int fakelb_probe(struct platform_device *pdev) return 0; err_slave: - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return err; } @@ -233,10 +233,10 @@ static int fakelb_remove(struct platform_device *pdev) { struct fakelb_phy *phy, *tmp; - spin_lock(&fakelb_phys_lock); + mutex_lock(&fakelb_phys_lock); list_for_each_entry_safe(phy, tmp, &fakelb_phys, list) fakelb_del(phy); - spin_unlock(&fakelb_phys_lock); + mutex_unlock(&fakelb_phys_lock); return 0; } diff --git a/drivers/net/ipvlan/ipvlan.h b/drivers/net/ipvlan/ipvlan.h index 695a5dc9ace3..7e0732f5ea07 100644 --- a/drivers/net/ipvlan/ipvlan.h +++ b/drivers/net/ipvlan/ipvlan.h @@ -23,11 +23,13 @@ #include #include #include +#include #include #include #include #include #include +#include #define IPVLAN_DRV "ipvlan" #define IPV_DRV_VER "0.1" @@ -124,4 +126,8 @@ struct ipvl_addr *ipvlan_find_addr(const struct ipvl_dev *ipvlan, const void *iaddr, bool is_v6); bool ipvlan_addr_busy(struct ipvl_port *port, void *iaddr, bool is_v6); void ipvlan_ht_addr_del(struct ipvl_addr *addr); +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto); +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state); #endif /* __IPVLAN_H */ diff --git a/drivers/net/ipvlan/ipvlan_core.c b/drivers/net/ipvlan/ipvlan_core.c index b5f9511d819e..b4e990743e1d 100644 --- a/drivers/net/ipvlan/ipvlan_core.c +++ b/drivers/net/ipvlan/ipvlan_core.c @@ -560,6 +560,7 @@ int ipvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) case IPVLAN_MODE_L2: return ipvlan_xmit_mode_l2(skb, dev); case IPVLAN_MODE_L3: + case IPVLAN_MODE_L3S: return ipvlan_xmit_mode_l3(skb, dev); } @@ -664,6 +665,8 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) return ipvlan_handle_mode_l2(pskb, port); case IPVLAN_MODE_L3: return ipvlan_handle_mode_l3(pskb, port); + case IPVLAN_MODE_L3S: + return RX_HANDLER_PASS; } /* Should not reach here */ @@ -672,3 +675,94 @@ rx_handler_result_t ipvlan_handle_frame(struct sk_buff **pskb) kfree_skb(skb); return RX_HANDLER_CONSUMED; } + +static struct ipvl_addr *ipvlan_skb_to_addr(struct sk_buff *skb, + struct net_device *dev) +{ + struct ipvl_addr *addr = NULL; + struct ipvl_port *port; + void *lyr3h; + int addr_type; + + if (!dev || !netif_is_ipvlan_port(dev)) + goto out; + + port = ipvlan_port_get_rcu(dev); + if (!port || port->mode != IPVLAN_MODE_L3S) + goto out; + + lyr3h = ipvlan_get_L3_hdr(skb, &addr_type); + if (!lyr3h) + goto out; + + addr = ipvlan_addr_lookup(port, lyr3h, addr_type, true); +out: + return addr; +} + +struct sk_buff *ipvlan_l3_rcv(struct net_device *dev, struct sk_buff *skb, + u16 proto) +{ + struct ipvl_addr *addr; + struct net_device *sdev; + + addr = ipvlan_skb_to_addr(skb, dev); + if (!addr) + goto out; + + sdev = addr->master->dev; + switch (proto) { + case AF_INET: + { + int err; + struct iphdr *ip4h = ip_hdr(skb); + + err = ip_route_input_noref(skb, ip4h->daddr, ip4h->saddr, + ip4h->tos, sdev); + if (unlikely(err)) + goto out; + break; + } + case AF_INET6: + { + struct dst_entry *dst; + struct ipv6hdr *ip6h = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct flowi6 fl6 = { + .flowi6_iif = sdev->ifindex, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_mark = skb->mark, + .flowi6_proto = ip6h->nexthdr, + }; + + skb_dst_drop(skb); + dst = ip6_route_input_lookup(dev_net(sdev), sdev, &fl6, flags); + skb_dst_set(skb, dst); + break; + } + default: + break; + } + +out: + return skb; +} + +unsigned int ipvlan_nf_input(void *priv, struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct ipvl_addr *addr; + unsigned int len; + + addr = ipvlan_skb_to_addr(skb, skb->dev); + if (!addr) + goto out; + + skb->dev = addr->master->dev; + len = skb->len + ETH_HLEN; + ipvlan_count_rx(addr->master, len, true, false); +out: + return NF_ACCEPT; +} diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 18b4e8c7f68a..f442eb366863 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -9,24 +9,87 @@ #include "ipvlan.h" +static u32 ipvl_nf_hook_refcnt = 0; + +static struct nf_hook_ops ipvl_nfops[] __read_mostly = { + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, + { + .hook = ipvlan_nf_input, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = INT_MAX, + }, +}; + +static struct l3mdev_ops ipvl_l3mdev_ops __read_mostly = { + .l3mdev_l3_rcv = ipvlan_l3_rcv, +}; + static void ipvlan_adjust_mtu(struct ipvl_dev *ipvlan, struct net_device *dev) { ipvlan->dev->mtu = dev->mtu - ipvlan->mtu_adj; } -static void ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) +static int ipvlan_register_nf_hook(void) +{ + int err = 0; + + if (!ipvl_nf_hook_refcnt) { + err = _nf_register_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); + if (!err) + ipvl_nf_hook_refcnt = 1; + } else { + ipvl_nf_hook_refcnt++; + } + + return err; +} + +static void ipvlan_unregister_nf_hook(void) +{ + WARN_ON(!ipvl_nf_hook_refcnt); + + ipvl_nf_hook_refcnt--; + if (!ipvl_nf_hook_refcnt) + _nf_unregister_hooks(ipvl_nfops, ARRAY_SIZE(ipvl_nfops)); +} + +static int ipvlan_set_port_mode(struct ipvl_port *port, u16 nval) { struct ipvl_dev *ipvlan; + struct net_device *mdev = port->dev; + int err = 0; + ASSERT_RTNL(); if (port->mode != nval) { + if (nval == IPVLAN_MODE_L3S) { + /* New mode is L3S */ + err = ipvlan_register_nf_hook(); + if (!err) { + mdev->l3mdev_ops = &ipvl_l3mdev_ops; + mdev->priv_flags |= IFF_L3MDEV_MASTER; + } else + return err; + } else if (port->mode == IPVLAN_MODE_L3S) { + /* Old mode was L3S */ + mdev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + mdev->l3mdev_ops = NULL; + } list_for_each_entry(ipvlan, &port->ipvlans, pnode) { - if (nval == IPVLAN_MODE_L3) + if (nval == IPVLAN_MODE_L3 || nval == IPVLAN_MODE_L3S) ipvlan->dev->flags |= IFF_NOARP; else ipvlan->dev->flags &= ~IFF_NOARP; } port->mode = nval; } + return err; } static int ipvlan_port_create(struct net_device *dev) @@ -74,6 +137,11 @@ static void ipvlan_port_destroy(struct net_device *dev) struct ipvl_port *port = ipvlan_port_get_rtnl(dev); dev->priv_flags &= ~IFF_IPVLAN_MASTER; + if (port->mode == IPVLAN_MODE_L3S) { + dev->priv_flags &= ~IFF_L3MDEV_MASTER; + ipvlan_unregister_nf_hook(); + dev->l3mdev_ops = NULL; + } netdev_rx_handler_unregister(dev); cancel_work_sync(&port->wq); __skb_queue_purge(&port->backlog); @@ -132,7 +200,8 @@ static int ipvlan_open(struct net_device *dev) struct net_device *phy_dev = ipvlan->phy_dev; struct ipvl_addr *addr; - if (ipvlan->port->mode == IPVLAN_MODE_L3) + if (ipvlan->port->mode == IPVLAN_MODE_L3 || + ipvlan->port->mode == IPVLAN_MODE_L3S) dev->flags |= IFF_NOARP; else dev->flags &= ~IFF_NOARP; @@ -372,13 +441,14 @@ static int ipvlan_nl_changelink(struct net_device *dev, { struct ipvl_dev *ipvlan = netdev_priv(dev); struct ipvl_port *port = ipvlan_port_get_rtnl(ipvlan->phy_dev); + int err = 0; if (data && data[IFLA_IPVLAN_MODE]) { u16 nmode = nla_get_u16(data[IFLA_IPVLAN_MODE]); - ipvlan_set_port_mode(port, nmode); + err = ipvlan_set_port_mode(port, nmode); } - return 0; + return err; } static size_t ipvlan_nl_getsize(const struct net_device *dev) @@ -473,10 +543,13 @@ static int ipvlan_link_new(struct net *src_net, struct net_device *dev, unregister_netdevice(dev); return err; } + err = ipvlan_set_port_mode(port, mode); + if (err) { + unregister_netdevice(dev); + return err; + } list_add_tail_rcu(&ipvlan->pnode, &port->ipvlans); - ipvlan_set_port_mode(port, mode); - netif_stacked_transfer_operstate(phy_dev, dev); return 0; } diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 351e701eb043..3ea47f28e143 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -2973,6 +2973,7 @@ static void macsec_setup(struct net_device *dev) dev->priv_flags |= IFF_NO_QUEUE; dev->netdev_ops = &macsec_netdev_ops; dev->destructor = macsec_free_netdev; + SET_NETDEV_DEVTYPE(dev, &macsec_type); eth_zero_addr(dev->broadcast); } diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig index b4863e4e522b..5078a0d0db64 100644 --- a/drivers/net/phy/Kconfig +++ b/drivers/net/phy/Kconfig @@ -15,162 +15,27 @@ if PHYLIB config SWPHY bool -comment "MII PHY device drivers" +comment "MDIO bus device drivers" -config AQUANTIA_PHY - tristate "Drivers for the Aquantia PHYs" - ---help--- - Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 - -config AT803X_PHY - tristate "Drivers for Atheros AT803X PHYs" - ---help--- - Currently supports the AT8030 and AT8035 model - -config AMD_PHY - tristate "Drivers for the AMD PHYs" - ---help--- - Currently supports the am79c874 - -config MARVELL_PHY - tristate "Drivers for Marvell PHYs" - ---help--- - Currently has a driver for the 88E1011S - -config DAVICOM_PHY - tristate "Drivers for Davicom PHYs" - ---help--- - Currently supports dm9161e and dm9131 - -config QSEMI_PHY - tristate "Drivers for Quality Semiconductor PHYs" - ---help--- - Currently supports the qs6612 - -config LXT_PHY - tristate "Drivers for the Intel LXT PHYs" - ---help--- - Currently supports the lxt970, lxt971 - -config CICADA_PHY - tristate "Drivers for the Cicada PHYs" - ---help--- - Currently supports the cis8204 - -config VITESSE_PHY - tristate "Drivers for the Vitesse PHYs" - ---help--- - Currently supports the vsc8244 - -config TERANETICS_PHY - tristate "Drivers for the Teranetics PHYs" - ---help--- - Currently supports the Teranetics TN2020 - -config SMSC_PHY - tristate "Drivers for SMSC PHYs" - ---help--- - Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs - -config BCM_NET_PHYLIB - tristate - -config BROADCOM_PHY - tristate "Drivers for Broadcom PHYs" - select BCM_NET_PHYLIB - ---help--- - Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, - BCM5481 and BCM5482 PHYs. - -config BCM_CYGNUS_PHY - tristate "Drivers for Broadcom Cygnus SoC internal PHY" - depends on ARCH_BCM_CYGNUS || COMPILE_TEST - depends on MDIO_BCM_IPROC - select BCM_NET_PHYLIB - ---help--- - This PHY driver is for the 1G internal PHYs of the Broadcom - Cygnus Family SoC. - - Currently supports internal PHY's used in the BCM11300, - BCM11320, BCM11350, BCM11360, BCM58300, BCM58302, - BCM58303 & BCM58305 Broadcom Cygnus SoCs. - -config BCM63XX_PHY - tristate "Drivers for Broadcom 63xx SOCs internal PHY" - depends on BCM63XX - select BCM_NET_PHYLIB - ---help--- - Currently supports the 6348 and 6358 PHYs. - -config BCM7XXX_PHY - tristate "Drivers for Broadcom 7xxx SOCs internal PHYs" - select BCM_NET_PHYLIB - ---help--- - Currently supports the BCM7366, BCM7439, BCM7445, and - 40nm and 65nm generation of BCM7xxx Set Top Box SoCs. - -config BCM87XX_PHY - tristate "Driver for Broadcom BCM8706 and BCM8727 PHYs" +config MDIO_BCM_IPROC + tristate "Broadcom iProc MDIO bus controller" + depends on ARCH_BCM_IPROC || COMPILE_TEST + depends on HAS_IOMEM && OF_MDIO help - Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs. + This module provides a driver for the MDIO busses found in the + Broadcom iProc SoC's. -config ICPLUS_PHY - tristate "Drivers for ICPlus PHYs" - ---help--- - Currently supports the IP175C and IP1001 PHYs. - -config REALTEK_PHY - tristate "Drivers for Realtek PHYs" - ---help--- - Supports the Realtek 821x PHY. - -config NATIONAL_PHY - tristate "Drivers for National Semiconductor PHYs" - ---help--- - Currently supports the DP83865 PHY. - -config STE10XP - tristate "Driver for STMicroelectronics STe10Xp PHYs" - ---help--- - This is the driver for the STe100p and STe101p PHYs. - -config LSI_ET1011C_PHY - tristate "Driver for LSI ET1011C PHY" - ---help--- - Supports the LSI ET1011C PHY. - -config MICREL_PHY - tristate "Driver for Micrel PHYs" - ---help--- - Supports the KSZ9021, VSC8201, KS8001 PHYs. - -config DP83848_PHY - tristate "Driver for Texas Instruments DP83848 PHY" - ---help--- - Supports the DP83848 PHY. - -config DP83867_PHY - tristate "Drivers for Texas Instruments DP83867 Gigabit PHY" - ---help--- - Currently supports the DP83867 PHY. - -config MICROCHIP_PHY - tristate "Drivers for Microchip PHYs" +config MDIO_BCM_UNIMAC + tristate "Broadcom UniMAC MDIO bus controller" + depends on HAS_IOMEM help - Supports the LAN88XX PHYs. - -config FIXED_PHY - tristate "Driver for MDIO Bus/PHY emulation with fixed speed/link PHYs" - depends on PHYLIB - select SWPHY - ---help--- - Adds the platform "fixed" MDIO Bus to cover the boards that use - PHYs that are not connected to the real MDIO bus. - - Currently tested with mpc866ads and mpc8349e-mitx. + This module provides a driver for the Broadcom UniMAC MDIO busses. + This hardware can be found in the Broadcom GENET Ethernet MAC + controllers as well as some Broadcom Ethernet switches such as the + Starfighter 2 switches. config MDIO_BITBANG - tristate "Support for bitbanged MDIO buses" + tristate "Bitbanged MDIO buses" help This module implements the MDIO bus protocol in software, for use by low level drivers that export the ability to @@ -178,54 +43,6 @@ config MDIO_BITBANG If in doubt, say N. -config MDIO_GPIO - tristate "Support for GPIO lib-based bitbanged MDIO buses" - depends on MDIO_BITBANG && GPIOLIB - ---help--- - Supports GPIO lib-based MDIO busses. - - To compile this driver as a module, choose M here: the module - will be called mdio-gpio. - -config MDIO_CAVIUM - tristate - -config MDIO_OCTEON - tristate "Support for MDIO buses on Octeon and some ThunderX SOCs" - depends on 64BIT - depends on HAS_IOMEM - select MDIO_CAVIUM - help - This module provides a driver for the Octeon and ThunderX MDIO - buses. It is required by the Octeon and ThunderX ethernet device - drivers on some systems. - -config MDIO_THUNDER - tristate "Support for MDIO buses on ThunderX SOCs" - depends on 64BIT - depends on PCI - select MDIO_CAVIUM - help - This driver supports the MDIO interfaces found on Cavium - ThunderX SoCs when the MDIO bus device appears as a PCI - device. - - -config MDIO_SUN4I - tristate "Allwinner sun4i MDIO interface support" - depends on ARCH_SUNXI - help - This driver supports the MDIO interface found in the network - interface units of the Allwinner SoC that have an EMAC (A10, - A12, A10s, etc.) - -config MDIO_MOXART - tristate "MOXA ART MDIO interface support" - depends on ARCH_MOXART - help - This driver supports the MDIO interface found in the network - interface units of the MOXA ART SoC - config MDIO_BUS_MUX tristate depends on OF_MDIO @@ -235,8 +52,19 @@ config MDIO_BUS_MUX to a parent bus. Switching between child busses is done by device specific drivers. +config MDIO_BUS_MUX_BCM_IPROC + tristate "Broadcom iProc based MDIO bus multiplexers" + depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST) + select MDIO_BUS_MUX + default ARCH_BCM_IPROC + help + This module provides a driver for MDIO bus multiplexers found in + iProc based Broadcom SoCs. This multiplexer connects one of several + child MDIO bus to a parent bus. Buses could be internal as well as + external and selection logic lies inside the same multiplexer. + config MDIO_BUS_MUX_GPIO - tristate "Support for GPIO controlled MDIO bus multiplexers" + tristate "GPIO controlled MDIO bus multiplexers" depends on OF_GPIO && OF_MDIO select MDIO_BUS_MUX help @@ -246,7 +74,7 @@ config MDIO_BUS_MUX_GPIO selection is under the control of GPIO lines. config MDIO_BUS_MUX_MMIOREG - tristate "Support for MMIO device-controlled MDIO bus multiplexers" + tristate "MMIO device-controlled MDIO bus multiplexers" depends on OF_MDIO && HAS_IOMEM select MDIO_BUS_MUX help @@ -258,41 +86,17 @@ config MDIO_BUS_MUX_MMIOREG Currently, only 8-bit registers are supported. -config MDIO_BUS_MUX_BCM_IPROC - tristate "Support for iProc based MDIO bus multiplexers" - depends on OF && OF_MDIO && (ARCH_BCM_IPROC || COMPILE_TEST) - select MDIO_BUS_MUX - default ARCH_BCM_IPROC - help - This module provides a driver for MDIO bus multiplexers found in - iProc based Broadcom SoCs. This multiplexer connects one of several - child MDIO bus to a parent bus. Buses could be internal as well as - external and selection logic lies inside the same multiplexer. +config MDIO_CAVIUM + tristate -config MDIO_BCM_UNIMAC - tristate "Broadcom UniMAC MDIO bus controller" - depends on HAS_IOMEM - help - This module provides a driver for the Broadcom UniMAC MDIO busses. - This hardware can be found in the Broadcom GENET Ethernet MAC - controllers as well as some Broadcom Ethernet switches such as the - Starfighter 2 switches. - -config MDIO_BCM_IPROC - tristate "Broadcom iProc MDIO bus controller" - depends on ARCH_BCM_IPROC || COMPILE_TEST - depends on HAS_IOMEM && OF_MDIO - help - This module provides a driver for the MDIO busses found in the - Broadcom iProc SoC's. - -config INTEL_XWAY_PHY - tristate "Driver for Intel XWAY PHYs" +config MDIO_GPIO + tristate "GPIO lib-based bitbanged MDIO buses" + depends on MDIO_BITBANG && GPIOLIB ---help--- - Supports the Intel XWAY (former Lantiq) 11G and 22E PHYs. - These PHYs are marked as standalone chips under the names - PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel - SoCs xRX200, xRX300, xRX330, xRX350 and xRX550. + Supports GPIO lib-based MDIO busses. + + To compile this driver as a module, choose M here: the module + will be called mdio-gpio. config MDIO_HISI_FEMAC tristate "Hisilicon FEMAC MDIO bus controller" @@ -301,6 +105,221 @@ config MDIO_HISI_FEMAC This module provides a driver for the MDIO busses found in the Hisilicon SoC that have an Fast Ethernet MAC. +config MDIO_MOXART + tristate "MOXA ART MDIO interface support" + depends on ARCH_MOXART + help + This driver supports the MDIO interface found in the network + interface units of the MOXA ART SoC + +config MDIO_OCTEON + tristate "Octeon and some ThunderX SOCs MDIO buses" + depends on 64BIT + depends on HAS_IOMEM + select MDIO_CAVIUM + help + This module provides a driver for the Octeon and ThunderX MDIO + buses. It is required by the Octeon and ThunderX ethernet device + drivers on some systems. + +config MDIO_SUN4I + tristate "Allwinner sun4i MDIO interface support" + depends on ARCH_SUNXI + help + This driver supports the MDIO interface found in the network + interface units of the Allwinner SoC that have an EMAC (A10, + A12, A10s, etc.) + +config MDIO_THUNDER + tristate "ThunderX SOCs MDIO buses" + depends on 64BIT + depends on PCI + select MDIO_CAVIUM + help + This driver supports the MDIO interfaces found on Cavium + ThunderX SoCs when the MDIO bus device appears as a PCI + device. + +config MDIO_XGENE + tristate "APM X-Gene SoC MDIO bus controller" + help + This module provides a driver for the MDIO busses found in the + APM X-Gene SoC's. + +comment "MII PHY device drivers" + +config AMD_PHY + tristate "AMD PHYs" + ---help--- + Currently supports the am79c874 + +config AQUANTIA_PHY + tristate "Aquantia PHYs" + ---help--- + Currently supports the Aquantia AQ1202, AQ2104, AQR105, AQR405 + +config AT803X_PHY + tristate "AT803X PHYs" + ---help--- + Currently supports the AT8030 and AT8035 model + +config BCM63XX_PHY + tristate "Broadcom 63xx SOCs internal PHY" + depends on BCM63XX + select BCM_NET_PHYLIB + ---help--- + Currently supports the 6348 and 6358 PHYs. + +config BCM7XXX_PHY + tristate "Broadcom 7xxx SOCs internal PHYs" + select BCM_NET_PHYLIB + ---help--- + Currently supports the BCM7366, BCM7439, BCM7445, and + 40nm and 65nm generation of BCM7xxx Set Top Box SoCs. + +config BCM87XX_PHY + tristate "Broadcom BCM8706 and BCM8727 PHYs" + help + Currently supports the BCM8706 and BCM8727 10G Ethernet PHYs. + +config BCM_CYGNUS_PHY + tristate "Broadcom Cygnus SoC internal PHY" + depends on ARCH_BCM_CYGNUS || COMPILE_TEST + depends on MDIO_BCM_IPROC + select BCM_NET_PHYLIB + ---help--- + This PHY driver is for the 1G internal PHYs of the Broadcom + Cygnus Family SoC. + + Currently supports internal PHY's used in the BCM11300, + BCM11320, BCM11350, BCM11360, BCM58300, BCM58302, + BCM58303 & BCM58305 Broadcom Cygnus SoCs. + +config BCM_NET_PHYLIB + tristate + +config BROADCOM_PHY + tristate "Broadcom PHYs" + select BCM_NET_PHYLIB + ---help--- + Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464, + BCM5481 and BCM5482 PHYs. + +config CICADA_PHY + tristate "Cicada PHYs" + ---help--- + Currently supports the cis8204 + +config DAVICOM_PHY + tristate "Davicom PHYs" + ---help--- + Currently supports dm9161e and dm9131 + +config DP83848_PHY + tristate "Texas Instruments DP83848 PHY" + ---help--- + Supports the DP83848 PHY. + +config DP83867_PHY + tristate "Texas Instruments DP83867 Gigabit PHY" + ---help--- + Currently supports the DP83867 PHY. + +config FIXED_PHY + tristate "MDIO Bus/PHY emulation with fixed speed/link PHYs" + depends on PHYLIB + select SWPHY + ---help--- + Adds the platform "fixed" MDIO Bus to cover the boards that use + PHYs that are not connected to the real MDIO bus. + + Currently tested with mpc866ads and mpc8349e-mitx. + +config ICPLUS_PHY + tristate "ICPlus PHYs" + ---help--- + Currently supports the IP175C and IP1001 PHYs. + +config INTEL_XWAY_PHY + tristate "Intel XWAY PHYs" + ---help--- + Supports the Intel XWAY (former Lantiq) 11G and 22E PHYs. + These PHYs are marked as standalone chips under the names + PEF 7061, PEF 7071 and PEF 7072 or integrated into the Intel + SoCs xRX200, xRX300, xRX330, xRX350 and xRX550. + +config LSI_ET1011C_PHY + tristate "LSI ET1011C PHY" + ---help--- + Supports the LSI ET1011C PHY. + +config LXT_PHY + tristate "Intel LXT PHYs" + ---help--- + Currently supports the lxt970, lxt971 + +config MARVELL_PHY + tristate "Marvell PHYs" + ---help--- + Currently has a driver for the 88E1011S + +config MICREL_PHY + tristate "Micrel PHYs" + ---help--- + Supports the KSZ9021, VSC8201, KS8001 PHYs. + +config MICROCHIP_PHY + tristate "Microchip PHYs" + help + Supports the LAN88XX PHYs. + +config MICROSEMI_PHY + tristate "Microsemi PHYs" + ---help--- + Currently supports the VSC8531 and VSC8541 PHYs + +config NATIONAL_PHY + tristate "National Semiconductor PHYs" + ---help--- + Currently supports the DP83865 PHY. + +config QSEMI_PHY + tristate "Quality Semiconductor PHYs" + ---help--- + Currently supports the qs6612 + +config REALTEK_PHY + tristate "Realtek PHYs" + ---help--- + Supports the Realtek 821x PHY. + +config SMSC_PHY + tristate "SMSC PHYs" + ---help--- + Currently supports the LAN83C185, LAN8187 and LAN8700 PHYs + +config STE10XP + tristate "STMicroelectronics STe10Xp PHYs" + ---help--- + This is the driver for the STe100p and STe101p PHYs. + +config TERANETICS_PHY + tristate "Teranetics PHYs" + ---help--- + Currently supports the Teranetics TN2020 + +config VITESSE_PHY + tristate "Vitesse PHYs" + ---help--- + Currently supports the vsc8244 + +config XILINX_GMII2RGMII + tristate "Xilinx GMII2RGMII converter driver" + ---help--- + This driver support xilinx GMII to RGMII IP core it provides + the Reduced Gigabit Media Independent Interface(RGMII) between + Ethernet physical media devices and the Gigabit Ethernet controller. + config MDIO_XGENE tristate "APM X-Gene SoC MDIO bus controller" depends on ARCH_XGENE || COMPILE_TEST diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile index 534dfa74d5a2..e58667d111e7 100644 --- a/drivers/net/phy/Makefile +++ b/drivers/net/phy/Makefile @@ -1,51 +1,55 @@ -# Makefile for Linux PHY drivers +# Makefile for Linux PHY drivers and MDIO bus drivers libphy-y := phy.o phy_device.o mdio_bus.o mdio_device.o libphy-$(CONFIG_SWPHY) += swphy.o obj-$(CONFIG_PHYLIB) += libphy.o + +obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o +obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o +obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o +obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o +obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC) += mdio-mux-bcm-iproc.o +obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o +obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o +obj-$(CONFIG_MDIO_CAVIUM) += mdio-cavium.o +obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o +obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o +obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o +obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o +obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o +obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o +obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o + +obj-$(CONFIG_AMD_PHY) += amd.o obj-$(CONFIG_AQUANTIA_PHY) += aquantia.o -obj-$(CONFIG_MARVELL_PHY) += marvell.o -obj-$(CONFIG_DAVICOM_PHY) += davicom.o -obj-$(CONFIG_CICADA_PHY) += cicada.o -obj-$(CONFIG_LXT_PHY) += lxt.o -obj-$(CONFIG_QSEMI_PHY) += qsemi.o -obj-$(CONFIG_SMSC_PHY) += smsc.o -obj-$(CONFIG_TERANETICS_PHY) += teranetics.o -obj-$(CONFIG_VITESSE_PHY) += vitesse.o -obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o -obj-$(CONFIG_BROADCOM_PHY) += broadcom.o +obj-$(CONFIG_AT803X_PHY) += at803x.o obj-$(CONFIG_BCM63XX_PHY) += bcm63xx.o obj-$(CONFIG_BCM7XXX_PHY) += bcm7xxx.o obj-$(CONFIG_BCM87XX_PHY) += bcm87xx.o obj-$(CONFIG_BCM_CYGNUS_PHY) += bcm-cygnus.o -obj-$(CONFIG_ICPLUS_PHY) += icplus.o -obj-$(CONFIG_REALTEK_PHY) += realtek.o -obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o -obj-$(CONFIG_FIXED_PHY) += fixed_phy.o -obj-$(CONFIG_MDIO_BITBANG) += mdio-bitbang.o -obj-$(CONFIG_MDIO_GPIO) += mdio-gpio.o -obj-$(CONFIG_NATIONAL_PHY) += national.o +obj-$(CONFIG_BCM_NET_PHYLIB) += bcm-phy-lib.o +obj-$(CONFIG_BROADCOM_PHY) += broadcom.o +obj-$(CONFIG_CICADA_PHY) += cicada.o +obj-$(CONFIG_DAVICOM_PHY) += davicom.o obj-$(CONFIG_DP83640_PHY) += dp83640.o obj-$(CONFIG_DP83848_PHY) += dp83848.o obj-$(CONFIG_DP83867_PHY) += dp83867.o -obj-$(CONFIG_STE10XP) += ste10Xp.o -obj-$(CONFIG_MICREL_PHY) += micrel.o -obj-$(CONFIG_MDIO_OCTEON) += mdio-octeon.o -obj-$(CONFIG_MDIO_THUNDER) += mdio-thunder.o -obj-$(CONFIG_MDIO_CAVIUM) += mdio-cavium.o -obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o -obj-$(CONFIG_AT803X_PHY) += at803x.o -obj-$(CONFIG_AMD_PHY) += amd.o -obj-$(CONFIG_MDIO_BUS_MUX) += mdio-mux.o -obj-$(CONFIG_MDIO_BUS_MUX_GPIO) += mdio-mux-gpio.o -obj-$(CONFIG_MDIO_BUS_MUX_MMIOREG) += mdio-mux-mmioreg.o -obj-$(CONFIG_MDIO_BUS_MUX_BCM_IPROC) += mdio-mux-bcm-iproc.o -obj-$(CONFIG_MDIO_SUN4I) += mdio-sun4i.o -obj-$(CONFIG_MDIO_MOXART) += mdio-moxart.o -obj-$(CONFIG_MDIO_BCM_UNIMAC) += mdio-bcm-unimac.o -obj-$(CONFIG_MICROCHIP_PHY) += microchip.o -obj-$(CONFIG_MDIO_BCM_IPROC) += mdio-bcm-iproc.o +obj-$(CONFIG_FIXED_PHY) += fixed_phy.o +obj-$(CONFIG_ICPLUS_PHY) += icplus.o obj-$(CONFIG_INTEL_XWAY_PHY) += intel-xway.o -obj-$(CONFIG_MDIO_HISI_FEMAC) += mdio-hisi-femac.o -obj-$(CONFIG_MDIO_XGENE) += mdio-xgene.o +obj-$(CONFIG_LSI_ET1011C_PHY) += et1011c.o +obj-$(CONFIG_LXT_PHY) += lxt.o +obj-$(CONFIG_MARVELL_PHY) += marvell.o +obj-$(CONFIG_MICREL_KS8995MA) += spi_ks8995.o +obj-$(CONFIG_MICREL_PHY) += micrel.o +obj-$(CONFIG_MICROCHIP_PHY) += microchip.o +obj-$(CONFIG_MICROSEMI_PHY) += mscc.o +obj-$(CONFIG_NATIONAL_PHY) += national.o +obj-$(CONFIG_QSEMI_PHY) += qsemi.o +obj-$(CONFIG_REALTEK_PHY) += realtek.o +obj-$(CONFIG_SMSC_PHY) += smsc.o +obj-$(CONFIG_STE10XP) += ste10Xp.o +obj-$(CONFIG_TERANETICS_PHY) += teranetics.o +obj-$(CONFIG_VITESSE_PHY) += vitesse.o +obj-$(CONFIG_XILINX_GMII2RGMII) += xilinx_gmii2rgmii.o diff --git a/drivers/net/phy/microchip.c b/drivers/net/phy/microchip.c index 15f820648f82..7c00e508a101 100644 --- a/drivers/net/phy/microchip.c +++ b/drivers/net/phy/microchip.c @@ -55,7 +55,7 @@ static int lan88xx_phy_ack_interrupt(struct phy_device *phydev) return rc < 0 ? rc : 0; } -int lan88xx_suspend(struct phy_device *phydev) +static int lan88xx_suspend(struct phy_device *phydev) { struct lan88xx_priv *priv = phydev->priv; diff --git a/drivers/net/phy/mscc.c b/drivers/net/phy/mscc.c new file mode 100644 index 000000000000..a17573e3bd8a --- /dev/null +++ b/drivers/net/phy/mscc.c @@ -0,0 +1,337 @@ +/* + * Driver for Microsemi VSC85xx PHYs + * + * Author: Nagaraju Lakkaraju + * License: Dual MIT/GPL + * Copyright (c) 2016 Microsemi Corporation + */ + +#include +#include +#include +#include +#include +#include +#include + +enum rgmii_rx_clock_delay { + RGMII_RX_CLK_DELAY_0_2_NS = 0, + RGMII_RX_CLK_DELAY_0_8_NS = 1, + RGMII_RX_CLK_DELAY_1_1_NS = 2, + RGMII_RX_CLK_DELAY_1_7_NS = 3, + RGMII_RX_CLK_DELAY_2_0_NS = 4, + RGMII_RX_CLK_DELAY_2_3_NS = 5, + RGMII_RX_CLK_DELAY_2_6_NS = 6, + RGMII_RX_CLK_DELAY_3_4_NS = 7 +}; + +/* Microsemi VSC85xx PHY registers */ +/* IEEE 802. Std Registers */ +#define MSCC_PHY_EXT_PHY_CNTL_1 23 +#define MAC_IF_SELECTION_MASK 0x1800 +#define MAC_IF_SELECTION_GMII 0 +#define MAC_IF_SELECTION_RMII 1 +#define MAC_IF_SELECTION_RGMII 2 +#define MAC_IF_SELECTION_POS 11 +#define FAR_END_LOOPBACK_MODE_MASK 0x0008 + +#define MII_VSC85XX_INT_MASK 25 +#define MII_VSC85XX_INT_MASK_MASK 0xa000 +#define MII_VSC85XX_INT_STATUS 26 + +#define MSCC_PHY_WOL_MAC_CONTROL 27 +#define EDGE_RATE_CNTL_POS 5 +#define EDGE_RATE_CNTL_MASK 0x00E0 + +#define MSCC_EXT_PAGE_ACCESS 31 +#define MSCC_PHY_PAGE_STANDARD 0x0000 /* Standard registers */ +#define MSCC_PHY_PAGE_EXTENDED_2 0x0002 /* Extended reg - page 2 */ + +/* Extended Page 2 Registers */ +#define MSCC_PHY_RGMII_CNTL 20 +#define RGMII_RX_CLK_DELAY_MASK 0x0070 +#define RGMII_RX_CLK_DELAY_POS 4 + +/* Microsemi PHY ID's */ +#define PHY_ID_VSC8531 0x00070570 +#define PHY_ID_VSC8541 0x00070770 + +struct edge_rate_table { + u16 vddmac; + int slowdown[MSCC_SLOWDOWN_MAX]; +}; + +struct edge_rate_table edge_table[MSCC_VDDMAC_MAX] = { + {3300, { 0, -2, -4, -7, -10, -17, -29, -53} }, + {2500, { 0, -3, -6, -10, -14, -23, -37, -63} }, + {1800, { 0, -5, -9, -16, -23, -35, -52, -76} }, + {1500, { 0, -6, -14, -21, -29, -42, -58, -77} }, +}; + +struct vsc8531_private { + u8 edge_slowdown; + u16 vddmac; +}; + +static int vsc85xx_phy_page_set(struct phy_device *phydev, u8 page) +{ + int rc; + + rc = phy_write(phydev, MSCC_EXT_PAGE_ACCESS, page); + return rc; +} + +static u8 edge_rate_magic_get(u16 vddmac, + int slowdown) +{ + int rc = (MSCC_SLOWDOWN_MAX - 1); + u8 vdd; + u8 sd; + + for (vdd = 0; vdd < MSCC_VDDMAC_MAX; vdd++) { + if (edge_table[vdd].vddmac == vddmac) { + for (sd = 0; sd < MSCC_SLOWDOWN_MAX; sd++) { + if (edge_table[vdd].slowdown[sd] <= slowdown) { + rc = (MSCC_SLOWDOWN_MAX - sd - 1); + break; + } + } + } + } + + return rc; +} + +static int vsc85xx_edge_rate_cntl_set(struct phy_device *phydev, + u8 edge_rate) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + reg_val = phy_read(phydev, MSCC_PHY_WOL_MAC_CONTROL); + reg_val &= ~(EDGE_RATE_CNTL_MASK); + reg_val |= (edge_rate << EDGE_RATE_CNTL_POS); + rc = phy_write(phydev, MSCC_PHY_WOL_MAC_CONTROL, reg_val); + if (rc != 0) + goto out_unlock; + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + +static int vsc85xx_mac_if_set(struct phy_device *phydev, + phy_interface_t interface) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + reg_val = phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_1); + reg_val &= ~(MAC_IF_SELECTION_MASK); + switch (interface) { + case PHY_INTERFACE_MODE_RGMII: + reg_val |= (MAC_IF_SELECTION_RGMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_RMII: + reg_val |= (MAC_IF_SELECTION_RMII << MAC_IF_SELECTION_POS); + break; + case PHY_INTERFACE_MODE_MII: + case PHY_INTERFACE_MODE_GMII: + reg_val |= (MAC_IF_SELECTION_GMII << MAC_IF_SELECTION_POS); + break; + default: + rc = -EINVAL; + goto out_unlock; + } + rc = phy_write(phydev, MSCC_PHY_EXT_PHY_CNTL_1, reg_val); + if (rc != 0) + goto out_unlock; + + rc = genphy_soft_reset(phydev); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + +static int vsc85xx_default_config(struct phy_device *phydev) +{ + int rc; + u16 reg_val; + + mutex_lock(&phydev->lock); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_EXTENDED_2); + if (rc != 0) + goto out_unlock; + + reg_val = phy_read(phydev, MSCC_PHY_RGMII_CNTL); + reg_val &= ~(RGMII_RX_CLK_DELAY_MASK); + reg_val |= (RGMII_RX_CLK_DELAY_1_1_NS << RGMII_RX_CLK_DELAY_POS); + phy_write(phydev, MSCC_PHY_RGMII_CNTL, reg_val); + rc = vsc85xx_phy_page_set(phydev, MSCC_PHY_PAGE_STANDARD); + +out_unlock: + mutex_unlock(&phydev->lock); + + return rc; +} + +#ifdef CONFIG_OF_MDIO +static int vsc8531_of_init(struct phy_device *phydev) +{ + int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + struct device *dev = &phydev->mdio.dev; + struct device_node *of_node = dev->of_node; + + if (!of_node) + return -ENODEV; + + rc = of_property_read_u16(of_node, "vsc8531,vddmac", + &vsc8531->vddmac); + if (rc == -EINVAL) + vsc8531->vddmac = MSCC_VDDMAC_3300; + rc = of_property_read_u8(of_node, "vsc8531,edge-slowdown", + &vsc8531->edge_slowdown); + if (rc == -EINVAL) + vsc8531->edge_slowdown = 0; + + rc = 0; + return rc; +} +#else +static int vsc8531_of_init(struct phy_device *phydev) +{ + return 0; +} +#endif /* CONFIG_OF_MDIO */ + +static int vsc85xx_config_init(struct phy_device *phydev) +{ + int rc; + struct vsc8531_private *vsc8531 = phydev->priv; + u8 edge_rate; + + rc = vsc8531_of_init(phydev); + if (rc) + return rc; + + rc = vsc85xx_default_config(phydev); + if (rc) + return rc; + + rc = vsc85xx_mac_if_set(phydev, phydev->interface); + if (rc) + return rc; + + edge_rate = edge_rate_magic_get(vsc8531->vddmac, + -(int)vsc8531->edge_slowdown); + rc = vsc85xx_edge_rate_cntl_set(phydev, edge_rate); + if (rc) + return rc; + + rc = genphy_config_init(phydev); + + return rc; +} + +static int vsc85xx_ack_interrupt(struct phy_device *phydev) +{ + int rc = 0; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + + return (rc < 0) ? rc : 0; +} + +static int vsc85xx_config_intr(struct phy_device *phydev) +{ + int rc; + + if (phydev->interrupts == PHY_INTERRUPT_ENABLED) { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, + MII_VSC85XX_INT_MASK_MASK); + } else { + rc = phy_write(phydev, MII_VSC85XX_INT_MASK, 0); + if (rc < 0) + return rc; + rc = phy_read(phydev, MII_VSC85XX_INT_STATUS); + } + + return rc; +} + +static int vsc85xx_probe(struct phy_device *phydev) +{ + struct vsc8531_private *vsc8531; + + vsc8531 = devm_kzalloc(&phydev->mdio.dev, sizeof(*vsc8531), GFP_KERNEL); + if (!vsc8531) + return -ENOMEM; + + phydev->priv = vsc8531; + + return 0; +} + +/* Microsemi VSC85xx PHYs */ +static struct phy_driver vsc85xx_driver[] = { +{ + .phy_id = PHY_ID_VSC8531, + .name = "Microsemi VSC8531", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, + .probe = &vsc85xx_probe, +}, +{ + .phy_id = PHY_ID_VSC8541, + .name = "Microsemi VSC8541 SyncE", + .phy_id_mask = 0xfffffff0, + .features = PHY_GBIT_FEATURES, + .flags = PHY_HAS_INTERRUPT, + .soft_reset = &genphy_soft_reset, + .config_init = &vsc85xx_config_init, + .config_aneg = &genphy_config_aneg, + .aneg_done = &genphy_aneg_done, + .read_status = &genphy_read_status, + .ack_interrupt = &vsc85xx_ack_interrupt, + .config_intr = &vsc85xx_config_intr, + .suspend = &genphy_suspend, + .resume = &genphy_resume, + .probe = &vsc85xx_probe, +} + +}; + +module_phy_driver(vsc85xx_driver); + +static struct mdio_device_id __maybe_unused vsc85xx_tbl[] = { + { PHY_ID_VSC8531, 0xfffffff0, }, + { PHY_ID_VSC8541, 0xfffffff0, }, + { } +}; + +MODULE_DEVICE_TABLE(mdio, vsc85xx_tbl); + +MODULE_DESCRIPTION("Microsemi VSC85xx PHY driver"); +MODULE_AUTHOR("Nagaraju Lakkaraju"); +MODULE_LICENSE("Dual MIT/GPL"); diff --git a/drivers/net/phy/xilinx_gmii2rgmii.c b/drivers/net/phy/xilinx_gmii2rgmii.c new file mode 100644 index 000000000000..d15dd3938ba8 --- /dev/null +++ b/drivers/net/phy/xilinx_gmii2rgmii.c @@ -0,0 +1,112 @@ +/* Xilinx GMII2RGMII Converter driver + * + * Copyright (C) 2016 Xilinx, Inc. + * Copyright (C) 2016 Andrew Lunn + * + * Author: Andrew Lunn + * Author: Kedareswara rao Appana + * + * Description: + * This driver is developed for Xilinx GMII2RGMII Converter + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include +#include +#include +#include +#include +#include + +#define XILINX_GMII2RGMII_REG 0x10 +#define XILINX_GMII2RGMII_SPEED_MASK (BMCR_SPEED1000 | BMCR_SPEED100) + +struct gmii2rgmii { + struct phy_device *phy_dev; + struct phy_driver *phy_drv; + struct phy_driver conv_phy_drv; + int addr; +}; + +static int xgmiitorgmii_read_status(struct phy_device *phydev) +{ + struct gmii2rgmii *priv = phydev->priv; + u16 val = 0; + + priv->phy_drv->read_status(phydev); + + val = mdiobus_read(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG); + val &= XILINX_GMII2RGMII_SPEED_MASK; + + if (phydev->speed == SPEED_1000) + val |= BMCR_SPEED1000; + else if (phydev->speed == SPEED_100) + val |= BMCR_SPEED100; + else + val |= BMCR_SPEED10; + + mdiobus_write(phydev->mdio.bus, priv->addr, XILINX_GMII2RGMII_REG, val); + + return 0; +} + +static int xgmiitorgmii_probe(struct mdio_device *mdiodev) +{ + struct device *dev = &mdiodev->dev; + struct device_node *np = dev->of_node, *phy_node; + struct gmii2rgmii *priv; + + priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + phy_node = of_parse_phandle(np, "phy-handle", 0); + if (!phy_node) { + dev_err(dev, "Couldn't parse phy-handle\n"); + return -ENODEV; + } + + priv->phy_dev = of_phy_find_device(phy_node); + of_node_put(phy_node); + if (!priv->phy_dev) { + dev_info(dev, "Couldn't find phydev\n"); + return -EPROBE_DEFER; + } + + priv->addr = mdiodev->addr; + priv->phy_drv = priv->phy_dev->drv; + memcpy(&priv->conv_phy_drv, priv->phy_dev->drv, + sizeof(struct phy_driver)); + priv->conv_phy_drv.read_status = xgmiitorgmii_read_status; + priv->phy_dev->priv = priv; + priv->phy_dev->drv = &priv->conv_phy_drv; + + return 0; +} + +static const struct of_device_id xgmiitorgmii_of_match[] = { + { .compatible = "xlnx,gmii-to-rgmii-1.0" }, + {}, +}; +MODULE_DEVICE_TABLE(of, xgmiitorgmii_of_match); + +static struct mdio_driver xgmiitorgmii_driver = { + .probe = xgmiitorgmii_probe, + .mdiodrv.driver = { + .name = "xgmiitorgmii", + .of_match_table = xgmiitorgmii_of_match, + }, +}; + +mdio_module_driver(xgmiitorgmii_driver); + +MODULE_DESCRIPTION("Xilinx GMII2RGMII converter driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index f226db4616b7..5489c0ec1d9a 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -1103,6 +1103,15 @@ static int ppp_nl_newlink(struct net *src_net, struct net_device *dev, } conf.file = file; + + /* Don't use device name generated by the rtnetlink layer when ifname + * isn't specified. Let ppp_dev_configure() set the device name using + * the PPP unit identifer as suffix (i.e. ppp). This allows + * userspace to infer the device name using to the PPPIOCGUNIT ioctl. + */ + if (!tb[IFLA_IFNAME]) + conf.ifname_is_set = false; + err = ppp_dev_configure(src_net, dev, &conf); out_unlock: @@ -1354,6 +1363,8 @@ static void ppp_setup(struct net_device *dev) dev->netdev_ops = &ppp_netdev_ops; SET_NETDEV_DEVTYPE(dev, &ppp_type); + dev->features |= NETIF_F_LLTX; + dev->hard_header_len = PPP_HDRLEN; dev->mtu = PPP_MRU; dev->addr_len = 0; @@ -1367,12 +1378,8 @@ static void ppp_setup(struct net_device *dev) * Transmit-side routines. */ -/* - * Called to do any work queued up on the transmit side - * that can now be done. - */ -static void -ppp_xmit_process(struct ppp *ppp) +/* Called to do any work queued up on the transmit side that can now be done */ +static void __ppp_xmit_process(struct ppp *ppp) { struct sk_buff *skb; @@ -1392,6 +1399,30 @@ ppp_xmit_process(struct ppp *ppp) ppp_xmit_unlock(ppp); } +static DEFINE_PER_CPU(int, ppp_xmit_recursion); + +static void ppp_xmit_process(struct ppp *ppp) +{ + local_bh_disable(); + + if (unlikely(__this_cpu_read(ppp_xmit_recursion))) + goto err; + + __this_cpu_inc(ppp_xmit_recursion); + __ppp_xmit_process(ppp); + __this_cpu_dec(ppp_xmit_recursion); + + local_bh_enable(); + + return; + +err: + local_bh_enable(); + + if (net_ratelimit()) + netdev_err(ppp->dev, "recursion detected\n"); +} + static inline struct sk_buff * pad_compress_skb(struct ppp *ppp, struct sk_buff *skb) { @@ -1847,11 +1878,8 @@ static int ppp_mp_explode(struct ppp *ppp, struct sk_buff *skb) } #endif /* CONFIG_PPP_MULTILINK */ -/* - * Try to send data out on a channel. - */ -static void -ppp_channel_push(struct channel *pch) +/* Try to send data out on a channel */ +static void __ppp_channel_push(struct channel *pch) { struct sk_buff *skb; struct ppp *ppp; @@ -1876,11 +1904,22 @@ ppp_channel_push(struct channel *pch) read_lock_bh(&pch->upl); ppp = pch->ppp; if (ppp) - ppp_xmit_process(ppp); + __ppp_xmit_process(ppp); read_unlock_bh(&pch->upl); } } +static void ppp_channel_push(struct channel *pch) +{ + local_bh_disable(); + + __this_cpu_inc(ppp_xmit_recursion); + __ppp_channel_push(pch); + __this_cpu_dec(ppp_xmit_recursion); + + local_bh_enable(); +} + /* * Receive-side routines. */ diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index ae0905ed4a32..1951b1085cb8 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -37,6 +37,7 @@ #include #include #include +#include #include @@ -53,41 +54,6 @@ static struct proto pptp_sk_proto __read_mostly; static const struct ppp_channel_ops pptp_chan_ops; static const struct proto_ops pptp_ops; -#define PPP_LCP_ECHOREQ 0x09 -#define PPP_LCP_ECHOREP 0x0A -#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) - -#define MISSING_WINDOW 20 -#define WRAPPED(curseq, lastseq)\ - ((((curseq) & 0xffffff00) == 0) &&\ - (((lastseq) & 0xffffff00) == 0xffffff00)) - -#define PPTP_GRE_PROTO 0x880B -#define PPTP_GRE_VER 0x1 - -#define PPTP_GRE_FLAG_C 0x80 -#define PPTP_GRE_FLAG_R 0x40 -#define PPTP_GRE_FLAG_K 0x20 -#define PPTP_GRE_FLAG_S 0x10 -#define PPTP_GRE_FLAG_A 0x80 - -#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) -#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) -#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) -#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) -#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) - -#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) -struct pptp_gre_header { - u8 flags; - u8 ver; - __be16 protocol; - __be16 payload_len; - __be16 call_id; - __be32 seq; - __be32 ack; -} __packed; - static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) { struct pppox_sock *sock; @@ -240,16 +206,14 @@ static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) skb_push(skb, header_len); hdr = (struct pptp_gre_header *)(skb->data); - hdr->flags = PPTP_GRE_FLAG_K; - hdr->ver = PPTP_GRE_VER; - hdr->protocol = htons(PPTP_GRE_PROTO); - hdr->call_id = htons(opt->dst_addr.call_id); + hdr->gre_hd.flags = GRE_KEY | GRE_VERSION_1 | GRE_SEQ; + hdr->gre_hd.protocol = GRE_PROTO_PPP; + hdr->call_id = htons(opt->dst_addr.call_id); - hdr->flags |= PPTP_GRE_FLAG_S; - hdr->seq = htonl(++opt->seq_sent); + hdr->seq = htonl(++opt->seq_sent); if (opt->ack_sent != seq_recv) { /* send ack with this message */ - hdr->ver |= PPTP_GRE_FLAG_A; + hdr->gre_hd.flags |= GRE_ACK; hdr->ack = htonl(seq_recv); opt->ack_sent = seq_recv; } @@ -312,7 +276,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) headersize = sizeof(*header); /* test if acknowledgement present */ - if (PPTP_GRE_IS_A(header->ver)) { + if (GRE_IS_ACK(header->gre_hd.flags)) { __u32 ack; if (!pskb_may_pull(skb, headersize)) @@ -320,7 +284,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) header = (struct pptp_gre_header *)(skb->data); /* ack in different place if S = 0 */ - ack = PPTP_GRE_IS_S(header->flags) ? header->ack : header->seq; + ack = GRE_IS_SEQ(header->gre_hd.flags) ? header->ack : header->seq; ack = ntohl(ack); @@ -333,7 +297,7 @@ static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) headersize -= sizeof(header->ack); } /* test if payload present */ - if (!PPTP_GRE_IS_S(header->flags)) + if (!GRE_IS_SEQ(header->gre_hd.flags)) goto drop; payload_len = ntohs(header->payload_len); @@ -394,11 +358,11 @@ static int pptp_rcv(struct sk_buff *skb) header = (struct pptp_gre_header *)skb->data; - if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */ - PPTP_GRE_IS_C(header->flags) || /* flag C should be clear */ - PPTP_GRE_IS_R(header->flags) || /* flag R should be clear */ - !PPTP_GRE_IS_K(header->flags) || /* flag K should be set */ - (header->flags&0xF) != 0) /* routing and recursion ctrl = 0 */ + if (header->gre_hd.protocol != GRE_PROTO_PPP || /* PPTP-GRE protocol for PPTP */ + GRE_IS_CSUM(header->gre_hd.flags) || /* flag CSUM should be clear */ + GRE_IS_ROUTING(header->gre_hd.flags) || /* flag ROUTING should be clear */ + !GRE_IS_KEY(header->gre_hd.flags) || /* flag KEY should be set */ + (header->gre_hd.flags & GRE_FLAGS)) /* flag Recursion Ctrl should be clear */ /* if invalid, discard this packet */ goto drop; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 6f9df375c5d4..8093e39ae263 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -731,14 +731,9 @@ static int update_filter(struct tap_filter *filter, void __user *arg) } alen = ETH_ALEN * uf.count; - addr = kmalloc(alen, GFP_KERNEL); - if (!addr) - return -ENOMEM; - - if (copy_from_user(addr, arg + sizeof(uf), alen)) { - err = -EFAULT; - goto done; - } + addr = memdup_user(arg + sizeof(uf), alen); + if (IS_ERR(addr)) + return PTR_ERR(addr); /* The filter is updated without holding any locks. Which is * perfectly safe. We disable it first and in the worst @@ -758,7 +753,7 @@ static int update_filter(struct tap_filter *filter, void __user *arg) for (; n < uf.count; n++) { if (!is_multicast_ether_addr(addr[n].u)) { err = 0; /* no filter */ - goto done; + goto free_addr; } addr_hash_set(filter->mask, addr[n].u); } @@ -774,8 +769,7 @@ static int update_filter(struct tap_filter *filter, void __user *arg) /* Return the number of exact filters */ err = nexact; - -done: +free_addr: kfree(addr); return err; } diff --git a/drivers/net/usb/asix.h b/drivers/net/usb/asix.h index a2d3ea6efb20..d1092421aaa7 100644 --- a/drivers/net/usb/asix.h +++ b/drivers/net/usb/asix.h @@ -46,6 +46,7 @@ #define AX_CMD_SET_SW_MII 0x06 #define AX_CMD_READ_MII_REG 0x07 #define AX_CMD_WRITE_MII_REG 0x08 +#define AX_CMD_STATMNGSTS_REG 0x09 #define AX_CMD_SET_HW_MII 0x0a #define AX_CMD_READ_EEPROM 0x0b #define AX_CMD_WRITE_EEPROM 0x0c @@ -71,6 +72,17 @@ #define AX_CMD_SW_RESET 0x20 #define AX_CMD_SW_PHY_STATUS 0x21 #define AX_CMD_SW_PHY_SELECT 0x22 +#define AX_QCTCTRL 0x2A + +#define AX_CHIPCODE_MASK 0x70 +#define AX_AX88772_CHIPCODE 0x00 +#define AX_AX88772A_CHIPCODE 0x10 +#define AX_AX88772B_CHIPCODE 0x20 +#define AX_HOST_EN 0x01 + +#define AX_PHYSEL_PSEL 0x01 +#define AX_PHYSEL_SSMII 0 +#define AX_PHYSEL_SSEN 0x10 #define AX_PHY_SELECT_MASK (BIT(3) | BIT(2)) #define AX_PHY_SELECT_INTERNAL 0 @@ -173,6 +185,10 @@ struct asix_rx_fixup_info { }; struct asix_common_private { + void (*resume)(struct usbnet *dev); + void (*suspend)(struct usbnet *dev); + u16 presvd_phy_advertise; + u16 presvd_phy_bmcr; struct asix_rx_fixup_info rx_fixup_info; }; @@ -182,10 +198,10 @@ extern const struct driver_info ax88172a_info; #define FLAG_EEPROM_MAC (1UL << 0) /* init device MAC from eeprom */ int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data); + u16 size, void *data, int in_pm); int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data); + u16 size, void *data, int in_pm); void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, u16 size, void *data); @@ -197,27 +213,31 @@ int asix_rx_fixup_common(struct usbnet *dev, struct sk_buff *skb); struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, gfp_t flags); -int asix_set_sw_mii(struct usbnet *dev); -int asix_set_hw_mii(struct usbnet *dev); +int asix_set_sw_mii(struct usbnet *dev, int in_pm); +int asix_set_hw_mii(struct usbnet *dev, int in_pm); int asix_read_phy_addr(struct usbnet *dev, int internal); int asix_get_phy_addr(struct usbnet *dev); -int asix_sw_reset(struct usbnet *dev, u8 flags); +int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm); -u16 asix_read_rx_ctl(struct usbnet *dev); -int asix_write_rx_ctl(struct usbnet *dev, u16 mode); +u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm); +int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm); -u16 asix_read_medium_status(struct usbnet *dev); -int asix_write_medium_mode(struct usbnet *dev, u16 mode); +u16 asix_read_medium_status(struct usbnet *dev, int in_pm); +int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm); -int asix_write_gpio(struct usbnet *dev, u16 value, int sleep); +int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm); void asix_set_multicast(struct net_device *net); int asix_mdio_read(struct net_device *netdev, int phy_id, int loc); void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val); +int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc); +void asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, + int val); + void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo); int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo); diff --git a/drivers/net/usb/asix_common.c b/drivers/net/usb/asix_common.c index 7de5ab589e4e..f79eb12c326a 100644 --- a/drivers/net/usb/asix_common.c +++ b/drivers/net/usb/asix_common.c @@ -22,24 +22,49 @@ #include "asix.h" int asix_read_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data) + u16 size, void *data, int in_pm) { int ret; - ret = usbnet_read_cmd(dev, cmd, - USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, index, data, size); + int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16); + + BUG_ON(!dev); + + if (!in_pm) + fn = usbnet_read_cmd; + else + fn = usbnet_read_cmd_nopm; + + ret = fn(dev, cmd, USB_DIR_IN | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); + + if (unlikely(ret < 0)) + netdev_warn(dev->net, "Failed to read reg index 0x%04x: %d\n", + index, ret); - if (ret != size && ret >= 0) - return -EINVAL; return ret; } int asix_write_cmd(struct usbnet *dev, u8 cmd, u16 value, u16 index, - u16 size, void *data) + u16 size, void *data, int in_pm) { - return usbnet_write_cmd(dev, cmd, - USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, - value, index, data, size); + int ret; + int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16); + + BUG_ON(!dev); + + if (!in_pm) + fn = usbnet_write_cmd; + else + fn = usbnet_write_cmd_nopm; + + ret = fn(dev, cmd, USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE, + value, index, data, size); + + if (unlikely(ret < 0)) + netdev_warn(dev->net, "Failed to write reg index 0x%04x: %d\n", + index, ret); + + return ret; } void asix_write_cmd_async(struct usbnet *dev, u8 cmd, u16 value, u16 index, @@ -225,19 +250,20 @@ struct sk_buff *asix_tx_fixup(struct usbnet *dev, struct sk_buff *skb, return skb; } -int asix_set_sw_mii(struct usbnet *dev) +int asix_set_sw_mii(struct usbnet *dev, int in_pm) { int ret; - ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_SET_SW_MII, 0x0000, 0, 0, NULL, in_pm); + if (ret < 0) netdev_err(dev->net, "Failed to enable software MII access\n"); return ret; } -int asix_set_hw_mii(struct usbnet *dev) +int asix_set_hw_mii(struct usbnet *dev, int in_pm) { int ret; - ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_SET_HW_MII, 0x0000, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to enable hardware MII access\n"); return ret; @@ -247,7 +273,7 @@ int asix_read_phy_addr(struct usbnet *dev, int internal) { int offset = (internal ? 1 : 0); u8 buf[2]; - int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf); + int ret = asix_read_cmd(dev, AX_CMD_READ_PHY_ID, 0, 0, 2, buf, 0); netdev_dbg(dev->net, "asix_get_phy_addr()\n"); @@ -270,21 +296,21 @@ int asix_get_phy_addr(struct usbnet *dev) } -int asix_sw_reset(struct usbnet *dev, u8 flags) +int asix_sw_reset(struct usbnet *dev, u8 flags, int in_pm) { int ret; - ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_SW_RESET, flags, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to send software reset: %02x\n", ret); return ret; } -u16 asix_read_rx_ctl(struct usbnet *dev) +u16 asix_read_rx_ctl(struct usbnet *dev, int in_pm) { __le16 v; - int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v); + int ret = asix_read_cmd(dev, AX_CMD_READ_RX_CTL, 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading RX_CTL register: %02x\n", ret); @@ -295,12 +321,12 @@ out: return ret; } -int asix_write_rx_ctl(struct usbnet *dev, u16 mode) +int asix_write_rx_ctl(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_rx_ctl() - mode = 0x%04x\n", mode); - ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_RX_CTL, mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write RX_CTL mode to 0x%04x: %02x\n", mode, ret); @@ -308,10 +334,11 @@ int asix_write_rx_ctl(struct usbnet *dev, u16 mode) return ret; } -u16 asix_read_medium_status(struct usbnet *dev) +u16 asix_read_medium_status(struct usbnet *dev, int in_pm) { __le16 v; - int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, 0, 0, 2, &v); + int ret = asix_read_cmd(dev, AX_CMD_READ_MEDIUM_STATUS, + 0, 0, 2, &v, in_pm); if (ret < 0) { netdev_err(dev->net, "Error reading Medium Status register: %02x\n", @@ -323,12 +350,13 @@ u16 asix_read_medium_status(struct usbnet *dev) } -int asix_write_medium_mode(struct usbnet *dev, u16 mode) +int asix_write_medium_mode(struct usbnet *dev, u16 mode, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_medium_mode() - mode = 0x%04x\n", mode); - ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, mode, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_MEDIUM_MODE, + mode, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write Medium Mode mode to 0x%04x: %02x\n", mode, ret); @@ -336,12 +364,12 @@ int asix_write_medium_mode(struct usbnet *dev, u16 mode) return ret; } -int asix_write_gpio(struct usbnet *dev, u16 value, int sleep) +int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm) { int ret; netdev_dbg(dev->net, "asix_write_gpio() - value = 0x%04x\n", value); - ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, value, 0, 0, NULL, in_pm); if (ret < 0) netdev_err(dev->net, "Failed to write GPIO value 0x%04x: %02x\n", value, ret); @@ -398,16 +426,31 @@ int asix_mdio_read(struct net_device *netdev, int phy_id, int loc) { struct usbnet *dev = netdev_priv(netdev); __le16 res; + u8 smsr; + int i = 0; + int ret; mutex_lock(&dev->phy_mutex); - asix_set_sw_mii(dev); + do { + ret = asix_set_sw_mii(dev, 0); + if (ret == -ENODEV) + break; + usleep_range(1000, 1100); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 0); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return ret; + } + asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, - (__u16)loc, 2, &res); - asix_set_hw_mii(dev); + (__u16)loc, 2, &res, 0); + asix_set_hw_mii(dev, 0); mutex_unlock(&dev->phy_mutex); netdev_dbg(dev->net, "asix_mdio_read() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", - phy_id, loc, le16_to_cpu(res)); + phy_id, loc, le16_to_cpu(res)); return le16_to_cpu(res); } @@ -416,13 +459,95 @@ void asix_mdio_write(struct net_device *netdev, int phy_id, int loc, int val) { struct usbnet *dev = netdev_priv(netdev); __le16 res = cpu_to_le16(val); + u8 smsr; + int i = 0; + int ret; netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", - phy_id, loc, val); + phy_id, loc, val); + mutex_lock(&dev->phy_mutex); - asix_set_sw_mii(dev); - asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, (__u16)loc, 2, &res); - asix_set_hw_mii(dev); + do { + ret = asix_set_sw_mii(dev, 0); + if (ret == -ENODEV) + break; + usleep_range(1000, 1100); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 0); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return; + } + + asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, + (__u16)loc, 2, &res, 0); + asix_set_hw_mii(dev, 0); + mutex_unlock(&dev->phy_mutex); +} + +int asix_mdio_read_nopm(struct net_device *netdev, int phy_id, int loc) +{ + struct usbnet *dev = netdev_priv(netdev); + __le16 res; + u8 smsr; + int i = 0; + int ret; + + mutex_lock(&dev->phy_mutex); + do { + ret = asix_set_sw_mii(dev, 1); + if (ret == -ENODEV) + break; + usleep_range(1000, 1100); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 1); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return ret; + } + + asix_read_cmd(dev, AX_CMD_READ_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + asix_set_hw_mii(dev, 1); + mutex_unlock(&dev->phy_mutex); + + netdev_dbg(dev->net, "asix_mdio_read_nopm() phy_id=0x%02x, loc=0x%02x, returns=0x%04x\n", + phy_id, loc, le16_to_cpu(res)); + + return le16_to_cpu(res); +} + +void +asix_mdio_write_nopm(struct net_device *netdev, int phy_id, int loc, int val) +{ + struct usbnet *dev = netdev_priv(netdev); + __le16 res = cpu_to_le16(val); + u8 smsr; + int i = 0; + int ret; + + netdev_dbg(dev->net, "asix_mdio_write() phy_id=0x%02x, loc=0x%02x, val=0x%04x\n", + phy_id, loc, val); + + mutex_lock(&dev->phy_mutex); + do { + ret = asix_set_sw_mii(dev, 1); + if (ret == -ENODEV) + break; + usleep_range(1000, 1100); + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, + 0, 0, 1, &smsr, 1); + } while (!(smsr & AX_HOST_EN) && (i++ < 30) && (ret != -ENODEV)); + if (ret == -ENODEV) { + mutex_unlock(&dev->phy_mutex); + return; + } + + asix_write_cmd(dev, AX_CMD_WRITE_MII_REG, phy_id, + (__u16)loc, 2, &res, 1); + asix_set_hw_mii(dev, 1); mutex_unlock(&dev->phy_mutex); } @@ -431,7 +556,8 @@ void asix_get_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) struct usbnet *dev = netdev_priv(net); u8 opt; - if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, 0, 0, 1, &opt) < 0) { + if (asix_read_cmd(dev, AX_CMD_READ_MONITOR_MODE, + 0, 0, 1, &opt, 0) < 0) { wolinfo->supported = 0; wolinfo->wolopts = 0; return; @@ -455,7 +581,7 @@ int asix_set_wol(struct net_device *net, struct ethtool_wolinfo *wolinfo) opt |= AX_MONITOR_MAGIC; if (asix_write_cmd(dev, AX_CMD_WRITE_MONITOR_MODE, - opt, 0, 0, NULL) < 0) + opt, 0, 0, NULL, 0) < 0) return -EINVAL; return 0; @@ -490,7 +616,7 @@ int asix_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, /* ax8817x returns 2 bytes from eeprom on read */ for (i = first_word; i <= last_word; i++) { if (asix_read_cmd(dev, AX_CMD_READ_EEPROM, i, 0, 2, - &(eeprom_buff[i - first_word])) < 0) { + &eeprom_buff[i - first_word], 0) < 0) { kfree(eeprom_buff); return -EIO; } @@ -531,7 +657,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, the EEPROM */ if (eeprom->offset & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, first_word, 0, 2, - &(eeprom_buff[0])); + &eeprom_buff[0], 0); if (ret < 0) { netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", first_word); goto free; @@ -540,7 +666,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, if ((eeprom->offset + eeprom->len) & 1) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, last_word, 0, 2, - &(eeprom_buff[last_word - first_word])); + &eeprom_buff[last_word - first_word], 0); if (ret < 0) { netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", last_word); goto free; @@ -550,7 +676,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, memcpy((u8 *)eeprom_buff + (eeprom->offset & 1), data, eeprom->len); /* write data to EEPROM */ - ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to enable EEPROM write\n"); goto free; @@ -561,7 +687,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, netdev_dbg(net, "write to EEPROM at offset 0x%02x, data 0x%04x\n", i, eeprom_buff[i - first_word]); ret = asix_write_cmd(dev, AX_CMD_WRITE_EEPROM, i, - eeprom_buff[i - first_word], 0, NULL); + eeprom_buff[i - first_word], 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to write EEPROM at offset 0x%02x.\n", i); @@ -570,7 +696,7 @@ int asix_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom, msleep(20); } - ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL); + ret = asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0x0000, 0, 0, NULL, 0); if (ret < 0) { netdev_err(net, "Failed to disable EEPROM write\n"); goto free; diff --git a/drivers/net/usb/asix_devices.c b/drivers/net/usb/asix_devices.c index 5cabefc23494..cce24950a0ab 100644 --- a/drivers/net/usb/asix_devices.c +++ b/drivers/net/usb/asix_devices.c @@ -35,6 +35,15 @@ #define PHY_MODE_RTL8211CL 0x000C +#define AX88772A_PHY14H 0x14 +#define AX88772A_PHY14H_DEFAULT 0x442C + +#define AX88772A_PHY15H 0x15 +#define AX88772A_PHY15H_DEFAULT 0x03C8 + +#define AX88772A_PHY16H 0x16 +#define AX88772A_PHY16H_DEFAULT 0x4044 + struct ax88172_int_data { __le16 res1; u8 link; @@ -79,6 +88,8 @@ static u32 asix_get_phyid(struct usbnet *dev) /* Poll for the rare case the FW or phy isn't ready yet. */ for (i = 0; i < 100; i++) { phy_reg = asix_mdio_read(dev->net, dev->mii.phy_id, MII_PHYSID1); + if (phy_reg < 0) + return 0; if (phy_reg != 0 && phy_reg != 0xFFFF) break; mdelay(1); @@ -184,7 +195,7 @@ static int ax88172_link_reset(struct usbnet *dev) netdev_dbg(dev->net, "ax88172_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); return 0; } @@ -201,6 +212,28 @@ static const struct net_device_ops ax88172_netdev_ops = { .ndo_set_rx_mode = ax88172_set_multicast, }; +static void asix_phy_reset(struct usbnet *dev, unsigned int reset_bits) +{ + unsigned int timeout = 5000; + + asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, reset_bits); + + /* give phy_id a chance to process reset */ + udelay(500); + + /* See IEEE 802.3 "22.2.4.1.1 Reset": 500ms max */ + while (timeout--) { + if (asix_mdio_read(dev->net, dev->mii.phy_id, MII_BMCR) + & BMCR_RESET) + udelay(100); + else + return; + } + + netdev_err(dev->net, "BMCR_RESET timeout on phy_id %d\n", + dev->mii.phy_id); +} + static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) { int ret = 0; @@ -213,18 +246,19 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) /* Toggle the GPIOs in a manufacturer/model specific way */ for (i = 2; i >= 0; i--) { ret = asix_write_cmd(dev, AX_CMD_WRITE_GPIOS, - (gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL); + (gpio_bits >> (i * 8)) & 0xff, 0, 0, NULL, 0); if (ret < 0) goto out; msleep(5); } - ret = asix_write_rx_ctl(dev, 0x80); + ret = asix_write_rx_ctl(dev, 0x80, 0); if (ret < 0) goto out; /* Get the MAC address */ - ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf); + ret = asix_read_cmd(dev, AX88172_CMD_READ_NODE_ID, + 0, 0, ETH_ALEN, buf, 0); if (ret < 0) { netdev_dbg(dev->net, "read AX_CMD_READ_NODE_ID failed: %d\n", ret); @@ -246,7 +280,7 @@ static int ax88172_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */ dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */ - asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); + asix_phy_reset(dev, BMCR_RESET); asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); mii_nway_restart(&dev->mii); @@ -290,7 +324,7 @@ static int ax88772_link_reset(struct usbnet *dev) netdev_dbg(dev->net, "ax88772_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", ethtool_cmd_speed(&ecmd), ecmd.duplex, mode); - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); return 0; } @@ -298,100 +332,115 @@ static int ax88772_link_reset(struct usbnet *dev) static int ax88772_reset(struct usbnet *dev) { struct asix_data *data = (struct asix_data *)&dev->data; - int ret, embd_phy; - u16 rx_ctl; + int ret; - ret = asix_write_gpio(dev, - AX_GPIO_RSE | AX_GPIO_GPO_2 | AX_GPIO_GPO2EN, 5); + /* Rewrite MAC address */ + ether_addr_copy(data->mac_addr, dev->net->dev_addr); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, + ETH_ALEN, data->mac_addr, 0); if (ret < 0) goto out; - embd_phy = ((asix_get_phy_addr(dev) & 0x1f) == 0x10 ? 1 : 0); + /* Set RX_CTL to default values with 2k buffer, and enable cactus */ + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0); + if (ret < 0) + goto out; - ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL); + asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, 0); + if (ret < 0) + goto out; + + return 0; + +out: + return ret; +} + +static int ax88772_hw_reset(struct usbnet *dev, int in_pm) +{ + struct asix_data *data = (struct asix_data *)&dev->data; + int ret, embd_phy; + u16 rx_ctl; + + ret = asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_2 | + AX_GPIO_GPO2EN, 5, in_pm); + if (ret < 0) + goto out; + + embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0); + + ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, + 0, 0, NULL, in_pm); if (ret < 0) { netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); goto out; } - ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL); - if (ret < 0) - goto out; - - msleep(150); - - ret = asix_sw_reset(dev, AX_SWRESET_CLEAR); - if (ret < 0) - goto out; - - msleep(150); - if (embd_phy) { - ret = asix_sw_reset(dev, AX_SWRESET_IPRL); + ret = asix_sw_reset(dev, AX_SWRESET_IPPD, in_pm); + if (ret < 0) + goto out; + + usleep_range(10000, 11000); + + ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm); + if (ret < 0) + goto out; + + msleep(60); + + ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL, + in_pm); if (ret < 0) goto out; } else { - ret = asix_sw_reset(dev, AX_SWRESET_PRTE); + ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_PRL, + in_pm); if (ret < 0) goto out; } msleep(150); - rx_ctl = asix_read_rx_ctl(dev); - netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl); - ret = asix_write_rx_ctl(dev, 0x0000); + + if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + MII_PHYSID1))){ + ret = -EIO; + goto out; + } + + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); - netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl); - - ret = asix_sw_reset(dev, AX_SWRESET_PRL); - if (ret < 0) - goto out; - - msleep(150); - - ret = asix_sw_reset(dev, AX_SWRESET_IPRL | AX_SWRESET_PRL); - if (ret < 0) - goto out; - - msleep(150); - - asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, BMCR_RESET); - asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, - ADVERTISE_ALL | ADVERTISE_CSMA); - mii_nway_restart(&dev->mii); - - ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT); + ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm); if (ret < 0) goto out; ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0, - AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, - AX88772_IPG2_DEFAULT, 0, NULL); + AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, + AX88772_IPG2_DEFAULT, 0, NULL, in_pm); if (ret < 0) { netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret); goto out; } /* Rewrite MAC address */ - memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); - ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, - data->mac_addr); + ether_addr_copy(data->mac_addr, dev->net->dev_addr); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, + ETH_ALEN, data->mac_addr, in_pm); if (ret < 0) goto out; /* Set RX_CTL to default values with 2k buffer, and enable cactus */ - ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, in_pm); netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n", rx_ctl); - rx_ctl = asix_read_medium_status(dev); + rx_ctl = asix_read_medium_status(dev, in_pm); netdev_dbg(dev->net, "Medium Status is 0x%04x after all initializations\n", rx_ctl); @@ -400,7 +449,140 @@ static int ax88772_reset(struct usbnet *dev) out: return ret; +} +static int ax88772a_hw_reset(struct usbnet *dev, int in_pm) +{ + struct asix_data *data = (struct asix_data *)&dev->data; + int ret, embd_phy; + u16 rx_ctl, phy14h, phy15h, phy16h; + u8 chipcode = 0; + + ret = asix_write_gpio(dev, AX_GPIO_RSE, 5, in_pm); + if (ret < 0) + goto out; + + embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0); + + ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy | + AX_PHYSEL_SSEN, 0, 0, NULL, in_pm); + if (ret < 0) { + netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); + goto out; + } + usleep_range(10000, 11000); + + ret = asix_sw_reset(dev, AX_SWRESET_IPPD | AX_SWRESET_IPRL, in_pm); + if (ret < 0) + goto out; + + usleep_range(10000, 11000); + + ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm); + if (ret < 0) + goto out; + + msleep(160); + + ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, in_pm); + if (ret < 0) + goto out; + + ret = asix_sw_reset(dev, AX_SWRESET_IPRL, in_pm); + if (ret < 0) + goto out; + + msleep(200); + + if (in_pm && (!asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + MII_PHYSID1))) { + ret = -1; + goto out; + } + + ret = asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, + 0, 1, &chipcode, in_pm); + if (ret < 0) + goto out; + + if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772B_CHIPCODE) { + ret = asix_write_cmd(dev, AX_QCTCTRL, 0x8000, 0x8001, + 0, NULL, in_pm); + if (ret < 0) { + netdev_dbg(dev->net, "Write BQ setting failed: %d\n", + ret); + goto out; + } + } else if ((chipcode & AX_CHIPCODE_MASK) == AX_AX88772A_CHIPCODE) { + /* Check if the PHY registers have default settings */ + phy14h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY14H); + phy15h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY15H); + phy16h = asix_mdio_read_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY16H); + + netdev_dbg(dev->net, + "772a_hw_reset: MR20=0x%x MR21=0x%x MR22=0x%x\n", + phy14h, phy15h, phy16h); + + /* Restore PHY registers default setting if not */ + if (phy14h != AX88772A_PHY14H_DEFAULT) + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY14H, + AX88772A_PHY14H_DEFAULT); + if (phy15h != AX88772A_PHY15H_DEFAULT) + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY15H, + AX88772A_PHY15H_DEFAULT); + if (phy16h != AX88772A_PHY16H_DEFAULT) + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, + AX88772A_PHY16H, + AX88772A_PHY16H_DEFAULT); + } + + ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0, + AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, + AX88772_IPG2_DEFAULT, 0, NULL, in_pm); + if (ret < 0) { + netdev_dbg(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret); + goto out; + } + + /* Rewrite MAC address */ + memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); + ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, + data->mac_addr, in_pm); + if (ret < 0) + goto out; + + /* Set RX_CTL to default values with 2k buffer, and enable cactus */ + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); + if (ret < 0) + goto out; + + ret = asix_write_medium_mode(dev, AX88772_MEDIUM_DEFAULT, in_pm); + if (ret < 0) + return ret; + + /* Set RX_CTL to default values with 2k buffer, and enable cactus */ + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, in_pm); + if (ret < 0) + goto out; + + rx_ctl = asix_read_rx_ctl(dev, in_pm); + netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n", + rx_ctl); + + rx_ctl = asix_read_medium_status(dev, in_pm); + netdev_dbg(dev->net, + "Medium Status is 0x%04x after all initializations\n", + rx_ctl); + + return 0; + +out: + return ret; } static const struct net_device_ops ax88772_netdev_ops = { @@ -415,11 +597,97 @@ static const struct net_device_ops ax88772_netdev_ops = { .ndo_set_rx_mode = asix_set_multicast, }; +static void ax88772_suspend(struct usbnet *dev) +{ + struct asix_common_private *priv = dev->driver_priv; + u16 medium; + + /* Stop MAC operation */ + medium = asix_read_medium_status(dev, 0); + medium &= ~AX_MEDIUM_RE; + asix_write_medium_mode(dev, medium, 0); + + netdev_dbg(dev->net, "ax88772_suspend: medium=0x%04x\n", + asix_read_medium_status(dev, 0)); + + /* Preserve BMCR for restoring */ + priv->presvd_phy_bmcr = + asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_BMCR); + + /* Preserve ANAR for restoring */ + priv->presvd_phy_advertise = + asix_mdio_read_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE); +} + +static int asix_suspend(struct usb_interface *intf, pm_message_t message) +{ + struct usbnet *dev = usb_get_intfdata(intf); + struct asix_common_private *priv = dev->driver_priv; + + if (priv->suspend) + priv->suspend(dev); + + return usbnet_suspend(intf, message); +} + +static void ax88772_restore_phy(struct usbnet *dev) +{ + struct asix_common_private *priv = dev->driver_priv; + + if (priv->presvd_phy_advertise) { + /* Restore Advertisement control reg */ + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_ADVERTISE, + priv->presvd_phy_advertise); + + /* Restore BMCR */ + asix_mdio_write_nopm(dev->net, dev->mii.phy_id, MII_BMCR, + priv->presvd_phy_bmcr); + + mii_nway_restart(&dev->mii); + priv->presvd_phy_advertise = 0; + priv->presvd_phy_bmcr = 0; + } +} + +static void ax88772_resume(struct usbnet *dev) +{ + int i; + + for (i = 0; i < 3; i++) + if (!ax88772_hw_reset(dev, 1)) + break; + ax88772_restore_phy(dev); +} + +static void ax88772a_resume(struct usbnet *dev) +{ + int i; + + for (i = 0; i < 3; i++) { + if (!ax88772a_hw_reset(dev, 1)) + break; + } + + ax88772_restore_phy(dev); +} + +static int asix_resume(struct usb_interface *intf) +{ + struct usbnet *dev = usb_get_intfdata(intf); + struct asix_common_private *priv = dev->driver_priv; + + if (priv->resume) + priv->resume(dev); + + return usbnet_resume(intf); +} + static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) { - int ret, embd_phy, i; - u8 buf[ETH_ALEN]; + int ret, i; + u8 buf[ETH_ALEN], chipcode = 0; u32 phyid; + struct asix_common_private *priv; usbnet_get_endpoints(dev,intf); @@ -427,13 +695,13 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) if (dev->driver_info->data & FLAG_EEPROM_MAC) { for (i = 0; i < (ETH_ALEN >> 1); i++) { ret = asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x04 + i, - 0, 2, buf + i * 2); + 0, 2, buf + i * 2, 0); if (ret < 0) break; } } else { ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, - 0, 0, ETH_ALEN, buf); + 0, 0, ETH_ALEN, buf, 0); } if (ret < 0) { @@ -456,16 +724,11 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->needed_headroom = 4; /* cf asix_tx_fixup() */ dev->net->needed_tailroom = 4; /* cf asix_tx_fixup() */ - embd_phy = ((dev->mii.phy_id & 0x1f) == 0x10 ? 1 : 0); + asix_read_cmd(dev, AX_CMD_STATMNGSTS_REG, 0, 0, 1, &chipcode, 0); + chipcode &= AX_CHIPCODE_MASK; - /* Reset the PHY to normal operation mode */ - ret = asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, embd_phy, 0, 0, NULL); - if (ret < 0) { - netdev_dbg(dev->net, "Select PHY #1 failed: %d\n", ret); - return ret; - } - - ax88772_reset(dev); + (chipcode == AX_AX88772_CHIPCODE) ? ax88772_hw_reset(dev, 0) : + ax88772a_hw_reset(dev, 0); /* Read PHYID register *AFTER* the PHY was reset properly */ phyid = asix_get_phyid(dev); @@ -482,6 +745,18 @@ static int ax88772_bind(struct usbnet *dev, struct usb_interface *intf) if (!dev->driver_priv) return -ENOMEM; + priv = dev->driver_priv; + + priv->presvd_phy_bmcr = 0; + priv->presvd_phy_advertise = 0; + if (chipcode == AX_AX88772_CHIPCODE) { + priv->resume = ax88772_resume; + priv->suspend = ax88772_suspend; + } else { + priv->resume = ax88772a_resume; + priv->suspend = ax88772_suspend; + } + return 0; } @@ -593,12 +868,12 @@ static int ax88178_reset(struct usbnet *dev) int gpio0 = 0; u32 phyid; - asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status); + asix_read_cmd(dev, AX_CMD_READ_GPIOS, 0, 0, 1, &status, 0); netdev_dbg(dev->net, "GPIO Status: 0x%04x\n", status); - asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL); - asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom); - asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL); + asix_write_cmd(dev, AX_CMD_WRITE_ENABLE, 0, 0, 0, NULL, 0); + asix_read_cmd(dev, AX_CMD_READ_EEPROM, 0x0017, 0, 2, &eeprom, 0); + asix_write_cmd(dev, AX_CMD_WRITE_DISABLE, 0, 0, 0, NULL, 0); netdev_dbg(dev->net, "EEPROM index 0x17 is 0x%04x\n", eeprom); @@ -614,15 +889,16 @@ static int ax88178_reset(struct usbnet *dev) netdev_dbg(dev->net, "GPIO0: %d, PhyMode: %d\n", gpio0, data->phymode); /* Power up external GigaPHY through AX88178 GPIO pin */ - asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 | AX_GPIO_GPO1EN, 40); + asix_write_gpio(dev, AX_GPIO_RSE | AX_GPIO_GPO_1 | + AX_GPIO_GPO1EN, 40, 0); if ((le16_to_cpu(eeprom) >> 8) != 1) { - asix_write_gpio(dev, 0x003c, 30); - asix_write_gpio(dev, 0x001c, 300); - asix_write_gpio(dev, 0x003c, 30); + asix_write_gpio(dev, 0x003c, 30, 0); + asix_write_gpio(dev, 0x001c, 300, 0); + asix_write_gpio(dev, 0x003c, 30, 0); } else { netdev_dbg(dev->net, "gpio phymode == 1 path\n"); - asix_write_gpio(dev, AX_GPIO_GPO1EN, 30); - asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30); + asix_write_gpio(dev, AX_GPIO_GPO1EN, 30, 0); + asix_write_gpio(dev, AX_GPIO_GPO1EN | AX_GPIO_GPO_1, 30, 0); } /* Read PHYID register *AFTER* powering up PHY */ @@ -630,15 +906,15 @@ static int ax88178_reset(struct usbnet *dev) netdev_dbg(dev->net, "PHYID=0x%08x\n", phyid); /* Set AX88178 to enable MII/GMII/RGMII interface for external PHY */ - asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL); + asix_write_cmd(dev, AX_CMD_SW_PHY_SELECT, 0, 0, 0, NULL, 0); - asix_sw_reset(dev, 0); + asix_sw_reset(dev, 0, 0); msleep(150); - asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD); + asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0); msleep(150); - asix_write_rx_ctl(dev, 0); + asix_write_rx_ctl(dev, 0, 0); if (data->phymode == PHY_MODE_MARVELL) { marvell_phy_init(dev); @@ -646,27 +922,23 @@ static int ax88178_reset(struct usbnet *dev) } else if (data->phymode == PHY_MODE_RTL8211CL) rtl8211cl_phy_init(dev); - asix_mdio_write(dev->net, dev->mii.phy_id, MII_BMCR, - BMCR_RESET | BMCR_ANENABLE); + asix_phy_reset(dev, BMCR_RESET | BMCR_ANENABLE); asix_mdio_write(dev->net, dev->mii.phy_id, MII_ADVERTISE, ADVERTISE_ALL | ADVERTISE_CSMA | ADVERTISE_PAUSE_CAP); asix_mdio_write(dev->net, dev->mii.phy_id, MII_CTRL1000, ADVERTISE_1000FULL); + asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT, 0); mii_nway_restart(&dev->mii); - ret = asix_write_medium_mode(dev, AX88178_MEDIUM_DEFAULT); - if (ret < 0) - return ret; - /* Rewrite MAC address */ memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, - data->mac_addr); + data->mac_addr, 0); if (ret < 0) return ret; - ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0); if (ret < 0) return ret; @@ -704,7 +976,7 @@ static int ax88178_link_reset(struct usbnet *dev) netdev_dbg(dev->net, "ax88178_link_reset() speed: %u duplex: %d setting mode to 0x%04x\n", speed, ecmd.duplex, mode); - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); if (data->phymode == PHY_MODE_MARVELL && data->ledmode) marvell_led_status(dev, speed); @@ -733,15 +1005,15 @@ static void ax88178_set_mfb(struct usbnet *dev) mfb = AX_RX_CTL_MFB_16384; } - rxctl = asix_read_rx_ctl(dev); - asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb); + rxctl = asix_read_rx_ctl(dev, 0); + asix_write_rx_ctl(dev, (rxctl & ~AX_RX_CTL_MFB_16384) | mfb, 0); - medium = asix_read_medium_status(dev); + medium = asix_read_medium_status(dev, 0); if (dev->net->mtu > 1500) medium |= AX_MEDIUM_JFE; else medium &= ~AX_MEDIUM_JFE; - asix_write_medium_mode(dev, medium); + asix_write_medium_mode(dev, medium, 0); if (dev->rx_urb_size > old_rx_urb_size) usbnet_unlink_rx_urbs(dev); @@ -790,7 +1062,7 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) usbnet_get_endpoints(dev,intf); /* Get the MAC address */ - ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf); + ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); if (ret < 0) { netdev_dbg(dev->net, "Failed to read MAC address: %d\n", ret); return ret; @@ -811,10 +1083,10 @@ static int ax88178_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &ax88178_ethtool_ops; /* Blink LEDS so users know driver saw dongle */ - asix_sw_reset(dev, 0); + asix_sw_reset(dev, 0, 0); msleep(150); - asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD); + asix_sw_reset(dev, AX_SWRESET_PRL | AX_SWRESET_IPPD, 0); msleep(150); /* Asix framing packs multiple eth frames into a 2K usb bulk transfer */ @@ -877,7 +1149,7 @@ static const struct driver_info ax88772_info = { .unbind = ax88772_unbind, .status = asix_status, .link_reset = ax88772_link_reset, - .reset = ax88772_link_reset, + .reset = ax88772_reset, .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_LINK_INTR | FLAG_MULTI_PACKET, .rx_fixup = asix_rx_fixup_common, .tx_fixup = asix_tx_fixup, @@ -1005,7 +1277,7 @@ static const struct usb_device_id products [] = { }, { // Lenovo U2L100P 10/100 USB_DEVICE (0x17ef, 0x7203), - .driver_info = (unsigned long) &ax88772_info, + .driver_info = (unsigned long)&ax88772b_info, }, { // ASIX AX88772B 10/100 USB_DEVICE (0x0b95, 0x772b), @@ -1073,7 +1345,7 @@ static const struct usb_device_id products [] = { }, { // Asus USB Ethernet Adapter USB_DEVICE (0x0b95, 0x7e2b), - .driver_info = (unsigned long) &ax88772_info, + .driver_info = (unsigned long)&ax88772b_info, }, { /* ASIX 88172a demo board */ USB_DEVICE(0x0b95, 0x172a), @@ -1095,8 +1367,8 @@ static struct usb_driver asix_driver = { .name = DRIVER_NAME, .id_table = products, .probe = usbnet_probe, - .suspend = usbnet_suspend, - .resume = usbnet_resume, + .suspend = asix_suspend, + .resume = asix_resume, .disconnect = usbnet_disconnect, .supports_autosuspend = 1, .disable_hub_initiated_lpm = 1, diff --git a/drivers/net/usb/ax88172a.c b/drivers/net/usb/ax88172a.c index 163a2c576e69..49a3bc107d05 100644 --- a/drivers/net/usb/ax88172a.c +++ b/drivers/net/usb/ax88172a.c @@ -81,7 +81,7 @@ static void ax88172a_adjust_link(struct net_device *netdev) } if (mode != priv->oldmode) { - asix_write_medium_mode(dev, mode); + asix_write_medium_mode(dev, mode, 0); priv->oldmode = mode; netdev_dbg(netdev, "speed %u duplex %d, setting mode to 0x%04x\n", phydev->speed, phydev->duplex, mode); @@ -176,18 +176,19 @@ static int ax88172a_reset_phy(struct usbnet *dev, int embd_phy) { int ret; - ret = asix_sw_reset(dev, AX_SWRESET_IPPD); + ret = asix_sw_reset(dev, AX_SWRESET_IPPD, 0); if (ret < 0) goto err; msleep(150); - ret = asix_sw_reset(dev, AX_SWRESET_CLEAR); + ret = asix_sw_reset(dev, AX_SWRESET_CLEAR, 0); if (ret < 0) goto err; msleep(150); - ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD); + ret = asix_sw_reset(dev, embd_phy ? AX_SWRESET_IPRL : AX_SWRESET_IPPD, + 0); if (ret < 0) goto err; @@ -213,7 +214,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) dev->driver_priv = priv; /* Get the MAC address */ - ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf); + ret = asix_read_cmd(dev, AX_CMD_READ_NODE_ID, 0, 0, ETH_ALEN, buf, 0); if (ret < 0) { netdev_err(dev->net, "Failed to read MAC address: %d\n", ret); goto free; @@ -224,7 +225,7 @@ static int ax88172a_bind(struct usbnet *dev, struct usb_interface *intf) dev->net->ethtool_ops = &ax88172a_ethtool_ops; /* are we using the internal or the external phy? */ - ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf); + ret = asix_read_cmd(dev, AX_CMD_SW_PHY_STATUS, 0, 0, 1, buf, 0); if (ret < 0) { netdev_err(dev->net, "Failed to read software interface selection register: %d\n", ret); @@ -303,20 +304,20 @@ static int ax88172a_reset(struct usbnet *dev) ax88172a_reset_phy(dev, priv->use_embdphy); msleep(150); - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, 0); netdev_dbg(dev->net, "RX_CTL is 0x%04x after software reset\n", rx_ctl); - ret = asix_write_rx_ctl(dev, 0x0000); + ret = asix_write_rx_ctl(dev, 0x0000, 0); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, 0); netdev_dbg(dev->net, "RX_CTL is 0x%04x setting to 0x0000\n", rx_ctl); msleep(150); ret = asix_write_cmd(dev, AX_CMD_WRITE_IPG0, AX88772_IPG0_DEFAULT | AX88772_IPG1_DEFAULT, - AX88772_IPG2_DEFAULT, 0, NULL); + AX88772_IPG2_DEFAULT, 0, NULL, 0); if (ret < 0) { netdev_err(dev->net, "Write IPG,IPG1,IPG2 failed: %d\n", ret); goto out; @@ -325,20 +326,20 @@ static int ax88172a_reset(struct usbnet *dev) /* Rewrite MAC address */ memcpy(data->mac_addr, dev->net->dev_addr, ETH_ALEN); ret = asix_write_cmd(dev, AX_CMD_WRITE_NODE_ID, 0, 0, ETH_ALEN, - data->mac_addr); + data->mac_addr, 0); if (ret < 0) goto out; /* Set RX_CTL to default values with 2k buffer, and enable cactus */ - ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL); + ret = asix_write_rx_ctl(dev, AX_DEFAULT_RX_CTL, 0); if (ret < 0) goto out; - rx_ctl = asix_read_rx_ctl(dev); + rx_ctl = asix_read_rx_ctl(dev, 0); netdev_dbg(dev->net, "RX_CTL is 0x%04x after all initializations\n", rx_ctl); - rx_ctl = asix_read_medium_status(dev); + rx_ctl = asix_read_medium_status(dev, 0); netdev_dbg(dev->net, "Medium Status is 0x%04x after all initializations\n", rx_ctl); diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index 4b4458616693..e7b516342678 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -50,6 +50,8 @@ * *****************************************************************************/ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -108,23 +110,12 @@ /*****************************************************************************/ /* Debugging functions */ /*****************************************************************************/ -#define D__(lvl_, fmt, arg...) \ - do { \ - printk(lvl_ "[%d:%s]: " fmt "\n", \ - __LINE__, __func__, ## arg); \ - } while (0) - -#define D_(lvl, args...) \ - do { \ - if (lvl & debug) \ - D__(KERN_INFO, args); \ - } while (0) - -#define D1(args...) D_(0x01, ##args) -#define D2(args...) D_(0x02, ##args) -#define D3(args...) D_(0x04, ##args) -#define D4(args...) D_(0x08, ##args) -#define D5(args...) D_(0x10, ##args) +#define hso_dbg(lvl, fmt, ...) \ +do { \ + if ((lvl) & debug) \ + pr_info("[%d:%s] " fmt, \ + __LINE__, __func__, ##__VA_ARGS__); \ +} while (0) /*****************************************************************************/ /* Enumerators */ @@ -649,7 +640,7 @@ static int get_free_serial_index(void) } spin_unlock_irqrestore(&serial_table_lock, flags); - printk(KERN_ERR "%s: no free serial devices in table\n", __func__); + pr_err("%s: no free serial devices in table\n", __func__); return -1; } @@ -709,7 +700,8 @@ static void handle_usb_error(int status, const char *function, } /* log a meaningful explanation of an USB status */ - D1("%s: received USB status - %s (%d)", function, explanation, status); + hso_dbg(0x1, "%s: received USB status - %s (%d)\n", + function, explanation, status); } /* Network interface functions */ @@ -808,7 +800,7 @@ static netdev_tx_t hso_net_start_xmit(struct sk_buff *skb, DUMP1(skb->data, skb->len); /* Copy it from kernel memory to OUR memory */ memcpy(odev->mux_bulk_tx_buf, skb->data, skb->len); - D1("len: %d/%d", skb->len, MUX_BULK_TX_BUF_SIZE); + hso_dbg(0x1, "len: %d/%d\n", skb->len, MUX_BULK_TX_BUF_SIZE); /* Fill in the URB for shipping it out. */ usb_fill_bulk_urb(odev->mux_bulk_tx_urb, @@ -872,7 +864,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, unsigned char *tmp_rx_buf; /* log if needed */ - D1("Rx %d bytes", count); + hso_dbg(0x1, "Rx %d bytes\n", count); DUMP(ip_pkt, min(128, (int)count)); while (count) { @@ -912,7 +904,7 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, frame_len); if (!odev->skb_rx_buf) { /* We got no receive buffer. */ - D1("could not allocate memory"); + hso_dbg(0x1, "could not allocate memory\n"); odev->rx_parse_state = WAIT_SYNC; continue; } @@ -972,11 +964,11 @@ static void packetizeRx(struct hso_net *odev, unsigned char *ip_pkt, break; case WAIT_SYNC: - D1(" W_S"); + hso_dbg(0x1, " W_S\n"); count = 0; break; default: - D1(" "); + hso_dbg(0x1, "\n"); count--; break; } @@ -1020,7 +1012,7 @@ static void read_bulk_callback(struct urb *urb) /* Sanity check */ if (!odev || !test_bit(HSO_NET_RUNNING, &odev->flags)) { - D1("BULK IN callback but driver is not active!"); + hso_dbg(0x1, "BULK IN callback but driver is not active!\n"); return; } usb_mark_last_busy(urb->dev); @@ -1112,11 +1104,11 @@ static void _hso_serial_set_termios(struct tty_struct *tty, struct hso_serial *serial = tty->driver_data; if (!serial) { - printk(KERN_ERR "%s: no tty structures", __func__); + pr_err("%s: no tty structures", __func__); return; } - D4("port %d", serial->minor); + hso_dbg(0x8, "port %d\n", serial->minor); /* * Fix up unsupported bits @@ -1205,11 +1197,11 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb) struct hso_serial *serial = urb->context; int status = urb->status; - D4("\n--- Got serial_read_bulk callback %02x ---", status); + hso_dbg(0x8, "--- Got serial_read_bulk callback %02x ---\n", status); /* sanity check */ if (!serial) { - D1("serial == NULL"); + hso_dbg(0x1, "serial == NULL\n"); return; } if (status) { @@ -1217,7 +1209,7 @@ static void hso_std_serial_read_bulk_callback(struct urb *urb) return; } - D1("Actual length = %d\n", urb->actual_length); + hso_dbg(0x1, "Actual length = %d\n", urb->actual_length); DUMP1(urb->transfer_buffer, urb->actual_length); /* Anyone listening? */ @@ -1266,7 +1258,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) if (serial == NULL || serial->magic != HSO_SERIAL_MAGIC) { WARN_ON(1); tty->driver_data = NULL; - D1("Failed to open port"); + hso_dbg(0x1, "Failed to open port\n"); return -ENODEV; } @@ -1275,7 +1267,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) if (result < 0) goto err_out; - D1("Opening %d", serial->minor); + hso_dbg(0x1, "Opening %d\n", serial->minor); /* setup */ tty->driver_data = serial; @@ -1298,7 +1290,7 @@ static int hso_serial_open(struct tty_struct *tty, struct file *filp) kref_get(&serial->parent->ref); } } else { - D1("Port was already open"); + hso_dbg(0x1, "Port was already open\n"); } usb_autopm_put_interface(serial->parent->interface); @@ -1317,7 +1309,7 @@ static void hso_serial_close(struct tty_struct *tty, struct file *filp) struct hso_serial *serial = tty->driver_data; u8 usb_gone; - D1("Closing serial port"); + hso_dbg(0x1, "Closing serial port\n"); /* Open failed, no close cleanup required */ if (serial == NULL) @@ -1357,7 +1349,7 @@ static int hso_serial_write(struct tty_struct *tty, const unsigned char *buf, /* sanity check */ if (serial == NULL) { - printk(KERN_ERR "%s: serial is NULL\n", __func__); + pr_err("%s: serial is NULL\n", __func__); return -ENODEV; } @@ -1412,8 +1404,8 @@ static void hso_serial_set_termios(struct tty_struct *tty, struct ktermios *old) unsigned long flags; if (old) - D5("Termios called with: cflags new[%d] - old[%d]", - tty->termios.c_cflag, old->c_cflag); + hso_dbg(0x16, "Termios called with: cflags new[%d] - old[%d]\n", + tty->termios.c_cflag, old->c_cflag); /* the actual setup */ spin_lock_irqsave(&serial->serial_lock, flags); @@ -1649,7 +1641,7 @@ static int hso_serial_tiocmget(struct tty_struct *tty) /* sanity check */ if (!serial) { - D1("no tty structures"); + hso_dbg(0x1, "no tty structures\n"); return -EINVAL; } spin_lock_irq(&serial->serial_lock); @@ -1682,7 +1674,7 @@ static int hso_serial_tiocmset(struct tty_struct *tty, /* sanity check */ if (!serial) { - D1("no tty structures"); + hso_dbg(0x1, "no tty structures\n"); return -EINVAL; } @@ -1721,7 +1713,7 @@ static int hso_serial_ioctl(struct tty_struct *tty, { struct hso_serial *serial = tty->driver_data; int ret = 0; - D4("IOCTL cmd: %d, arg: %ld", cmd, arg); + hso_dbg(0x8, "IOCTL cmd: %d, arg: %ld\n", cmd, arg); if (!serial) return -ENODEV; @@ -1783,7 +1775,7 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port, /* Sanity check */ if (!serial || !ctrl_urb || !ctrl_req) { - printk(KERN_ERR "%s: Wrong arguments\n", __func__); + pr_err("%s: Wrong arguments\n", __func__); return -EINVAL; } @@ -1808,9 +1800,9 @@ static int mux_device_request(struct hso_serial *serial, u8 type, u16 port, pipe = usb_sndctrlpipe(serial->parent->usb, 0); } /* syslog */ - D2("%s command (%02x) len: %d, port: %d", - type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write", - ctrl_req->bRequestType, ctrl_req->wLength, port); + hso_dbg(0x2, "%s command (%02x) len: %d, port: %d\n", + type == USB_CDC_GET_ENCAPSULATED_RESPONSE ? "Read" : "Write", + ctrl_req->bRequestType, ctrl_req->wLength, port); /* Load ctrl urb */ ctrl_urb->transfer_flags = 0; @@ -1876,11 +1868,11 @@ static void intr_callback(struct urb *urb) handle_usb_error(status, __func__, NULL); return; } - D4("\n--- Got intr callback 0x%02X ---", status); + hso_dbg(0x8, "--- Got intr callback 0x%02X ---\n", status); /* what request? */ port_req = urb->transfer_buffer; - D4(" port_req = 0x%.2X\n", *port_req); + hso_dbg(0x8, "port_req = 0x%.2X\n", *port_req); /* loop over all muxed ports to find the one sending this */ for (i = 0; i < 8; i++) { /* max 8 channels on MUX */ @@ -1888,7 +1880,8 @@ static void intr_callback(struct urb *urb) serial = get_serial_by_shared_int_and_type(shared_int, (1 << i)); if (serial != NULL) { - D1("Pending read interrupt on port %d\n", i); + hso_dbg(0x1, "Pending read interrupt on port %d\n", + i); spin_lock(&serial->serial_lock); if (serial->rx_state == RX_IDLE && serial->port.count > 0) { @@ -1900,8 +1893,8 @@ static void intr_callback(struct urb *urb) } else serial->rx_state = RX_PENDING; } else { - D1("Already a read pending on " - "port %d or port not open\n", i); + hso_dbg(0x1, "Already a read pending on port %d or port not open\n", + i); } spin_unlock(&serial->serial_lock); } @@ -1933,7 +1926,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) /* sanity check */ if (!serial) { - D1("serial == NULL"); + hso_dbg(0x1, "serial == NULL\n"); return; } @@ -1948,7 +1941,7 @@ static void hso_std_serial_write_bulk_callback(struct urb *urb) tty_port_tty_wakeup(&serial->port); hso_kick_transmit(serial); - D1(" "); + hso_dbg(0x1, "\n"); } /* called for writing diag or CS serial port */ @@ -1996,8 +1989,8 @@ static void ctrl_callback(struct urb *urb) /* what request? */ req = (struct usb_ctrlrequest *)(urb->setup_packet); - D4("\n--- Got muxed ctrl callback 0x%02X ---", status); - D4("Actual length of urb = %d\n", urb->actual_length); + hso_dbg(0x8, "--- Got muxed ctrl callback 0x%02X ---\n", status); + hso_dbg(0x8, "Actual length of urb = %d\n", urb->actual_length); DUMP1(urb->transfer_buffer, urb->actual_length); if (req->bRequestType == @@ -2023,7 +2016,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) /* Sanity check */ if (urb == NULL || serial == NULL) { - D1("serial = NULL"); + hso_dbg(0x1, "serial = NULL\n"); return -2; } @@ -2035,7 +2028,7 @@ static int put_rxbuf_data(struct urb *urb, struct hso_serial *serial) } /* Push data to tty */ - D1("data to push to tty"); + hso_dbg(0x1, "data to push to tty\n"); count = tty_buffer_request_room(&serial->port, urb->actual_length); if (count >= urb->actual_length) { tty_insert_flip_string(&serial->port, urb->transfer_buffer, @@ -2300,10 +2293,8 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, serial->rx_data_length = rx_size; for (i = 0; i < serial->num_rx_urbs; i++) { serial->rx_urb[i] = usb_alloc_urb(0, GFP_KERNEL); - if (!serial->rx_urb[i]) { - dev_err(dev, "Could not allocate urb?\n"); + if (!serial->rx_urb[i]) goto exit; - } serial->rx_urb[i]->transfer_buffer = NULL; serial->rx_urb[i]->transfer_buffer_length = 0; serial->rx_data[i] = kzalloc(serial->rx_data_length, @@ -2314,10 +2305,8 @@ static int hso_serial_common_create(struct hso_serial *serial, int num_urbs, /* TX, allocate urb and initialize */ serial->tx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!serial->tx_urb) { - dev_err(dev, "Could not allocate urb?\n"); + if (!serial->tx_urb) goto exit; - } serial->tx_urb->transfer_buffer = NULL; serial->tx_urb->transfer_buffer_length = 0; /* prepare our TX buffer */ @@ -2419,7 +2408,7 @@ static void hso_net_init(struct net_device *net) { struct hso_net *hso_net = netdev_priv(net); - D1("sizeof hso_net is %d", (int)sizeof(*hso_net)); + hso_dbg(0x1, "sizeof hso_net is %zu\n", sizeof(*hso_net)); /* fill in the other fields */ net->netdev_ops = &hso_netdev_ops; @@ -2555,20 +2544,16 @@ static struct hso_device *hso_create_net_device(struct usb_interface *interface, /* start allocating */ for (i = 0; i < MUX_BULK_RX_BUF_COUNT; i++) { hso_net->mux_bulk_rx_urb_pool[i] = usb_alloc_urb(0, GFP_KERNEL); - if (!hso_net->mux_bulk_rx_urb_pool[i]) { - dev_err(&interface->dev, "Could not allocate rx urb\n"); + if (!hso_net->mux_bulk_rx_urb_pool[i]) goto exit; - } hso_net->mux_bulk_rx_buf_pool[i] = kzalloc(MUX_BULK_RX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_rx_buf_pool[i]) goto exit; } hso_net->mux_bulk_tx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!hso_net->mux_bulk_tx_urb) { - dev_err(&interface->dev, "Could not allocate tx urb\n"); + if (!hso_net->mux_bulk_tx_urb) goto exit; - } hso_net->mux_bulk_tx_buf = kzalloc(MUX_BULK_TX_BUF_SIZE, GFP_KERNEL); if (!hso_net->mux_bulk_tx_buf) goto exit; @@ -2787,10 +2772,8 @@ struct hso_shared_int *hso_create_shared_int(struct usb_interface *interface) } mux->shared_intr_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!mux->shared_intr_urb) { - dev_err(&interface->dev, "Could not allocate intr urb?\n"); + if (!mux->shared_intr_urb) goto exit; - } mux->shared_intr_buf = kzalloc(le16_to_cpu(mux->intr_endp->wMaxPacketSize), GFP_KERNEL); @@ -3239,7 +3222,7 @@ static int __init hso_init(void) int result; /* put it in the log */ - printk(KERN_INFO "hso: %s\n", version); + pr_info("%s\n", version); /* Initialise the serial table semaphore and table */ spin_lock_init(&serial_table_lock); @@ -3270,16 +3253,15 @@ static int __init hso_init(void) /* register the tty driver */ result = tty_register_driver(tty_drv); if (result) { - printk(KERN_ERR "%s - tty_register_driver failed(%d)\n", - __func__, result); + pr_err("%s - tty_register_driver failed(%d)\n", + __func__, result); goto err_free_tty; } /* register this module as an usb driver */ result = usb_register(&hso_driver); if (result) { - printk(KERN_ERR "Could not register hso driver? error: %d\n", - result); + pr_err("Could not register hso driver - error: %d\n", result); goto err_unreg_tty; } @@ -3294,7 +3276,7 @@ err_free_tty: static void __exit hso_exit(void) { - printk(KERN_INFO "hso: unloaded\n"); + pr_info("unloaded\n"); tty_unregister_driver(tty_drv); put_tty_driver(tty_drv); @@ -3311,7 +3293,7 @@ MODULE_DESCRIPTION(MOD_DESCRIPTION); MODULE_LICENSE(MOD_LICENSE); /* change the debug level (eg: insmod hso.ko debug=0x04) */ -MODULE_PARM_DESC(debug, "Level of debug [0x01 | 0x02 | 0x04 | 0x08 | 0x10]"); +MODULE_PARM_DESC(debug, "debug level mask [0x01 | 0x02 | 0x04 | 0x08 | 0x10]"); module_param(debug, int, S_IRUGO | S_IWUSR); /* set the major tty number (eg: insmod hso.ko tty_major=245) */ diff --git a/drivers/net/usb/kaweth.c b/drivers/net/usb/kaweth.c index 528b9c9c4e60..66b34ddbe216 100644 --- a/drivers/net/usb/kaweth.c +++ b/drivers/net/usb/kaweth.c @@ -265,8 +265,6 @@ static int kaweth_control(struct kaweth_device *kaweth, struct usb_ctrlrequest *dr; int retval; - netdev_dbg(kaweth->net, "kaweth_control()\n"); - if(in_interrupt()) { netdev_dbg(kaweth->net, "in_interrupt()\n"); return -EBUSY; @@ -300,8 +298,6 @@ static int kaweth_read_configuration(struct kaweth_device *kaweth) { int retval; - netdev_dbg(kaweth->net, "Reading kaweth configuration\n"); - retval = kaweth_control(kaweth, usb_rcvctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_GET_ETHERNET_DESC, @@ -451,8 +447,6 @@ static int kaweth_trigger_firmware(struct kaweth_device *kaweth, kaweth->firmware_buf[6] = 0x00; kaweth->firmware_buf[7] = 0x00; - netdev_dbg(kaweth->net, "Triggering firmware\n"); - return kaweth_control(kaweth, usb_sndctrlpipe(kaweth->dev, 0), KAWETH_COMMAND_SCAN, @@ -471,7 +465,6 @@ static int kaweth_reset(struct kaweth_device *kaweth) { int result; - netdev_dbg(kaweth->net, "kaweth_reset(%p)\n", kaweth); result = usb_reset_configuration(kaweth->dev); mdelay(10); @@ -685,8 +678,6 @@ static int kaweth_open(struct net_device *net) struct kaweth_device *kaweth = netdev_priv(net); int res; - netdev_dbg(kaweth->net, "Opening network device.\n"); - res = usb_autopm_get_interface(kaweth->intf); if (res) { dev_err(&kaweth->intf->dev, "Interface cannot be resumed.\n"); @@ -951,7 +942,6 @@ static int kaweth_suspend(struct usb_interface *intf, pm_message_t message) struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; - dev_dbg(&intf->dev, "Suspending device\n"); spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status |= KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); @@ -968,7 +958,6 @@ static int kaweth_resume(struct usb_interface *intf) struct kaweth_device *kaweth = usb_get_intfdata(intf); unsigned long flags; - dev_dbg(&intf->dev, "Resuming device\n"); spin_lock_irqsave(&kaweth->device_lock, flags); kaweth->status &= ~KAWETH_STATUS_SUSPENDING; spin_unlock_irqrestore(&kaweth->device_lock, flags); @@ -1190,8 +1179,6 @@ err_fw: dev_info(dev, "kaweth interface created at %s\n", kaweth->net->name); - dev_dbg(dev, "Kaweth probe returning.\n"); - return 0; err_intfdata: @@ -1219,8 +1206,6 @@ static void kaweth_disconnect(struct usb_interface *intf) struct kaweth_device *kaweth = usb_get_intfdata(intf); struct net_device *netdev; - dev_info(&intf->dev, "Unregistering\n"); - usb_set_intfdata(intf, NULL); if (!kaweth) { dev_warn(&intf->dev, "unregistering non-existent device\n"); diff --git a/drivers/net/usb/lan78xx.c b/drivers/net/usb/lan78xx.c index 6a9d474b08b2..db558b8b32fe 100644 --- a/drivers/net/usb/lan78xx.c +++ b/drivers/net/usb/lan78xx.c @@ -1179,7 +1179,7 @@ static int lan78xx_link_reset(struct lan78xx_net *dev) * NOTE: annoying asymmetry: if it's active, schedule_work() fails, * but tasklet_schedule() doesn't. hope the failure is rare. */ -void lan78xx_defer_kevent(struct lan78xx_net *dev, int work) +static void lan78xx_defer_kevent(struct lan78xx_net *dev, int work) { set_bit(work, &dev->flags); if (!schedule_delayed_work(&dev->wq, 0)) @@ -1406,7 +1406,7 @@ static u32 lan78xx_get_link(struct net_device *net) return net->phydev->link; } -int lan78xx_nway_reset(struct net_device *net) +static int lan78xx_nway_reset(struct net_device *net) { return phy_start_aneg(net->phydev); } @@ -1997,7 +1997,7 @@ static int lan78xx_change_mtu(struct net_device *netdev, int new_mtu) return 0; } -int lan78xx_set_mac_addr(struct net_device *netdev, void *p) +static int lan78xx_set_mac_addr(struct net_device *netdev, void *p) { struct lan78xx_net *dev = netdev_priv(netdev); struct sockaddr *addr = p; @@ -2371,7 +2371,7 @@ static void lan78xx_terminate_urbs(struct lan78xx_net *dev) remove_wait_queue(&unlink_wakeup, &wait); } -int lan78xx_stop(struct net_device *net) +static int lan78xx_stop(struct net_device *net) { struct lan78xx_net *dev = netdev_priv(net); @@ -2533,7 +2533,8 @@ static void lan78xx_queue_skb(struct sk_buff_head *list, entry->state = state; } -netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) +static netdev_tx_t +lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) { struct lan78xx_net *dev = netdev_priv(net); struct sk_buff *skb2 = NULL; @@ -2562,7 +2563,8 @@ netdev_tx_t lan78xx_start_xmit(struct sk_buff *skb, struct net_device *net) return NETDEV_TX_OK; } -int lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf) +static int +lan78xx_get_endpoints(struct lan78xx_net *dev, struct usb_interface *intf) { int tmp; struct usb_host_interface *alt = NULL; @@ -2700,7 +2702,7 @@ static void lan78xx_rx_csum_offload(struct lan78xx_net *dev, } } -void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb) +static void lan78xx_skb_return(struct lan78xx_net *dev, struct sk_buff *skb) { int status; @@ -3002,10 +3004,8 @@ static void lan78xx_tx_bh(struct lan78xx_net *dev) gso_skb: urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netif_dbg(dev, tx_err, dev->net, "no urb\n"); + if (!urb) goto drop; - } entry = (struct skb_data *)skb->cb; entry->urb = urb; @@ -3285,7 +3285,7 @@ static void lan78xx_disconnect(struct usb_interface *intf) usb_put_dev(udev); } -void lan78xx_tx_timeout(struct net_device *net) +static void lan78xx_tx_timeout(struct net_device *net) { struct lan78xx_net *dev = netdev_priv(net); @@ -3605,7 +3605,7 @@ static int lan78xx_set_suspend(struct lan78xx_net *dev, u32 wol) return 0; } -int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) +static int lan78xx_suspend(struct usb_interface *intf, pm_message_t message) { struct lan78xx_net *dev = usb_get_intfdata(intf); struct lan78xx_priv *pdata = (struct lan78xx_priv *)(dev->data[0]); @@ -3701,7 +3701,7 @@ out: return ret; } -int lan78xx_resume(struct usb_interface *intf) +static int lan78xx_resume(struct usb_interface *intf) { struct lan78xx_net *dev = usb_get_intfdata(intf); struct sk_buff *skb; @@ -3768,7 +3768,7 @@ int lan78xx_resume(struct usb_interface *intf) return 0; } -int lan78xx_reset_resume(struct usb_interface *intf) +static int lan78xx_reset_resume(struct usb_interface *intf) { struct lan78xx_net *dev = usb_get_intfdata(intf); diff --git a/drivers/net/usb/pegasus.c b/drivers/net/usb/pegasus.c index 9bbe0161a2f4..1434e5dd5f9c 100644 --- a/drivers/net/usb/pegasus.c +++ b/drivers/net/usb/pegasus.c @@ -1129,7 +1129,8 @@ static int pegasus_probe(struct usb_interface *intf, return -ENODEV; if (pegasus_count == 0) { - pegasus_workqueue = create_singlethread_workqueue("pegasus"); + pegasus_workqueue = alloc_workqueue("pegasus", WQ_MEM_RECLAIM, + 0); if (!pegasus_workqueue) return -ENOMEM; } diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index c254248863d4..44d439f50961 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1076,8 +1076,7 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa) return -ENODEV; if (obj->type != ACPI_TYPE_BUFFER || obj->string.length != 0x17) { netif_warn(tp, probe, tp->netdev, - "Invalid buffer when reading pass-thru MAC addr: " - "(%d, %d)\n", + "Invalid buffer for pass-thru MAC addr: (%d, %d)\n", obj->type, obj->string.length); goto amacout; } @@ -1090,8 +1089,8 @@ static int vendor_mac_passthru_addr_read(struct r8152 *tp, struct sockaddr *sa) ret = hex2bin(buf, obj->string.pointer + 9, 6); if (!(ret == 0 && is_valid_ether_addr(buf))) { netif_warn(tp, probe, tp->netdev, - "Invalid MAC when reading pass-thru MAC addr: " - "%d, %pM\n", ret, buf); + "Invalid MAC for pass-thru MAC addr: %d, %pM\n", + ret, buf); ret = -EINVAL; goto amacout; } @@ -1111,9 +1110,9 @@ static int set_ethernet_addr(struct r8152 *tp) struct sockaddr sa; int ret; - if (tp->version == RTL_VER_01) + if (tp->version == RTL_VER_01) { ret = pla_ocp_read(tp, PLA_IDR, 8, sa.sa_data); - else { + } else { /* if this is not an RTL8153-AD, no eFuse mac pass thru set, * or system doesn't provide valid _SB.AMAC this will be * be expected to non-zero @@ -4043,7 +4042,7 @@ static int rtl8152_set_coalesce(struct net_device *netdev, return ret; } -static struct ethtool_ops ops = { +static const struct ethtool_ops ops = { .get_drvinfo = rtl8152_get_drvinfo, .get_settings = rtl8152_get_settings, .set_settings = rtl8152_set_settings, diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index dc989a8b5afb..831aa33d078a 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -33,7 +33,7 @@ #include "smsc95xx.h" #define SMSC_CHIPNAME "smsc95xx" -#define SMSC_DRIVER_VERSION "1.0.4" +#define SMSC_DRIVER_VERSION "1.0.5" #define HS_USB_PKT_SIZE (512) #define FS_USB_PKT_SIZE (64) #define DEFAULT_HS_BURST_CAP_SIZE (16 * 1024 + 5 * HS_USB_PKT_SIZE) @@ -64,6 +64,7 @@ #define CARRIER_CHECK_DELAY (2 * HZ) struct smsc95xx_priv { + u32 chip_id; u32 mac_cr; u32 hash_hi; u32 hash_lo; @@ -71,6 +72,7 @@ struct smsc95xx_priv { spinlock_t mac_cr_lock; u8 features; u8 suspend_flags; + u8 mdix_ctrl; bool link_ok; struct delayed_work carrier_check; struct usbnet *dev; @@ -782,14 +784,113 @@ static int smsc95xx_ethtool_set_wol(struct net_device *net, return ret; } +static int get_mdix_status(struct net_device *net) +{ + struct usbnet *dev = netdev_priv(net); + u32 val; + int buf; + + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, SPECIAL_CTRL_STS); + if (buf & SPECIAL_CTRL_STS_OVRRD_AMDIX_) { + if (buf & SPECIAL_CTRL_STS_AMDIX_ENABLE_) + return ETH_TP_MDI_AUTO; + else if (buf & SPECIAL_CTRL_STS_AMDIX_STATE_) + return ETH_TP_MDI_X; + } else { + buf = smsc95xx_read_reg(dev, STRAP_STATUS, &val); + if (val & STRAP_STATUS_AMDIX_EN_) + return ETH_TP_MDI_AUTO; + } + + return ETH_TP_MDI; +} + +static void set_mdix_status(struct net_device *net, __u8 mdix_ctrl) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int buf; + + if ((pdata->chip_id == ID_REV_CHIP_ID_9500A_) || + (pdata->chip_id == ID_REV_CHIP_ID_9530_) || + (pdata->chip_id == ID_REV_CHIP_ID_89530_) || + (pdata->chip_id == ID_REV_CHIP_ID_9730_)) { + /* Extend Manual AutoMDIX timer for 9500A/9500Ai */ + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + PHY_EDPD_CONFIG); + buf |= PHY_EDPD_CONFIG_EXT_CROSSOVER_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + PHY_EDPD_CONFIG, buf); + } + + if (mdix_ctrl == ETH_TP_MDI) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } else if (mdix_ctrl == ETH_TP_MDI_X) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf |= SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + buf |= SPECIAL_CTRL_STS_AMDIX_STATE_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } else if (mdix_ctrl == ETH_TP_MDI_AUTO) { + buf = smsc95xx_mdio_read(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS); + buf &= ~SPECIAL_CTRL_STS_OVRRD_AMDIX_; + buf &= ~(SPECIAL_CTRL_STS_AMDIX_ENABLE_ | + SPECIAL_CTRL_STS_AMDIX_STATE_); + buf |= SPECIAL_CTRL_STS_AMDIX_ENABLE_; + smsc95xx_mdio_write(dev->net, dev->mii.phy_id, + SPECIAL_CTRL_STS, buf); + } + pdata->mdix_ctrl = mdix_ctrl; +} + +static int smsc95xx_get_settings(struct net_device *net, + struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int retval; + + retval = usbnet_get_settings(net, cmd); + + cmd->eth_tp_mdix = pdata->mdix_ctrl; + cmd->eth_tp_mdix_ctrl = pdata->mdix_ctrl; + + return retval; +} + +static int smsc95xx_set_settings(struct net_device *net, + struct ethtool_cmd *cmd) +{ + struct usbnet *dev = netdev_priv(net); + struct smsc95xx_priv *pdata = (struct smsc95xx_priv *)(dev->data[0]); + int retval; + + if (pdata->mdix_ctrl != cmd->eth_tp_mdix_ctrl) + set_mdix_status(net, cmd->eth_tp_mdix_ctrl); + + retval = usbnet_set_settings(net, cmd); + + return retval; +} + static const struct ethtool_ops smsc95xx_ethtool_ops = { .get_link = usbnet_get_link, .nway_reset = usbnet_nway_reset, .get_drvinfo = usbnet_get_drvinfo, .get_msglevel = usbnet_get_msglevel, .set_msglevel = usbnet_set_msglevel, - .get_settings = usbnet_get_settings, - .set_settings = usbnet_set_settings, + .get_settings = smsc95xx_get_settings, + .set_settings = smsc95xx_set_settings, .get_eeprom_len = smsc95xx_ethtool_get_eeprom_len, .get_eeprom = smsc95xx_ethtool_get_eeprom, .set_eeprom = smsc95xx_ethtool_set_eeprom, @@ -1194,6 +1295,8 @@ static int smsc95xx_bind(struct usbnet *dev, struct usb_interface *intf) if (ret < 0) return ret; val >>= 16; + pdata->chip_id = val; + pdata->mdix_ctrl = get_mdix_status(dev->net); if ((val == ID_REV_CHIP_ID_9500A_) || (val == ID_REV_CHIP_ID_9530_) || (val == ID_REV_CHIP_ID_89530_) || (val == ID_REV_CHIP_ID_9730_)) diff --git a/drivers/net/usb/smsc95xx.h b/drivers/net/usb/smsc95xx.h index 526faa0c44e6..29a4d9efce7c 100644 --- a/drivers/net/usb/smsc95xx.h +++ b/drivers/net/usb/smsc95xx.h @@ -144,6 +144,14 @@ #define BURST_CAP (0x38) +#define STRAP_STATUS (0x3C) +#define STRAP_STATUS_PWR_SEL_ (0x00000020) +#define STRAP_STATUS_AMDIX_EN_ (0x00000010) +#define STRAP_STATUS_PORT_SWAP_ (0x00000008) +#define STRAP_STATUS_EEP_SIZE_ (0x00000004) +#define STRAP_STATUS_RMT_WKP_ (0x00000002) +#define STRAP_STATUS_EEP_DISABLE_ (0x00000001) + #define GPIO_WAKE (0x64) #define INT_EP_CTL (0x68) diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c index 3bfb59209326..d5071e364d40 100644 --- a/drivers/net/usb/usbnet.c +++ b/drivers/net/usb/usbnet.c @@ -2062,11 +2062,8 @@ int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype, cmd, reqtype, value, index, size); urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!urb) { - netdev_err(dev->net, "Error allocating URB in" - " %s!\n", __func__); + if (!urb) goto fail; - } if (data) { buf = kmemdup(data, size, GFP_ATOMIC); diff --git a/drivers/net/veth.c b/drivers/net/veth.c index f37a6e61d4ad..fbc853e64531 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -313,7 +313,7 @@ static const struct net_device_ops veth_netdev_ops = { }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ - NETIF_F_RXCSUM | NETIF_F_HIGHDMA | \ + NETIF_F_RXCSUM | NETIF_F_SCTP_CRC | NETIF_F_HIGHDMA | \ NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ENCAP_ALL | \ NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX | \ NETIF_F_HW_VLAN_STAG_TX | NETIF_F_HW_VLAN_STAG_RX ) @@ -340,6 +340,7 @@ static void veth_setup(struct net_device *dev) dev->hw_features = VETH_FEATURES; dev->hw_enc_features = VETH_FEATURES; + dev->mpls_features = NETIF_F_HW_CSUM | NETIF_F_GSO_SOFTWARE; } /* diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 4244b9d4418e..b5554f2ebee4 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1641,7 +1641,7 @@ static void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq, } } -void +static void vmxnet3_rq_destroy_all_rxdataring(struct vmxnet3_adapter *adapter) { int i; @@ -3186,7 +3186,6 @@ vmxnet3_tx_timeout(struct net_device *netdev) netdev_err(adapter->netdev, "tx hang\n"); schedule_work(&adapter->work); - netif_wake_queue(adapter->netdev); } @@ -3213,6 +3212,7 @@ vmxnet3_reset_work(struct work_struct *data) } rtnl_unlock(); + netif_wake_queue(adapter->netdev); clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state); } diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index 1ce7420322ee..85c271c70d42 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -37,9 +37,6 @@ #include #include -#define RT_FL_TOS(oldflp4) \ - ((oldflp4)->flowi4_tos & (IPTOS_RT_MASK | RTO_ONLINK)) - #define DRV_NAME "vrf" #define DRV_VERSION "1.0" @@ -137,6 +134,20 @@ static int vrf_local_xmit(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) +static int vrf_ip6_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, + sk, skb, NULL, skb_dst(skb)->dev, dst_output); + + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, struct net_device *dev) { @@ -151,7 +162,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, .flowlabel = ip6_flowinfo(iph), .flowi6_mark = skb->mark, .flowi6_proto = iph->nexthdr, - .flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF, + .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF, }; int ret = NET_XMIT_DROP; struct dst_entry *dst; @@ -207,7 +218,7 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, /* strip the ethernet header added for pass through VRF device */ __skb_pull(skb, skb_network_offset(skb)); - ret = ip6_local_out(net, skb->sk, skb); + ret = vrf_ip6_local_out(net, skb->sk, skb); if (unlikely(net_xmit_eval(ret))) dev->stats.tx_errors++; else @@ -227,6 +238,20 @@ static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb, } #endif +/* based on ip_local_out; can't use it b/c the dst is switched pointing to us */ +static int vrf_ip_local_out(struct net *net, struct sock *sk, + struct sk_buff *skb) +{ + int err; + + err = nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, + skb, NULL, skb_dst(skb)->dev, dst_output); + if (likely(err == 1)) + err = dst_output(net, sk, skb); + + return err; +} + static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, struct net_device *vrf_dev) { @@ -237,8 +262,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, .flowi4_oif = vrf_dev->ifindex, .flowi4_iif = LOOPBACK_IFINDEX, .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF, + .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF, .daddr = ip4h->daddr, }; struct net *net = dev_net(vrf_dev); @@ -292,7 +316,7 @@ static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb, RT_SCOPE_LINK); } - ret = ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); + ret = vrf_ip_local_out(dev_net(skb_dst(skb)->dev), skb->sk, skb); if (unlikely(net_xmit_eval(ret))) vrf_dev->stats.tx_errors++; else @@ -377,6 +401,43 @@ static int vrf_output6(struct net *net, struct sock *sk, struct sk_buff *skb) !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. + */ +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rt6_info *rt6; + + /* don't divert link scope packets */ + if (rt6_need_strict(&ipv6_hdr(skb)->daddr)) + return skb; + + rcu_read_lock(); + + rt6 = rcu_dereference(vrf->rt6); + if (likely(rt6)) { + dst = &rt6->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + /* holding rtnl */ static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { @@ -463,6 +524,13 @@ out: return rc; } #else +static struct sk_buff *vrf_ip6_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + return skb; +} + static void vrf_rt6_release(struct net_device *dev, struct net_vrf *vrf) { } @@ -531,6 +599,55 @@ static int vrf_output(struct net *net, struct sock *sk, struct sk_buff *skb) !(IPCB(skb)->flags & IPSKB_REROUTED)); } +/* set dst on skb to send packet to us via dev_xmit path. Allows + * packet to go through device based features such as qdisc, netfilter + * hooks and packet sockets with skb->dev set to vrf device. + */ +static struct sk_buff *vrf_ip_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb) +{ + struct net_vrf *vrf = netdev_priv(vrf_dev); + struct dst_entry *dst = NULL; + struct rtable *rth; + + rcu_read_lock(); + + rth = rcu_dereference(vrf->rth); + if (likely(rth)) { + dst = &rth->dst; + dst_hold(dst); + } + + rcu_read_unlock(); + + if (unlikely(!dst)) { + vrf_tx_error(vrf_dev, skb); + return NULL; + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); + + return skb; +} + +/* called with rcu lock held */ +static struct sk_buff *vrf_l3_out(struct net_device *vrf_dev, + struct sock *sk, + struct sk_buff *skb, + u16 proto) +{ + switch (proto) { + case AF_INET: + return vrf_ip_out(vrf_dev, sk, skb); + case AF_INET6: + return vrf_ip6_out(vrf_dev, sk, skb); + } + + return skb; +} + /* holding rtnl */ static void vrf_rtable_release(struct net_device *dev, struct net_vrf *vrf) { @@ -722,63 +839,6 @@ static u32 vrf_fib_table(const struct net_device *dev) return vrf->tb_id; } -static struct rtable *vrf_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - struct rtable *rth = NULL; - - if (!(fl4->flowi4_flags & FLOWI_FLAG_L3MDEV_SRC)) { - struct net_vrf *vrf = netdev_priv(dev); - - rcu_read_lock(); - - rth = rcu_dereference(vrf->rth); - if (likely(rth)) - dst_hold(&rth->dst); - - rcu_read_unlock(); - } - - return rth; -} - -/* called under rcu_read_lock */ -static int vrf_get_saddr(struct net_device *dev, struct flowi4 *fl4) -{ - struct fib_result res = { .tclassid = 0 }; - struct net *net = dev_net(dev); - u32 orig_tos = fl4->flowi4_tos; - u8 flags = fl4->flowi4_flags; - u8 scope = fl4->flowi4_scope; - u8 tos = RT_FL_TOS(fl4); - int rc; - - if (unlikely(!fl4->daddr)) - return 0; - - fl4->flowi4_flags |= FLOWI_FLAG_SKIP_NH_OIF; - fl4->flowi4_iif = LOOPBACK_IFINDEX; - /* make sure oif is set to VRF device for lookup */ - fl4->flowi4_oif = dev->ifindex; - fl4->flowi4_tos = tos & IPTOS_RT_MASK; - fl4->flowi4_scope = ((tos & RTO_ONLINK) ? - RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); - - rc = fib_lookup(net, fl4, &res, 0); - if (!rc) { - if (res.type == RTN_LOCAL) - fl4->saddr = res.fi->fib_prefsrc ? : fl4->daddr; - else - fib_select_path(net, &res, fl4, -1); - } - - fl4->flowi4_flags = flags; - fl4->flowi4_tos = orig_tos; - fl4->flowi4_scope = scope; - - return rc; -} - static int vrf_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { return 0; @@ -970,106 +1030,44 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, } #if IS_ENABLED(CONFIG_IPV6) -static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, - struct flowi6 *fl6) +/* send to link-local or multicast address via interface enslaved to + * VRF device. Force lookup to VRF table without changing flow struct + */ +static struct dst_entry *vrf_link_scope_lookup(const struct net_device *dev, + struct flowi6 *fl6) { - bool need_strict = rt6_need_strict(&fl6->daddr); - struct net_vrf *vrf = netdev_priv(dev); struct net *net = dev_net(dev); + int flags = RT6_LOOKUP_F_IFACE; struct dst_entry *dst = NULL; struct rt6_info *rt; - /* send to link-local or multicast address */ - if (need_strict) { - int flags = RT6_LOOKUP_F_IFACE; - - /* VRF device does not have a link-local address and - * sending packets to link-local or mcast addresses over - * a VRF device does not make sense - */ - if (fl6->flowi6_oif == dev->ifindex) { - struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst; - - dst_hold(dst); - return dst; - } - - if (!ipv6_addr_any(&fl6->saddr)) - flags |= RT6_LOOKUP_F_HAS_SADDR; - - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); - if (rt) - dst = &rt->dst; - - } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { - - rcu_read_lock(); - - rt = rcu_dereference(vrf->rt6); - if (likely(rt)) { - dst = &rt->dst; - dst_hold(dst); - } - - rcu_read_unlock(); + /* VRF device does not have a link-local address and + * sending packets to link-local or mcast addresses over + * a VRF device does not make sense + */ + if (fl6->flowi6_oif == dev->ifindex) { + dst = &net->ipv6.ip6_null_entry->dst; + dst_hold(dst); + return dst; } - /* make sure oif is set to VRF device for lookup */ - if (!need_strict) - fl6->flowi6_oif = dev->ifindex; + if (!ipv6_addr_any(&fl6->saddr)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + if (rt) + dst = &rt->dst; return dst; } - -/* called under rcu_read_lock */ -static int vrf_get_saddr6(struct net_device *dev, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net *net = dev_net(dev); - struct dst_entry *dst; - struct rt6_info *rt; - int err; - - if (rt6_need_strict(&fl6->daddr)) { - rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, - RT6_LOOKUP_F_IFACE); - if (unlikely(!rt)) - return 0; - - dst = &rt->dst; - } else { - __u8 flags = fl6->flowi6_flags; - - fl6->flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; - fl6->flowi6_flags |= FLOWI_FLAG_SKIP_NH_OIF; - - dst = ip6_route_output(net, sk, fl6); - rt = (struct rt6_info *)dst; - - fl6->flowi6_flags = flags; - } - - err = dst->error; - if (!err) { - err = ip6_route_get_saddr(net, rt, &fl6->daddr, - sk ? inet6_sk(sk)->srcprefs : 0, - &fl6->saddr); - } - - dst_release(dst); - - return err; -} #endif static const struct l3mdev_ops vrf_l3mdev_ops = { .l3mdev_fib_table = vrf_fib_table, - .l3mdev_get_rtable = vrf_get_rtable, - .l3mdev_get_saddr = vrf_get_saddr, .l3mdev_l3_rcv = vrf_l3_rcv, + .l3mdev_l3_out = vrf_l3_out, #if IS_ENABLED(CONFIG_IPV6) - .l3mdev_get_rt6_dst = vrf_get_rt6_dst, - .l3mdev_get_saddr6 = vrf_get_saddr6, + .l3mdev_link_scope_lookup = vrf_link_scope_lookup, #endif }; diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6e65832051d6..e7d16687538b 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -27,7 +27,6 @@ #include #include #include -#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -288,7 +287,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (!net_eq(dev_net(vxlan->dev), vxlan->net) && nla_put_s32(skb, NDA_LINK_NETNSID, - peernet2id_alloc(dev_net(vxlan->dev), vxlan->net))) + peernet2id(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) @@ -861,20 +860,20 @@ out: /* Dump forwarding table */ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct vxlan_dev *vxlan = netdev_priv(dev); unsigned int h; + int err = 0; for (h = 0; h < FDB_HASH_SIZE; ++h) { struct vxlan_fdb *f; - int err; hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) { struct vxlan_rdst *rd; list_for_each_entry_rcu(rd, &f->remotes, list) { - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = vxlan_fdb_info(skb, vxlan, f, @@ -882,17 +881,15 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, rd); - if (err < 0) { - cb->args[1] = err; + if (err < 0) goto out; - } skip: - ++idx; + *idx += 1; } } } out: - return idx; + return err; } /* Watch incoming packets to learn mapping between Ethernet address @@ -1294,7 +1291,7 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb) struct metadata_dst *tun_dst; tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY, - vxlan_vni_to_tun_id(vni), sizeof(*md)); + key32_to_tunnel_id(vni), sizeof(*md)); if (!tun_dst) goto drop; @@ -1948,7 +1945,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, goto drop; } dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port; - vni = vxlan_tun_id_to_vni(info->key.tun_id); + vni = tunnel_id_to_key32(info->key.tun_id); remote_ip.sa.sa_family = ip_tunnel_info_af(info); if (remote_ip.sa.sa_family == AF_INET) { remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst; @@ -2106,6 +2103,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, vni, md, flags, udp_sum); if (err < 0) { dst_release(ndst); + dev->stats.tx_errors++; return; } udp_tunnel6_xmit_skb(ndst, sk, skb, dev, diff --git a/drivers/net/wan/fsl_ucc_hdlc.c b/drivers/net/wan/fsl_ucc_hdlc.c index 6f044450b702..5fbf83d5aa57 100644 --- a/drivers/net/wan/fsl_ucc_hdlc.c +++ b/drivers/net/wan/fsl_ucc_hdlc.c @@ -162,7 +162,7 @@ static int uhdlc_init(struct ucc_hdlc_private *priv) ALIGNMENT_OF_UCC_HDLC_PRAM); if (priv->ucc_pram_offset < 0) { - dev_err(priv->dev, "Can not allocate MURAM for hdlc prameter.\n"); + dev_err(priv->dev, "Can not allocate MURAM for hdlc parameter.\n"); ret = -ENOMEM; goto free_tx_bd; } diff --git a/drivers/net/wan/sbni.c b/drivers/net/wan/sbni.c index d98c7e57137d..3a421ca8a4d0 100644 --- a/drivers/net/wan/sbni.c +++ b/drivers/net/wan/sbni.c @@ -582,8 +582,8 @@ handle_channel( struct net_device *dev ) /* - * Routine returns 1 if it need to acknoweledge received frame. - * Empty frame received without errors won't be acknoweledged. + * Routine returns 1 if it needs to acknowledge received frame. + * Empty frame received without errors won't be acknowledged. */ static int diff --git a/drivers/net/wimax/i2400m/usb-notif.c b/drivers/net/wimax/i2400m/usb-notif.c index fc1355d98bc6..5d429f816125 100644 --- a/drivers/net/wimax/i2400m/usb-notif.c +++ b/drivers/net/wimax/i2400m/usb-notif.c @@ -206,7 +206,6 @@ int i2400mu_notification_setup(struct i2400mu *i2400mu) i2400mu->notif_urb = usb_alloc_urb(0, GFP_KERNEL); if (!i2400mu->notif_urb) { ret = -ENOMEM; - dev_err(dev, "notification: cannot allocate URB\n"); goto error_alloc_urb; } epd = usb_get_epd(i2400mu->usb_iface, diff --git a/drivers/net/wireless/ath/ar5523/ar5523.c b/drivers/net/wireless/ath/ar5523/ar5523.c index 8aded24bcdf4..7a60d2e652da 100644 --- a/drivers/net/wireless/ath/ar5523/ar5523.c +++ b/drivers/net/wireless/ath/ar5523/ar5523.c @@ -706,10 +706,8 @@ static int ar5523_alloc_rx_bufs(struct ar5523 *ar) data->ar = ar; data->urb = usb_alloc_urb(0, GFP_KERNEL); - if (!data->urb) { - ar5523_err(ar, "could not allocate rx data urb\n"); + if (!data->urb) goto err; - } list_add_tail(&data->list, &ar->rx_data_free); atomic_inc(&ar->rx_data_free_cnt); } @@ -824,7 +822,6 @@ static void ar5523_tx_work_locked(struct ar5523 *ar) urb = usb_alloc_urb(0, GFP_KERNEL); if (!urb) { - ar5523_err(ar, "Failed to allocate TX urb\n"); ieee80211_free_txskb(ar->hw, skb); continue; } @@ -949,10 +946,8 @@ static int ar5523_alloc_tx_cmd(struct ar5523 *ar) init_completion(&cmd->done); cmd->urb_tx = usb_alloc_urb(0, GFP_KERNEL); - if (!cmd->urb_tx) { - ar5523_err(ar, "could not allocate urb\n"); + if (!cmd->urb_tx) return -ENOMEM; - } cmd->buf_tx = usb_alloc_coherent(ar->dev, AR5523_MAX_TXCMDSZ, GFP_KERNEL, &cmd->urb_tx->transfer_dma); diff --git a/drivers/net/wireless/ath/ath10k/ahb.c b/drivers/net/wireless/ath/ath10k/ahb.c index acec16b9cf49..766c63bf05c4 100644 --- a/drivers/net/wireless/ath/ath10k/ahb.c +++ b/drivers/net/wireless/ath/ath10k/ahb.c @@ -91,59 +91,37 @@ static int ath10k_ahb_clock_init(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); struct device *dev; - int ret; dev = &ar_ahb->pdev->dev; - ar_ahb->cmd_clk = clk_get(dev, "wifi_wcss_cmd"); + ar_ahb->cmd_clk = devm_clk_get(dev, "wifi_wcss_cmd"); if (IS_ERR_OR_NULL(ar_ahb->cmd_clk)) { ath10k_err(ar, "failed to get cmd clk: %ld\n", PTR_ERR(ar_ahb->cmd_clk)); - ret = ar_ahb->cmd_clk ? PTR_ERR(ar_ahb->cmd_clk) : -ENODEV; - goto out; + return ar_ahb->cmd_clk ? PTR_ERR(ar_ahb->cmd_clk) : -ENODEV; } - ar_ahb->ref_clk = clk_get(dev, "wifi_wcss_ref"); + ar_ahb->ref_clk = devm_clk_get(dev, "wifi_wcss_ref"); if (IS_ERR_OR_NULL(ar_ahb->ref_clk)) { ath10k_err(ar, "failed to get ref clk: %ld\n", PTR_ERR(ar_ahb->ref_clk)); - ret = ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV; - goto err_cmd_clk_put; + return ar_ahb->ref_clk ? PTR_ERR(ar_ahb->ref_clk) : -ENODEV; } - ar_ahb->rtc_clk = clk_get(dev, "wifi_wcss_rtc"); + ar_ahb->rtc_clk = devm_clk_get(dev, "wifi_wcss_rtc"); if (IS_ERR_OR_NULL(ar_ahb->rtc_clk)) { ath10k_err(ar, "failed to get rtc clk: %ld\n", PTR_ERR(ar_ahb->rtc_clk)); - ret = ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV; - goto err_ref_clk_put; + return ar_ahb->rtc_clk ? PTR_ERR(ar_ahb->rtc_clk) : -ENODEV; } return 0; - -err_ref_clk_put: - clk_put(ar_ahb->ref_clk); - -err_cmd_clk_put: - clk_put(ar_ahb->cmd_clk); - -out: - return ret; } static void ath10k_ahb_clock_deinit(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); - if (!IS_ERR_OR_NULL(ar_ahb->cmd_clk)) - clk_put(ar_ahb->cmd_clk); - - if (!IS_ERR_OR_NULL(ar_ahb->ref_clk)) - clk_put(ar_ahb->ref_clk); - - if (!IS_ERR_OR_NULL(ar_ahb->rtc_clk)) - clk_put(ar_ahb->rtc_clk); - ar_ahb->cmd_clk = NULL; ar_ahb->ref_clk = NULL; ar_ahb->rtc_clk = NULL; @@ -213,92 +191,51 @@ static int ath10k_ahb_rst_ctrl_init(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); struct device *dev; - int ret; dev = &ar_ahb->pdev->dev; - ar_ahb->core_cold_rst = reset_control_get(dev, "wifi_core_cold"); - if (IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) { + ar_ahb->core_cold_rst = devm_reset_control_get(dev, "wifi_core_cold"); + if (IS_ERR(ar_ahb->core_cold_rst)) { ath10k_err(ar, "failed to get core cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->core_cold_rst)); - ret = ar_ahb->core_cold_rst ? - PTR_ERR(ar_ahb->core_cold_rst) : -ENODEV; - goto out; + return PTR_ERR(ar_ahb->core_cold_rst); } - ar_ahb->radio_cold_rst = reset_control_get(dev, "wifi_radio_cold"); - if (IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) { + ar_ahb->radio_cold_rst = devm_reset_control_get(dev, "wifi_radio_cold"); + if (IS_ERR(ar_ahb->radio_cold_rst)) { ath10k_err(ar, "failed to get radio cold rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_cold_rst)); - ret = ar_ahb->radio_cold_rst ? - PTR_ERR(ar_ahb->radio_cold_rst) : -ENODEV; - goto err_core_cold_rst_put; + return PTR_ERR(ar_ahb->radio_cold_rst); } - ar_ahb->radio_warm_rst = reset_control_get(dev, "wifi_radio_warm"); - if (IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) { + ar_ahb->radio_warm_rst = devm_reset_control_get(dev, "wifi_radio_warm"); + if (IS_ERR(ar_ahb->radio_warm_rst)) { ath10k_err(ar, "failed to get radio warm rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_warm_rst)); - ret = ar_ahb->radio_warm_rst ? - PTR_ERR(ar_ahb->radio_warm_rst) : -ENODEV; - goto err_radio_cold_rst_put; + return PTR_ERR(ar_ahb->radio_warm_rst); } - ar_ahb->radio_srif_rst = reset_control_get(dev, "wifi_radio_srif"); - if (IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) { + ar_ahb->radio_srif_rst = devm_reset_control_get(dev, "wifi_radio_srif"); + if (IS_ERR(ar_ahb->radio_srif_rst)) { ath10k_err(ar, "failed to get radio srif rst ctrl: %ld\n", PTR_ERR(ar_ahb->radio_srif_rst)); - ret = ar_ahb->radio_srif_rst ? - PTR_ERR(ar_ahb->radio_srif_rst) : -ENODEV; - goto err_radio_warm_rst_put; + return PTR_ERR(ar_ahb->radio_srif_rst); } - ar_ahb->cpu_init_rst = reset_control_get(dev, "wifi_cpu_init"); - if (IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) { + ar_ahb->cpu_init_rst = devm_reset_control_get(dev, "wifi_cpu_init"); + if (IS_ERR(ar_ahb->cpu_init_rst)) { ath10k_err(ar, "failed to get cpu init rst ctrl: %ld\n", PTR_ERR(ar_ahb->cpu_init_rst)); - ret = ar_ahb->cpu_init_rst ? - PTR_ERR(ar_ahb->cpu_init_rst) : -ENODEV; - goto err_radio_srif_rst_put; + return PTR_ERR(ar_ahb->cpu_init_rst); } return 0; - -err_radio_srif_rst_put: - reset_control_put(ar_ahb->radio_srif_rst); - -err_radio_warm_rst_put: - reset_control_put(ar_ahb->radio_warm_rst); - -err_radio_cold_rst_put: - reset_control_put(ar_ahb->radio_cold_rst); - -err_core_cold_rst_put: - reset_control_put(ar_ahb->core_cold_rst); - -out: - return ret; } static void ath10k_ahb_rst_ctrl_deinit(struct ath10k *ar) { struct ath10k_ahb *ar_ahb = ath10k_ahb_priv(ar); - if (!IS_ERR_OR_NULL(ar_ahb->core_cold_rst)) - reset_control_put(ar_ahb->core_cold_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_cold_rst)) - reset_control_put(ar_ahb->radio_cold_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_warm_rst)) - reset_control_put(ar_ahb->radio_warm_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->radio_srif_rst)) - reset_control_put(ar_ahb->radio_srif_rst); - - if (!IS_ERR_OR_NULL(ar_ahb->cpu_init_rst)) - reset_control_put(ar_ahb->cpu_init_rst); - ar_ahb->core_cold_rst = NULL; ar_ahb->radio_cold_rst = NULL; ar_ahb->radio_warm_rst = NULL; @@ -462,13 +399,13 @@ static void ath10k_ahb_halt_chip(struct ath10k *ar) static irqreturn_t ath10k_ahb_interrupt_handler(int irq, void *arg) { struct ath10k *ar = arg; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); if (!ath10k_pci_irq_pending(ar)) return IRQ_NONE; ath10k_pci_disable_and_clear_legacy_irq(ar); - tasklet_schedule(&ar_pci->intr_tq); + ath10k_pci_irq_msi_fw_mask(ar); + napi_schedule(&ar->napi); return IRQ_HANDLED; } @@ -572,12 +509,13 @@ static int ath10k_ahb_resource_init(struct ath10k *ar) ar_ahb->irq = platform_get_irq_byname(pdev, "legacy"); if (ar_ahb->irq < 0) { ath10k_err(ar, "failed to get irq number: %d\n", ar_ahb->irq); + ret = ar_ahb->irq; goto err_clock_deinit; } ath10k_dbg(ar, ATH10K_DBG_BOOT, "irq: %d\n", ar_ahb->irq); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%p mem_len: %lu gcc mem: 0x%p tcsr_mem: 0x%p\n", + ath10k_dbg(ar, ATH10K_DBG_BOOT, "mem: 0x%pK mem_len: %lu gcc mem: 0x%pK tcsr_mem: 0x%pK\n", ar_ahb->mem, ar_ahb->mem_len, ar_ahb->gcc_mem, ar_ahb->tcsr_mem); return 0; @@ -717,6 +655,9 @@ static void ath10k_ahb_hif_stop(struct ath10k *ar) synchronize_irq(ar_ahb->irq); ath10k_pci_flush(ar); + + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); } static int ath10k_ahb_hif_power_up(struct ath10k *ar) @@ -748,6 +689,7 @@ static int ath10k_ahb_hif_power_up(struct ath10k *ar) ath10k_err(ar, "could not wake up target CPU: %d\n", ret); goto err_ce_deinit; } + napi_enable(&ar->napi); return 0; @@ -831,7 +773,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) goto err_resource_deinit; } - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); ret = ath10k_ahb_request_irq_legacy(ar); if (ret) @@ -846,6 +788,7 @@ static int ath10k_ahb_probe(struct platform_device *pdev) chip_id = ath10k_ahb_soc_read32(ar, SOC_CHIP_ID_ADDRESS); if (chip_id == 0xffffffff) { ath10k_err(ar, "failed to get chip id\n"); + ret = -ENODEV; goto err_halt_device; } diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c index 3d29b0875b3e..2872d347ea78 100644 --- a/drivers/net/wireless/ath/ath10k/bmi.c +++ b/drivers/net/wireless/ath/ath10k/bmi.c @@ -221,7 +221,7 @@ int ath10k_bmi_lz_data(struct ath10k *ar, const void *buffer, u32 length) u32 txlen; int ret; - ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%p length %d\n", + ath10k_dbg(ar, ATH10K_DBG_BMI, "bmi lz data buffer 0x%pK length %d\n", buffer, length); if (ar->bmi.done_sent) { @@ -287,7 +287,7 @@ int ath10k_bmi_fast_download(struct ath10k *ar, int ret; ath10k_dbg(ar, ATH10K_DBG_BMI, - "bmi fast download address 0x%x buffer 0x%p length %d\n", + "bmi fast download address 0x%x buffer 0x%pK length %d\n", address, buffer, length); ret = ath10k_bmi_lz_stream_start(ar, address); diff --git a/drivers/net/wireless/ath/ath10k/ce.c b/drivers/net/wireless/ath/ath10k/ce.c index 9fb8d7472d18..0b4d79659884 100644 --- a/drivers/net/wireless/ath/ath10k/ce.c +++ b/drivers/net/wireless/ath/ath10k/ce.c @@ -39,7 +39,7 @@ * chooses what to send (buffer address, length). The destination * side keeps a supply of "anonymous receive buffers" available and * it handles incoming data as it arrives (when the destination - * recieves an interrupt). + * receives an interrupt). * * The sender may send a simple buffer (address/length) or it may * send a small list of buffers. When a small list is sent, hardware @@ -433,6 +433,13 @@ void ath10k_ce_rx_update_write_idx(struct ath10k_ce_pipe *pipe, u32 nentries) unsigned int nentries_mask = dest_ring->nentries_mask; unsigned int write_index = dest_ring->write_index; u32 ctrl_addr = pipe->ctrl_addr; + u32 cur_write_idx = ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr); + + /* Prevent CE ring stuck issue that will occur when ring is full. + * Make sure that write index is 1 less than read index. + */ + if ((cur_write_idx + nentries) == dest_ring->sw_index) + nentries -= 1; write_index = CE_RING_IDX_ADD(nentries_mask, write_index, nentries); ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index); @@ -840,7 +847,7 @@ static int ath10k_ce_init_src_ring(struct ath10k *ar, ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot init ce src ring id %d entries %d base_addr %p\n", + "boot init ce src ring id %d entries %d base_addr %pK\n", ce_id, nentries, src_ring->base_addr_owner_space); return 0; @@ -874,7 +881,7 @@ static int ath10k_ce_init_dest_ring(struct ath10k *ar, ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries); ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot ce dest ring id %d entries %d base_addr %p\n", + "boot ce dest ring id %d entries %d base_addr %pK\n", ce_id, nentries, dest_ring->base_addr_owner_space); return 0; diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index e88982921aa3..21ae8d663e67 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -60,7 +60,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 2116, .fw = { .dir = QCA988X_HW_2_0_FW_DIR, @@ -68,6 +67,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA988X_BOARD_DATA_SZ, .board_ext_size = QCA988X_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA9887_HW_1_0_VERSION, @@ -79,7 +80,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 2116, .fw = { .dir = QCA9887_HW_1_0_FW_DIR, @@ -87,6 +87,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9887_BOARD_DATA_SZ, .board_ext_size = QCA9887_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_2_1_VERSION, @@ -104,6 +106,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_2_1_VERSION, @@ -114,7 +118,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_2_1_FW_DIR, @@ -122,6 +125,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_3_0_VERSION, @@ -132,7 +137,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { .dir = QCA6174_HW_3_0_FW_DIR, @@ -140,6 +144,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA6174_HW_3_2_VERSION, @@ -150,7 +156,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .otp_exe_param = 0, .channel_counters_freq_hz = 88000, .max_probe_resp_desc_thres = 0, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_AFTER, .cal_data_len = 8124, .fw = { /* uses same binaries as hw3.0 */ @@ -159,6 +164,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA6174_BOARD_DATA_SZ, .board_ext_size = QCA6174_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA99X0_HW_2_0_DEV_VERSION, @@ -171,7 +178,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, @@ -182,6 +188,9 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9984_HW_1_0_DEV_VERSION, @@ -194,7 +203,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0xf, .rx_chain_mask = 0xf, .max_spatial_stream = 4, @@ -205,6 +213,9 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9888_HW_2_0_DEV_VERSION, @@ -216,7 +227,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .continuous_frag_desc = true, .channel_counters_freq_hz = 150000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 3, .rx_chain_mask = 3, .max_spatial_stream = 2, @@ -227,6 +237,9 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA99X0_BOARD_DATA_SZ, .board_ext_size = QCA99X0_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, { .id = QCA9377_HW_1_0_DEV_VERSION, @@ -244,6 +257,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA9377_HW_1_1_DEV_VERSION, @@ -261,6 +276,8 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA9377_BOARD_DATA_SZ, .board_ext_size = QCA9377_BOARD_EXT_DATA_SZ, }, + .hw_ops = &qca988x_ops, + .decap_align_bytes = 4, }, { .id = QCA4019_HW_1_0_DEV_VERSION, @@ -274,7 +291,6 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .cck_rate_map_rev2 = true, .channel_counters_freq_hz = 125000, .max_probe_resp_desc_thres = 24, - .hw_4addr_pad = ATH10K_HW_4ADDR_PAD_BEFORE, .tx_chain_mask = 0x3, .rx_chain_mask = 0x3, .max_spatial_stream = 2, @@ -285,6 +301,9 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = { .board_size = QCA4019_BOARD_DATA_SZ, .board_ext_size = QCA4019_BOARD_EXT_DATA_SZ, }, + .sw_decrypt_mcast_mgmt = true, + .hw_ops = &qca99x0_ops, + .decap_align_bytes = 1, }, }; @@ -304,6 +323,7 @@ static const char *const ath10k_core_fw_feature_str[] = { [ATH10K_FW_FEATURE_MFP_SUPPORT] = "mfp", [ATH10K_FW_FEATURE_PEER_FLOW_CONTROL] = "peer-flow-ctrl", [ATH10K_FW_FEATURE_BTCOEX_PARAM] = "btcoex-param", + [ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR] = "skip-null-func-war", }; static unsigned int ath10k_core_get_fw_feature_str(char *buf, @@ -699,7 +719,7 @@ static int ath10k_download_and_run_otp(struct ath10k *ar) if (!ar->running_fw->fw_file.otp_data || !ar->running_fw->fw_file.otp_len) { - ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %p otp_len %zd)!\n", + ath10k_warn(ar, "Not running otp, calibration will be incorrect (otp-data %pK otp_len %zd)!\n", ar->running_fw->fw_file.otp_data, ar->running_fw->fw_file.otp_len); return 0; @@ -745,7 +765,7 @@ static int ath10k_download_fw(struct ath10k *ar) data = ar->running_fw->fw_file.firmware_data; data_len = ar->running_fw->fw_file.firmware_len; - ret = ath10k_swap_code_seg_configure(ar); + ret = ath10k_swap_code_seg_configure(ar, &ar->running_fw->fw_file); if (ret) { ath10k_err(ar, "failed to configure fw code swap: %d\n", ret); @@ -753,7 +773,7 @@ static int ath10k_download_fw(struct ath10k *ar) } ath10k_dbg(ar, ATH10K_DBG_BOOT, - "boot uploading firmware image %p len %d\n", + "boot uploading firmware image %pK len %d\n", data, data_len); ret = ath10k_bmi_fast_download(ar, address, data, data_len); @@ -787,7 +807,7 @@ static void ath10k_core_free_firmware_files(struct ath10k *ar) if (!IS_ERR(ar->pre_cal_file)) release_firmware(ar->pre_cal_file); - ath10k_swap_code_seg_release(ar); + ath10k_swap_code_seg_release(ar, &ar->normal_mode_fw.fw_file); ar->normal_mode_fw.fw_file.otp_data = NULL; ar->normal_mode_fw.fw_file.otp_len = 0; @@ -1497,14 +1517,14 @@ static void ath10k_core_restart(struct work_struct *work) ieee80211_stop_queues(ar->hw); ath10k_drain_tx(ar); - complete_all(&ar->scan.started); - complete_all(&ar->scan.completed); - complete_all(&ar->scan.on_channel); - complete_all(&ar->offchan_tx_completed); - complete_all(&ar->install_key_done); - complete_all(&ar->vdev_setup_done); - complete_all(&ar->thermal.wmi_sync); - complete_all(&ar->bss_survey_done); + complete(&ar->scan.started); + complete(&ar->scan.completed); + complete(&ar->scan.on_channel); + complete(&ar->offchan_tx_completed); + complete(&ar->install_key_done); + complete(&ar->vdev_setup_done); + complete(&ar->thermal.wmi_sync); + complete(&ar->bss_survey_done); wake_up(&ar->htt.empty_tx_wq); wake_up(&ar->wmi.tx_credits_wq); wake_up(&ar->peer_mapping_wq); @@ -1705,6 +1725,55 @@ static int ath10k_core_init_firmware_features(struct ath10k *ar) return 0; } +static int ath10k_core_reset_rx_filter(struct ath10k *ar) +{ + int ret; + int vdev_id; + int vdev_type; + int vdev_subtype; + const u8 *vdev_addr; + + vdev_id = 0; + vdev_type = WMI_VDEV_TYPE_STA; + vdev_subtype = ath10k_wmi_get_vdev_subtype(ar, WMI_VDEV_SUBTYPE_NONE); + vdev_addr = ar->mac_addr; + + ret = ath10k_wmi_vdev_create(ar, vdev_id, vdev_type, vdev_subtype, + vdev_addr); + if (ret) { + ath10k_err(ar, "failed to create dummy vdev: %d\n", ret); + return ret; + } + + ret = ath10k_wmi_vdev_delete(ar, vdev_id); + if (ret) { + ath10k_err(ar, "failed to delete dummy vdev: %d\n", ret); + return ret; + } + + /* WMI and HTT may use separate HIF pipes and are not guaranteed to be + * serialized properly implicitly. + * + * Moreover (most) WMI commands have no explicit acknowledges. It is + * possible to infer it implicitly by poking firmware with echo + * command - getting a reply means all preceding comments have been + * (mostly) processed. + * + * In case of vdev create/delete this is sufficient. + * + * Without this it's possible to end up with a race when HTT Rx ring is + * started before vdev create/delete hack is complete allowing a short + * window of opportunity to receive (and Tx ACK) a bunch of frames. + */ + ret = ath10k_wmi_barrier(ar); + if (ret) { + ath10k_err(ar, "failed to ping firmware: %d\n", ret); + return ret; + } + + return 0; +} + int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, const struct ath10k_fw_components *fw) { @@ -1872,6 +1941,25 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_hif_stop; } + /* Some firmware revisions do not properly set up hardware rx filter + * registers. + * + * A known example from QCA9880 and 10.2.4 is that MAC_PCU_ADDR1_MASK + * is filled with 0s instead of 1s allowing HW to respond with ACKs to + * any frames that matches MAC_PCU_RX_FILTER which is also + * misconfigured to accept anything. + * + * The ADDR1 is programmed using internal firmware structure field and + * can't be (easily/sanely) reached from the driver explicitly. It is + * possible to implicitly make it correct by creating a dummy vdev and + * then deleting it. + */ + status = ath10k_core_reset_rx_filter(ar); + if (status) { + ath10k_err(ar, "failed to reset rx filter: %d\n", status); + goto err_hif_stop; + } + /* If firmware indicates Full Rx Reorder support it must be used in a * slightly different manner. Let HTT code know. */ @@ -1884,7 +1972,10 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode, goto err_hif_stop; } - ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; + if (ar->max_num_vdevs >= 64) + ar->free_vdev_map = 0xFFFFFFFFFFFFFFFFLL; + else + ar->free_vdev_map = (1LL << ar->max_num_vdevs) - 1; INIT_LIST_HEAD(&ar->arvifs); @@ -2031,7 +2122,7 @@ static int ath10k_core_probe_fw(struct ath10k *ar) goto err_free_firmware_files; } - ret = ath10k_swap_code_seg_init(ar); + ret = ath10k_swap_code_seg_init(ar, &ar->normal_mode_fw.fw_file); if (ret) { ath10k_err(ar, "failed to initialize code swap segment: %d\n", ret); @@ -2072,6 +2163,9 @@ static void ath10k_core_register_work(struct work_struct *work) struct ath10k *ar = container_of(work, struct ath10k, register_work); int status; + /* peer stats are enabled by default */ + set_bit(ATH10K_FLAG_PEER_STATS, &ar->dev_flags); + status = ath10k_core_probe_fw(ar); if (status) { ath10k_err(ar, "could not probe fw (%d)\n", status); @@ -2249,6 +2343,8 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev, INIT_WORK(&ar->register_work, ath10k_core_register_work); INIT_WORK(&ar->restart_work, ath10k_core_restart); + init_dummy_netdev(&ar->napi_dev); + ret = ath10k_debug_create(ar); if (ret) goto err_free_aux_wq; diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h index 30ae5bf81611..dda49af1eb74 100644 --- a/drivers/net/wireless/ath/ath10k/core.h +++ b/drivers/net/wireless/ath/ath10k/core.h @@ -65,6 +65,10 @@ #define ATH10K_KEEPALIVE_MAX_IDLE 3895 #define ATH10K_KEEPALIVE_MAX_UNRESPONSIVE 3900 +/* NAPI poll budget */ +#define ATH10K_NAPI_BUDGET 64 +#define ATH10K_NAPI_QUOTA_LIMIT 60 + struct ath10k; enum ath10k_bus { @@ -142,6 +146,7 @@ struct ath10k_wmi { enum ath10k_htc_ep_id eid; struct completion service_ready; struct completion unified_ready; + struct completion barrier; wait_queue_head_t tx_credits_wq; DECLARE_BITMAP(svc_map, WMI_SERVICE_MAX); struct wmi_cmd_map *cmd; @@ -196,10 +201,10 @@ struct ath10k_fw_stats_pdev { /* PDEV stats */ s32 ch_noise_floor; - u32 tx_frame_count; - u32 rx_frame_count; - u32 rx_clear_count; - u32 cycle_count; + u32 tx_frame_count; /* Cycles spent transmitting frames */ + u32 rx_frame_count; /* Cycles spent receiving frames */ + u32 rx_clear_count; /* Total channel busy time, evidently */ + u32 cycle_count; /* Total on-channel time */ u32 phy_err_count; u32 chan_tx_power; u32 ack_rx_bad; @@ -440,7 +445,7 @@ struct ath10k_debug { struct completion tpc_complete; /* protected by conf_mutex */ - u32 fw_dbglog_mask; + u64 fw_dbglog_mask; u32 fw_dbglog_level; u32 pktlog_filter; u32 reg_addr; @@ -551,6 +556,13 @@ enum ath10k_fw_features { */ ATH10K_FW_FEATURE_BTCOEX_PARAM = 14, + /* Older firmware with HTT delivers incorrect tx status for null func + * frames to driver, but this fixed in 10.2 and 10.4 firmware versions. + * Also this workaround results in reporting of incorrect null func + * status for 10.4. This flag is used to skip the workaround. + */ + ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR = 15, + /* keep last */ ATH10K_FW_FEATURE_COUNT, }; @@ -663,6 +675,15 @@ struct ath10k_fw_file { const void *codeswap_data; size_t codeswap_len; + + /* The original idea of struct ath10k_fw_file was that it only + * contains struct firmware and pointers to various parts (actual + * firmware binary, otp, metadata etc) of the file. This seg_info + * is actually created separate but as this is used similarly as + * the other firmware components it's more convenient to have it + * here. + */ + struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info; }; struct ath10k_fw_components { @@ -715,53 +736,7 @@ struct ath10k { struct ath10k_htc htc; struct ath10k_htt htt; - struct ath10k_hw_params { - u32 id; - u16 dev_id; - const char *name; - u32 patch_load_addr; - int uart_pin; - u32 otp_exe_param; - - /* Type of hw cycle counter wraparound logic, for more info - * refer enum ath10k_hw_cc_wraparound_type. - */ - enum ath10k_hw_cc_wraparound_type cc_wraparound_type; - - /* Some of chip expects fragment descriptor to be continuous - * memory for any TX operation. Set continuous_frag_desc flag - * for the hardware which have such requirement. - */ - bool continuous_frag_desc; - - /* CCK hardware rate table mapping for the newer chipsets - * like QCA99X0, QCA4019 got revised. The CCK h/w rate values - * are in a proper order with respect to the rate/preamble - */ - bool cck_rate_map_rev2; - - u32 channel_counters_freq_hz; - - /* Mgmt tx descriptors threshold for limiting probe response - * frames. - */ - u32 max_probe_resp_desc_thres; - - /* The padding bytes's location is different on various chips */ - enum ath10k_hw_4addr_pad hw_4addr_pad; - - u32 tx_chain_mask; - u32 rx_chain_mask; - u32 max_spatial_stream; - u32 cal_data_len; - - struct ath10k_hw_params_fw { - const char *dir; - const char *board; - size_t board_size; - size_t board_ext_size; - } fw; - } hw_params; + struct ath10k_hw_params hw_params; /* contains the firmware images used with ATH10K_FIRMWARE_MODE_NORMAL */ struct ath10k_fw_components normal_mode_fw; @@ -774,10 +749,6 @@ struct ath10k { const struct firmware *pre_cal_file; const struct firmware *cal_file; - struct { - struct ath10k_swap_code_seg_info *firmware_swap_code_seg_info; - } swap; - struct { u32 vendor; u32 device; @@ -936,6 +907,10 @@ struct ath10k { struct ath10k_thermal thermal; struct ath10k_wow wow; + /* NAPI */ + struct net_device napi_dev; + struct napi_struct napi; + /* must be last */ u8 drv_priv[0] __aligned(sizeof(void *)); }; diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c index 8f0fd41dfd4b..832da6ed9f13 100644 --- a/drivers/net/wireless/ath/ath10k/debug.c +++ b/drivers/net/wireless/ath/ath10k/debug.c @@ -1228,9 +1228,9 @@ static ssize_t ath10k_read_fw_dbglog(struct file *file, { struct ath10k *ar = file->private_data; unsigned int len; - char buf[64]; + char buf[96]; - len = scnprintf(buf, sizeof(buf), "0x%08x %u\n", + len = scnprintf(buf, sizeof(buf), "0x%16llx %u\n", ar->debug.fw_dbglog_mask, ar->debug.fw_dbglog_level); return simple_read_from_buffer(user_buf, count, ppos, buf, len); @@ -1242,15 +1242,16 @@ static ssize_t ath10k_write_fw_dbglog(struct file *file, { struct ath10k *ar = file->private_data; int ret; - char buf[64]; - unsigned int log_level, mask; + char buf[96]; + unsigned int log_level; + u64 mask; simple_write_to_buffer(buf, sizeof(buf) - 1, ppos, user_buf, count); /* make sure that buf is null terminated */ buf[sizeof(buf) - 1] = 0; - ret = sscanf(buf, "%x %u", &mask, &log_level); + ret = sscanf(buf, "%llx %u", &mask, &log_level); if (!ret) return -EINVAL; diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c index 5b3c6bcf9598..175aae38c375 100644 --- a/drivers/net/wireless/ath/ath10k/htc.c +++ b/drivers/net/wireless/ath/ath10k/htc.c @@ -44,7 +44,7 @@ static struct sk_buff *ath10k_htc_build_tx_ctrl_skb(void *ar) skb_cb = ATH10K_SKB_CB(skb); memset(skb_cb, 0, sizeof(*skb_cb)); - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %p\n", __func__, skb); + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: skb %pK\n", __func__, skb); return skb; } @@ -62,7 +62,7 @@ static void ath10k_htc_notify_tx_completion(struct ath10k_htc_ep *ep, { struct ath10k *ar = ep->htc->ar; - ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %p\n", __func__, + ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %pK\n", __func__, ep->eid, skb); ath10k_htc_restore_tx_skb(ep->htc, skb); @@ -404,7 +404,7 @@ void ath10k_htc_rx_completion_handler(struct ath10k *ar, struct sk_buff *skb) goto out; } - ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_HTC, "htc rx completion ep %d skb %pK\n", eid, skb); ep->ep_ops.ep_rx_complete(ar, skb); diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h index 430a83e142aa..0d2ed09f202b 100644 --- a/drivers/net/wireless/ath/ath10k/htt.h +++ b/drivers/net/wireless/ath/ath10k/htt.h @@ -595,7 +595,7 @@ enum htt_rx_mpdu_status { /* only accept EAPOL frames */ HTT_RX_IND_MPDU_STATUS_UNAUTH_PEER, HTT_RX_IND_MPDU_STATUS_OUT_OF_SYNC, - /* Non-data in promiscous mode */ + /* Non-data in promiscuous mode */ HTT_RX_IND_MPDU_STATUS_MGMT_CTRL, HTT_RX_IND_MPDU_STATUS_TKIP_MIC_ERR, HTT_RX_IND_MPDU_STATUS_DECRYPT_ERR, @@ -900,7 +900,7 @@ struct htt_rx_in_ord_ind { * Purpose: indicate how many 32-bit integers follow the message header * - NUM_CHARS * Bits 31:16 - * Purpose: indicate how many 8-bit charaters follow the series of integers + * Purpose: indicate how many 8-bit characters follow the series of integers */ struct htt_rx_test { u8 num_ints; @@ -1042,10 +1042,10 @@ struct htt_dbg_stats_wal_tx_stats { /* illegal rate phy errors */ __le32 illgl_rate_phy_err; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_cont_xretry; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_tx_timeout; /* wal pdev resets */ @@ -1665,7 +1665,6 @@ struct ath10k_htt { /* This is used to group tx/rx completions separately and process them * in batches to reduce cache stalls */ - struct tasklet_struct txrx_compl_task; struct sk_buff_head rx_compl_q; struct sk_buff_head rx_in_ord_compl_q; struct sk_buff_head tx_fetch_ind_q; @@ -1798,5 +1797,6 @@ int ath10k_htt_tx(struct ath10k_htt *htt, struct sk_buff *msdu); void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, struct sk_buff *skb); +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget); #endif diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index 24c8d65bcf34..0b4c1562420f 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -34,7 +34,6 @@ #define HTT_RX_RING_REFILL_RESCHED_MS 5 static int ath10k_htt_rx_get_csum_state(struct sk_buff *skb); -static void ath10k_htt_txrx_compl_task(unsigned long ptr); static struct sk_buff * ath10k_htt_rx_find_skb_paddr(struct ath10k *ar, u32 paddr) @@ -226,7 +225,6 @@ int ath10k_htt_rx_ring_refill(struct ath10k *ar) void ath10k_htt_rx_free(struct ath10k_htt *htt) { del_timer_sync(&htt->rx_ring.refill_retry_timer); - tasklet_kill(&htt->txrx_compl_task); skb_queue_purge(&htt->rx_compl_q); skb_queue_purge(&htt->rx_in_ord_compl_q); @@ -520,9 +518,6 @@ int ath10k_htt_rx_alloc(struct ath10k_htt *htt) skb_queue_head_init(&htt->tx_fetch_ind_q); atomic_set(&htt->num_mpdus_ready, 0); - tasklet_init(&htt->txrx_compl_task, ath10k_htt_txrx_compl_task, - (unsigned long)htt); - ath10k_dbg(ar, ATH10K_DBG_BOOT, "htt rx ring size %d fill_level %d\n", htt->rx_ring.size, htt->rx_ring.fill_level); return 0; @@ -931,7 +926,7 @@ static void ath10k_process_rx(struct ath10k *ar, *status = *rx_status; ath10k_dbg(ar, ATH10K_DBG_DATA, - "rx skb %p len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n", + "rx skb %pK len %u peer %pM %s %s sn %u %s%s%s%s%s %srate_idx %u vht_nss %u freq %u band %u flag 0x%llx fcs-err %i mic-err %i amsdu-more %i\n", skb, skb->len, ieee80211_get_SA(hdr), @@ -958,7 +953,7 @@ static void ath10k_process_rx(struct ath10k *ar, trace_ath10k_rx_hdr(ar, skb->data, skb->len); trace_ath10k_rx_payload(ar, skb->data, skb->len); - ieee80211_rx(ar->hw, skb); + ieee80211_rx_napi(ar->hw, NULL, skb, &ar->napi); } static int ath10k_htt_rx_nwifi_hdrlen(struct ath10k *ar, @@ -1056,9 +1051,11 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, const u8 first_hdr[64]) { struct ieee80211_hdr *hdr; + struct htt_rx_desc *rxd; size_t hdr_len; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; + int l3_pad_bytes; /* Delivered decapped frame: * [nwifi 802.11 header] <-- replaced with 802.11 hdr @@ -1072,19 +1069,12 @@ static void ath10k_htt_rx_h_undecap_nwifi(struct ath10k *ar, */ /* pull decapped header and copy SA & DA */ - if ((ar->hw_params.hw_4addr_pad == ATH10K_HW_4ADDR_PAD_BEFORE) && - ieee80211_has_a4(((struct ieee80211_hdr *)first_hdr)->frame_control)) { - /* The QCA99X0 4 address mode pad 2 bytes at the - * beginning of MSDU - */ - hdr = (struct ieee80211_hdr *)(msdu->data + 2); - /* The skb length need be extended 2 as the 2 bytes at the tail - * be excluded due to the padding - */ - skb_put(msdu, 2); - } else { - hdr = (struct ieee80211_hdr *)(msdu->data); - } + rxd = (void *)msdu->data - sizeof(*rxd); + + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + + hdr = (struct ieee80211_hdr *)(msdu->data + l3_pad_bytes); hdr_len = ath10k_htt_rx_nwifi_hdrlen(ar, hdr); ether_addr_copy(da, ieee80211_get_DA(hdr)); @@ -1113,6 +1103,7 @@ static void *ath10k_htt_rx_h_find_rfc1042(struct ath10k *ar, size_t hdr_len, crypto_len; void *rfc1042; bool is_first, is_last, is_amsdu; + int bytes_aligned = ar->hw_params.decap_align_bytes; rxd = (void *)msdu->data - sizeof(*rxd); hdr = (void *)rxd->rx_hdr_status; @@ -1129,8 +1120,8 @@ static void *ath10k_htt_rx_h_find_rfc1042(struct ath10k *ar, hdr_len = ieee80211_hdrlen(hdr->frame_control); crypto_len = ath10k_htt_rx_crypto_param_len(ar, enctype); - rfc1042 += round_up(hdr_len, 4) + - round_up(crypto_len, 4); + rfc1042 += round_up(hdr_len, bytes_aligned) + + round_up(crypto_len, bytes_aligned); } if (is_amsdu) @@ -1151,6 +1142,8 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, void *rfc1042; u8 da[ETH_ALEN]; u8 sa[ETH_ALEN]; + int l3_pad_bytes; + struct htt_rx_desc *rxd; /* Delivered decapped frame: * [eth header] <-- replaced with 802.11 hdr & rfc1042/llc @@ -1161,6 +1154,11 @@ static void ath10k_htt_rx_h_undecap_eth(struct ath10k *ar, if (WARN_ON_ONCE(!rfc1042)) return; + rxd = (void *)msdu->data - sizeof(*rxd); + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, l3_pad_bytes); + /* pull decapped header and copy SA & DA */ eth = (struct ethhdr *)msdu->data; ether_addr_copy(da, eth->h_dest); @@ -1191,6 +1189,8 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, { struct ieee80211_hdr *hdr; size_t hdr_len; + int l3_pad_bytes; + struct htt_rx_desc *rxd; /* Delivered decapped frame: * [amsdu header] <-- replaced with 802.11 hdr @@ -1198,7 +1198,11 @@ static void ath10k_htt_rx_h_undecap_snap(struct ath10k *ar, * [payload] */ - skb_pull(msdu, sizeof(struct amsdu_subframe_hdr)); + rxd = (void *)msdu->data - sizeof(*rxd); + l3_pad_bytes = ath10k_rx_desc_get_l3_pad_bytes(&ar->hw_params, rxd); + + skb_put(msdu, l3_pad_bytes); + skb_pull(msdu, sizeof(struct amsdu_subframe_hdr) + l3_pad_bytes); hdr = (struct ieee80211_hdr *)first_hdr; hdr_len = ieee80211_hdrlen(hdr->frame_control); @@ -1527,7 +1531,7 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) struct ath10k *ar = htt->ar; struct ieee80211_rx_status *rx_status = &htt->rx_status; struct sk_buff_head amsdu; - int ret; + int ret, num_msdus; __skb_queue_head_init(&amsdu); @@ -1549,13 +1553,14 @@ static int ath10k_htt_rx_handle_amsdu(struct ath10k_htt *htt) return ret; } + num_msdus = skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, rx_status, 0xffff); ath10k_htt_rx_h_unchain(ar, &amsdu, ret > 0); ath10k_htt_rx_h_filter(ar, &amsdu, rx_status); ath10k_htt_rx_h_mpdu(ar, &amsdu, rx_status); ath10k_htt_rx_h_deliver(ar, &amsdu, rx_status); - return 0; + return num_msdus; } static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, @@ -1579,15 +1584,6 @@ static void ath10k_htt_rx_proc_rx_ind(struct ath10k_htt *htt, mpdu_count += mpdu_ranges[i].mpdu_count; atomic_add(mpdu_count, &htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); -} - -static void ath10k_htt_rx_frag_handler(struct ath10k_htt *htt) -{ - atomic_inc(&htt->num_mpdus_ready); - - tasklet_schedule(&htt->txrx_compl_task); } static void ath10k_htt_rx_tx_compl_ind(struct ath10k *ar, @@ -1772,14 +1768,15 @@ static void ath10k_htt_rx_h_rx_offload_prot(struct ieee80211_rx_status *status, RX_FLAG_MMIC_STRIPPED; } -static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, - struct sk_buff_head *list) +static int ath10k_htt_rx_h_rx_offload(struct ath10k *ar, + struct sk_buff_head *list) { struct ath10k_htt *htt = &ar->htt; struct ieee80211_rx_status *status = &htt->rx_status; struct htt_rx_offload_msdu *rx; struct sk_buff *msdu; size_t offset; + int num_msdu = 0; while ((msdu = __skb_dequeue(list))) { /* Offloaded frames don't have Rx descriptor. Instead they have @@ -1819,10 +1816,12 @@ static void ath10k_htt_rx_h_rx_offload(struct ath10k *ar, ath10k_htt_rx_h_rx_offload_prot(status, msdu); ath10k_htt_rx_h_channel(ar, status, NULL, rx->vdev_id); ath10k_process_rx(ar, status, msdu); + num_msdu++; } + return num_msdu; } -static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) +static int ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) { struct ath10k_htt *htt = &ar->htt; struct htt_resp *resp = (void *)skb->data; @@ -1835,12 +1834,12 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) u8 tid; bool offload; bool frag; - int ret; + int ret, num_msdus = 0; lockdep_assert_held(&htt->rx_ring.lock); if (htt->rx_confused) - return; + return -EIO; skb_pull(skb, sizeof(resp->hdr)); skb_pull(skb, sizeof(resp->rx_in_ord_ind)); @@ -1859,7 +1858,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (skb->len < msdu_count * sizeof(*resp->rx_in_ord_ind.msdu_descs)) { ath10k_warn(ar, "dropping invalid in order rx indication\n"); - return; + return -EINVAL; } /* The event can deliver more than 1 A-MSDU. Each A-MSDU is later @@ -1870,14 +1869,14 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) if (ret < 0) { ath10k_warn(ar, "failed to pop paddr list: %d\n", ret); htt->rx_confused = true; - return; + return -EIO; } /* Offloaded frames are very different and need to be handled * separately. */ if (offload) - ath10k_htt_rx_h_rx_offload(ar, &list); + num_msdus = ath10k_htt_rx_h_rx_offload(ar, &list); while (!skb_queue_empty(&list)) { __skb_queue_head_init(&amsdu); @@ -1890,6 +1889,7 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) * better to report something than nothing though. This * should still give an idea about rx rate to the user. */ + num_msdus += skb_queue_len(&amsdu); ath10k_htt_rx_h_ppdu(ar, &amsdu, status, vdev_id); ath10k_htt_rx_h_filter(ar, &amsdu, status); ath10k_htt_rx_h_mpdu(ar, &amsdu, status); @@ -1902,9 +1902,10 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to extract amsdu: %d\n", ret); htt->rx_confused = true; __skb_queue_purge(&list); - return; + return -EIO; } } + return num_msdus; } static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, @@ -2267,7 +2268,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) } case HTT_T2H_MSG_TYPE_TX_COMPL_IND: ath10k_htt_rx_tx_compl_ind(htt->ar, skb); - tasklet_schedule(&htt->txrx_compl_task); break; case HTT_T2H_MSG_TYPE_SEC_IND: { struct ath10k *ar = htt->ar; @@ -2284,7 +2284,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) case HTT_T2H_MSG_TYPE_RX_FRAG_IND: { ath10k_dbg_dump(ar, ATH10K_DBG_HTT_DUMP, NULL, "htt event: ", skb->data, skb->len); - ath10k_htt_rx_frag_handler(htt); + atomic_inc(&htt->num_mpdus_ready); break; } case HTT_T2H_MSG_TYPE_TEST: @@ -2320,8 +2320,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } case HTT_T2H_MSG_TYPE_RX_IN_ORD_PADDR_IND: { - skb_queue_tail(&htt->rx_in_ord_compl_q, skb); - tasklet_schedule(&htt->txrx_compl_task); + __skb_queue_tail(&htt->rx_in_ord_compl_q, skb); return false; } case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: @@ -2347,7 +2346,6 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb) break; } skb_queue_tail(&htt->tx_fetch_ind_q, tx_fetch_ind); - tasklet_schedule(&htt->txrx_compl_task); break; } case HTT_T2H_MSG_TYPE_TX_FETCH_CONFIRM: @@ -2376,27 +2374,77 @@ void ath10k_htt_rx_pktlog_completion_handler(struct ath10k *ar, } EXPORT_SYMBOL(ath10k_htt_rx_pktlog_completion_handler); -static void ath10k_htt_txrx_compl_task(unsigned long ptr) +int ath10k_htt_txrx_compl_task(struct ath10k *ar, int budget) { - struct ath10k_htt *htt = (struct ath10k_htt *)ptr; - struct ath10k *ar = htt->ar; + struct ath10k_htt *htt = &ar->htt; struct htt_tx_done tx_done = {}; - struct sk_buff_head rx_ind_q; struct sk_buff_head tx_ind_q; struct sk_buff *skb; unsigned long flags; - int num_mpdus; + int quota = 0, done, num_rx_msdus; + bool resched_napi = false; - __skb_queue_head_init(&rx_ind_q); __skb_queue_head_init(&tx_ind_q); - spin_lock_irqsave(&htt->rx_in_ord_compl_q.lock, flags); - skb_queue_splice_init(&htt->rx_in_ord_compl_q, &rx_ind_q); - spin_unlock_irqrestore(&htt->rx_in_ord_compl_q.lock, flags); + /* Since in-ord-ind can deliver more than 1 A-MSDU in single event, + * process it first to utilize full available quota. + */ + while (quota < budget) { + if (skb_queue_empty(&htt->rx_in_ord_compl_q)) + break; - spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); - skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); - spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + skb = __skb_dequeue(&htt->rx_in_ord_compl_q); + if (!skb) { + resched_napi = true; + goto exit; + } + + spin_lock_bh(&htt->rx_ring.lock); + num_rx_msdus = ath10k_htt_rx_in_ord_ind(ar, skb); + spin_unlock_bh(&htt->rx_ring.lock); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + dev_kfree_skb_any(skb); + if (num_rx_msdus > 0) + quota += num_rx_msdus; + + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + !skb_queue_empty(&htt->rx_in_ord_compl_q)) { + resched_napi = true; + goto exit; + } + } + + while (quota < budget) { + /* no more data to receive */ + if (!atomic_read(&htt->num_mpdus_ready)) + break; + + num_rx_msdus = ath10k_htt_rx_handle_amsdu(htt); + if (num_rx_msdus < 0) { + resched_napi = true; + goto exit; + } + + quota += num_rx_msdus; + atomic_dec(&htt->num_mpdus_ready); + if ((quota > ATH10K_NAPI_QUOTA_LIMIT) && + atomic_read(&htt->num_mpdus_ready)) { + resched_napi = true; + goto exit; + } + } + + /* From NAPI documentation: + * The napi poll() function may also process TX completions, in which + * case if it processes the entire TX ring then it should count that + * work as the rest of the budget. + */ + if ((quota < budget) && !kfifo_is_empty(&htt->txdone_fifo)) + quota = budget; /* kfifo_get: called only within txrx_tasklet so it's neatly serialized. * From kfifo_get() documentation: @@ -2406,27 +2454,24 @@ static void ath10k_htt_txrx_compl_task(unsigned long ptr) while (kfifo_get(&htt->txdone_fifo, &tx_done)) ath10k_txrx_tx_unref(htt, &tx_done); + ath10k_mac_tx_push_pending(ar); + + spin_lock_irqsave(&htt->tx_fetch_ind_q.lock, flags); + skb_queue_splice_init(&htt->tx_fetch_ind_q, &tx_ind_q); + spin_unlock_irqrestore(&htt->tx_fetch_ind_q.lock, flags); + while ((skb = __skb_dequeue(&tx_ind_q))) { ath10k_htt_rx_tx_fetch_ind(ar, skb); dev_kfree_skb_any(skb); } - num_mpdus = atomic_read(&htt->num_mpdus_ready); - - while (num_mpdus) { - if (ath10k_htt_rx_handle_amsdu(htt)) - break; - - num_mpdus--; - atomic_dec(&htt->num_mpdus_ready); - } - - while ((skb = __skb_dequeue(&rx_ind_q))) { - spin_lock_bh(&htt->rx_ring.lock); - ath10k_htt_rx_in_ord_ind(ar, skb); - spin_unlock_bh(&htt->rx_ring.lock); - dev_kfree_skb_any(skb); - } - +exit: ath10k_htt_rx_msdu_buff_replenish(htt); + /* In case of rx failure or more data to read, report budget + * to reschedule NAPI poll + */ + done = resched_napi ? budget : quota; + + return done; } +EXPORT_SYMBOL(ath10k_htt_txrx_compl_task); diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c index 7c072b605bc7..ae5b33fe5ba8 100644 --- a/drivers/net/wireless/ath/ath10k/htt_tx.c +++ b/drivers/net/wireless/ath/ath10k/htt_tx.c @@ -390,8 +390,6 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt) { int size; - tasklet_kill(&htt->txrx_compl_task); - idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar); idr_destroy(&htt->pending_tx); diff --git a/drivers/net/wireless/ath/ath10k/hw.c b/drivers/net/wireless/ath/ath10k/hw.c index f903d468dbe6..675e75d66db2 100644 --- a/drivers/net/wireless/ath/ath10k/hw.c +++ b/drivers/net/wireless/ath/ath10k/hw.c @@ -85,7 +85,7 @@ const struct ath10k_hw_regs qca99x0_regs = { .ce7_base_address = 0x0004bc00, /* Note: qca99x0 supports upto 12 Copy Engines. Other than address of * CE0 and CE1 no other copy engine is directly referred in the code. - * It is not really neccessary to assign address for newly supported + * It is not really necessary to assign address for newly supported * CEs in this address table. * Copy Engine Address * CE8 0x0004c000 @@ -219,3 +219,16 @@ void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, survey->time = CCNT_TO_MSEC(ar, cc); survey->time_busy = CCNT_TO_MSEC(ar, rcc); } + +const struct ath10k_hw_ops qca988x_ops = { +}; + +static int ath10k_qca99x0_rx_desc_get_l3_pad_bytes(struct htt_rx_desc *rxd) +{ + return MS(__le32_to_cpu(rxd->msdu_end.qca99x0.info1), + RX_MSDU_END_INFO1_L3_HDR_PAD); +} + +const struct ath10k_hw_ops qca99x0_ops = { + .rx_desc_get_l3_pad_bytes = ath10k_qca99x0_rx_desc_get_l3_pad_bytes, +}; diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h index e014cd732a0d..6038b7486f1d 100644 --- a/drivers/net/wireless/ath/ath10k/hw.h +++ b/drivers/net/wireless/ath/ath10k/hw.h @@ -284,7 +284,7 @@ void ath10k_hw_fill_survey_time(struct ath10k *ar, struct survey_info *survey, #define QCA_REV_9377(ar) ((ar)->hw_rev == ATH10K_HW_QCA9377) #define QCA_REV_40XX(ar) ((ar)->hw_rev == ATH10K_HW_QCA4019) -/* Known pecularities: +/* Known peculiarities: * - raw appears in nwifi decap, raw and nwifi appear in ethernet decap * - raw have FCS, nwifi doesn't * - ethernet frames have 802.11 header decapped and parts (base hdr, cipher @@ -338,11 +338,6 @@ enum ath10k_hw_rate_rev2_cck { ATH10K_HW_RATE_REV2_CCK_SP_11M, }; -enum ath10k_hw_4addr_pad { - ATH10K_HW_4ADDR_PAD_AFTER, - ATH10K_HW_4ADDR_PAD_BEFORE, -}; - enum ath10k_hw_cc_wraparound_type { ATH10K_HW_CC_WRAP_DISABLED = 0, @@ -363,6 +358,80 @@ enum ath10k_hw_cc_wraparound_type { ATH10K_HW_CC_WRAP_SHIFTED_EACH = 2, }; +struct ath10k_hw_params { + u32 id; + u16 dev_id; + const char *name; + u32 patch_load_addr; + int uart_pin; + u32 otp_exe_param; + + /* Type of hw cycle counter wraparound logic, for more info + * refer enum ath10k_hw_cc_wraparound_type. + */ + enum ath10k_hw_cc_wraparound_type cc_wraparound_type; + + /* Some of chip expects fragment descriptor to be continuous + * memory for any TX operation. Set continuous_frag_desc flag + * for the hardware which have such requirement. + */ + bool continuous_frag_desc; + + /* CCK hardware rate table mapping for the newer chipsets + * like QCA99X0, QCA4019 got revised. The CCK h/w rate values + * are in a proper order with respect to the rate/preamble + */ + bool cck_rate_map_rev2; + + u32 channel_counters_freq_hz; + + /* Mgmt tx descriptors threshold for limiting probe response + * frames. + */ + u32 max_probe_resp_desc_thres; + + u32 tx_chain_mask; + u32 rx_chain_mask; + u32 max_spatial_stream; + u32 cal_data_len; + + struct ath10k_hw_params_fw { + const char *dir; + const char *board; + size_t board_size; + size_t board_ext_size; + } fw; + + /* qca99x0 family chips deliver broadcast/multicast management + * frames encrypted and expect software do decryption. + */ + bool sw_decrypt_mcast_mgmt; + + const struct ath10k_hw_ops *hw_ops; + + /* Number of bytes used for alignment in rx_hdr_status of rx desc. */ + int decap_align_bytes; +}; + +struct htt_rx_desc; + +/* Defines needed for Rx descriptor abstraction */ +struct ath10k_hw_ops { + int (*rx_desc_get_l3_pad_bytes)(struct htt_rx_desc *rxd); +}; + +extern const struct ath10k_hw_ops qca988x_ops; +extern const struct ath10k_hw_ops qca99x0_ops; + +static inline int +ath10k_rx_desc_get_l3_pad_bytes(struct ath10k_hw_params *hw, + struct htt_rx_desc *rxd) +{ + if (hw->hw_ops->rx_desc_get_l3_pad_bytes) + return hw->hw_ops->rx_desc_get_l3_pad_bytes(rxd); + return 0; +} + /* Target specific defines for MAIN firmware */ #define TARGET_NUM_VDEVS 8 #define TARGET_NUM_PEER_AST 2 diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 0bbd0a00edcc..76297d69f1ed 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -824,7 +824,7 @@ static void ath10k_peer_cleanup(struct ath10k *ar, u32 vdev_id) */ for (i = 0; i < ARRAY_SIZE(ar->peer_map); i++) { if (ar->peer_map[i] == peer) { - ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %p idx %d)\n", + ath10k_warn(ar, "removing stale peer_map entry for %pM (ptr %pK idx %d)\n", peer->addr, peer, i); ar->peer_map[i] = NULL; } @@ -2793,7 +2793,7 @@ static void ath10k_bss_disassoc(struct ieee80211_hw *hw, ret = ath10k_wmi_vdev_down(ar, arvif->vdev_id); if (ret) - ath10k_warn(ar, "faield to down vdev %i: %d\n", + ath10k_warn(ar, "failed to down vdev %i: %d\n", arvif->vdev_id, ret); arvif->def_wep_key_idx = -1; @@ -3255,6 +3255,8 @@ ath10k_mac_tx_h_get_txmode(struct ath10k *ar, if (ar->htt.target_version_major < 3 && (ieee80211_is_nullfunc(fc) || ieee80211_is_qos_nullfunc(fc)) && !test_bit(ATH10K_FW_FEATURE_HAS_WMI_MGMT_TX, + ar->running_fw->fw_file.fw_features) && + !test_bit(ATH10K_FW_FEATURE_SKIP_NULL_FUNC_WAR, ar->running_fw->fw_file.fw_features)) return ATH10K_HW_TXRX_MGMT; @@ -3524,7 +3526,7 @@ static int ath10k_mac_tx(struct ath10k *ar, if (info->flags & IEEE80211_TX_CTL_TX_OFFCHAN) { if (!ath10k_mac_tx_frm_has_freq(ar)) { - ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "queued offchannel skb %pK\n", skb); skb_queue_tail(&ar->offchan_tx_queue, skb); @@ -3586,7 +3588,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) mutex_lock(&ar->conf_mutex); - ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %p\n", + ath10k_dbg(ar, ATH10K_DBG_MAC, "mac offchannel skb %pK\n", skb); hdr = (struct ieee80211_hdr *)skb->data; @@ -3643,7 +3645,7 @@ void ath10k_offchan_tx_work(struct work_struct *work) time_left = wait_for_completion_timeout(&ar->offchan_tx_completed, 3 * HZ); if (time_left == 0) - ath10k_warn(ar, "timed out waiting for offchannel skb %p\n", + ath10k_warn(ar, "timed out waiting for offchannel skb %pK\n", skb); if (!peer && tmp_peer_created) { @@ -3777,7 +3779,9 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, enum ath10k_hw_txrx_mode txmode; enum ath10k_mac_tx_path txpath; struct sk_buff *skb; + struct ieee80211_hdr *hdr; size_t skb_len; + bool is_mgmt, is_presp; int ret; spin_lock_bh(&ar->htt.tx_lock); @@ -3801,6 +3805,22 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, skb_len = skb->len; txmode = ath10k_mac_tx_h_get_txmode(ar, vif, sta, skb); txpath = ath10k_mac_tx_h_get_txpath(ar, skb, txmode); + is_mgmt = (txpath == ATH10K_MAC_TX_HTT_MGMT); + + if (is_mgmt) { + hdr = (struct ieee80211_hdr *)skb->data; + is_presp = ieee80211_is_probe_resp(hdr->frame_control); + + spin_lock_bh(&ar->htt.tx_lock); + ret = ath10k_htt_tx_mgmt_inc_pending(htt, is_mgmt, is_presp); + + if (ret) { + ath10k_htt_tx_dec_pending(htt); + spin_unlock_bh(&ar->htt.tx_lock); + return ret; + } + spin_unlock_bh(&ar->htt.tx_lock); + } ret = ath10k_mac_tx(ar, vif, sta, txmode, txpath, skb); if (unlikely(ret)) { @@ -3808,6 +3828,8 @@ int ath10k_mac_tx_push_txq(struct ieee80211_hw *hw, spin_lock_bh(&ar->htt.tx_lock); ath10k_htt_tx_dec_pending(htt); + if (is_mgmt) + ath10k_htt_tx_mgmt_dec_pending(htt); spin_unlock_bh(&ar->htt.tx_lock); return ret; @@ -3894,7 +3916,7 @@ void __ath10k_scan_finish(struct ath10k *ar) ar->scan.roc_freq = 0; ath10k_offchan_tx_purge(ar); cancel_delayed_work(&ar->scan.timeout); - complete_all(&ar->scan.completed); + complete(&ar->scan.completed); break; } } @@ -4100,13 +4122,29 @@ static void ath10k_mac_op_wake_tx_queue(struct ieee80211_hw *hw, { struct ath10k *ar = hw->priv; struct ath10k_txq *artxq = (void *)txq->drv_priv; + struct ieee80211_txq *f_txq; + struct ath10k_txq *f_artxq; + int ret = 0; + int max = 16; spin_lock_bh(&ar->txqs_lock); if (list_empty(&artxq->list)) list_add_tail(&artxq->list, &ar->txqs); + + f_artxq = list_first_entry(&ar->txqs, struct ath10k_txq, list); + f_txq = container_of((void *)f_artxq, struct ieee80211_txq, drv_priv); + list_del_init(&f_artxq->list); + + while (ath10k_mac_tx_can_push(hw, f_txq) && max--) { + ret = ath10k_mac_tx_push_txq(hw, f_txq); + if (ret) + break; + } + if (ret != -ENOENT) + list_add_tail(&f_artxq->list, &ar->txqs); spin_unlock_bh(&ar->txqs_lock); - ath10k_mac_tx_push_pending(ar); + ath10k_htt_tx_txq_update(hw, f_txq); ath10k_htt_tx_txq_update(hw, txq); } @@ -5186,7 +5224,7 @@ static void ath10k_configure_filter(struct ieee80211_hw *hw, ret = ath10k_monitor_recalc(ar); if (ret) - ath10k_warn(ar, "failed to recalc montior: %d\n", ret); + ath10k_warn(ar, "failed to recalc monitor: %d\n", ret); mutex_unlock(&ar->conf_mutex); } @@ -5984,8 +6022,8 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, * Existing station deletion. */ ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac vdev %d peer delete %pM (sta gone)\n", - arvif->vdev_id, sta->addr); + "mac vdev %d peer delete %pM sta %pK (sta gone)\n", + arvif->vdev_id, sta->addr, sta); ret = ath10k_peer_delete(ar, arvif->vdev_id, sta->addr); if (ret) @@ -6001,7 +6039,7 @@ static int ath10k_sta_state(struct ieee80211_hw *hw, continue; if (peer->sta == sta) { - ath10k_warn(ar, "found sta peer %pM (ptr %p id %d) entry on vdev %i after it was supposedly removed\n", + ath10k_warn(ar, "found sta peer %pM (ptr %pK id %d) entry on vdev %i after it was supposedly removed\n", sta->addr, peer, i, arvif->vdev_id); peer->sta = NULL; @@ -6538,7 +6576,7 @@ static int ath10k_get_survey(struct ieee80211_hw *hw, int idx, goto exit; } - ath10k_mac_update_bss_chan_survey(ar, survey->channel); + ath10k_mac_update_bss_chan_survey(ar, &sband->channels[idx]); spin_lock_bh(&ar->data_lock); memcpy(survey, ar_survey, sizeof(*survey)); @@ -7134,7 +7172,7 @@ ath10k_mac_op_add_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx add freq %hu width %d ptr %p\n", + "mac chanctx add freq %hu width %d ptr %pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -7158,7 +7196,7 @@ ath10k_mac_op_remove_chanctx(struct ieee80211_hw *hw, struct ath10k *ar = hw->priv; ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx remove freq %hu width %d ptr %p\n", + "mac chanctx remove freq %hu width %d ptr %pK\n", ctx->def.chan->center_freq, ctx->def.width, ctx); mutex_lock(&ar->conf_mutex); @@ -7223,7 +7261,7 @@ ath10k_mac_op_change_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx change freq %hu width %d ptr %p changed %x\n", + "mac chanctx change freq %hu width %d ptr %pK changed %x\n", ctx->def.chan->center_freq, ctx->def.width, ctx, changed); /* This shouldn't really happen because channel switching should use @@ -7281,7 +7319,7 @@ ath10k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx assign ptr %p vdev_id %i\n", + "mac chanctx assign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); if (WARN_ON(arvif->is_started)) { @@ -7342,7 +7380,7 @@ ath10k_mac_op_unassign_vif_chanctx(struct ieee80211_hw *hw, mutex_lock(&ar->conf_mutex); ath10k_dbg(ar, ATH10K_DBG_MAC, - "mac chanctx unassign ptr %p vdev_id %i\n", + "mac chanctx unassign ptr %pK vdev_id %i\n", ctx, arvif->vdev_id); WARN_ON(!arvif->is_started); diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c index 07933c51a850..0457e315d336 100644 --- a/drivers/net/wireless/ath/ath10k/pci.c +++ b/drivers/net/wireless/ath/ath10k/pci.c @@ -1506,12 +1506,10 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe, ath10k_ce_per_engine_service(ar, pipe); } -void ath10k_pci_kill_tasklet(struct ath10k *ar) +static void ath10k_pci_rx_retry_sync(struct ath10k *ar) { struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - tasklet_kill(&ar_pci->intr_tq); - del_timer_sync(&ar_pci->rx_post_retry); } @@ -1570,7 +1568,7 @@ void ath10k_pci_hif_get_default_pipe(struct ath10k *ar, ul_pipe, dl_pipe); } -static void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) +void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar) { u32 val; @@ -1693,14 +1691,12 @@ static void ath10k_pci_rx_pipe_cleanup(struct ath10k_pci_pipe *pci_pipe) static void ath10k_pci_tx_pipe_cleanup(struct ath10k_pci_pipe *pci_pipe) { struct ath10k *ar; - struct ath10k_pci *ar_pci; struct ath10k_ce_pipe *ce_pipe; struct ath10k_ce_ring *ce_ring; struct sk_buff *skb; int i; ar = pci_pipe->hif_ce_state; - ar_pci = ath10k_pci_priv(ar); ce_pipe = pci_pipe->ce_hdl; ce_ring = ce_pipe->src_ring; @@ -1753,7 +1749,7 @@ void ath10k_pci_ce_deinit(struct ath10k *ar) void ath10k_pci_flush(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); ath10k_pci_buffer_cleanup(ar); } @@ -1780,6 +1776,8 @@ static void ath10k_pci_hif_stop(struct ath10k *ar) ath10k_pci_irq_disable(ar); ath10k_pci_irq_sync(ar); ath10k_pci_flush(ar); + napi_synchronize(&ar->napi); + napi_disable(&ar->napi); spin_lock_irqsave(&ar_pci->ps_lock, flags); WARN_ON(ar_pci->ps_wake_refcount > 0); @@ -2533,6 +2531,7 @@ static int ath10k_pci_hif_power_up(struct ath10k *ar) ath10k_err(ar, "could not wake up target CPU: %d\n", ret); goto err_ce; } + napi_enable(&ar->napi); return 0; @@ -2725,7 +2724,7 @@ static int ath10k_pci_hif_fetch_cal_eeprom(struct ath10k *ar, void **data, return 0; err_free: - kfree(data); + kfree(caldata); return -EINVAL; } @@ -2772,35 +2771,53 @@ static irqreturn_t ath10k_pci_interrupt_handler(int irq, void *arg) return IRQ_NONE; } - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) { - if (!ath10k_pci_irq_pending(ar)) - return IRQ_NONE; + if ((ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) && + !ath10k_pci_irq_pending(ar)) + return IRQ_NONE; - ath10k_pci_disable_and_clear_legacy_irq(ar); - } - - tasklet_schedule(&ar_pci->intr_tq); + ath10k_pci_disable_and_clear_legacy_irq(ar); + ath10k_pci_irq_msi_fw_mask(ar); + napi_schedule(&ar->napi); return IRQ_HANDLED; } -static void ath10k_pci_tasklet(unsigned long data) +static int ath10k_pci_napi_poll(struct napi_struct *ctx, int budget) { - struct ath10k *ar = (struct ath10k *)data; - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); + struct ath10k *ar = container_of(ctx, struct ath10k, napi); + int done = 0; if (ath10k_pci_has_fw_crashed(ar)) { - ath10k_pci_irq_disable(ar); ath10k_pci_fw_crashed_clear(ar); ath10k_pci_fw_crashed_dump(ar); - return; + napi_complete(ctx); + return done; } ath10k_ce_per_engine_service_any(ar); - /* Re-enable legacy irq that was disabled in the irq handler */ - if (ar_pci->oper_irq_mode == ATH10K_PCI_IRQ_LEGACY) + done = ath10k_htt_txrx_compl_task(ar, budget); + + if (done < budget) { + napi_complete(ctx); + /* In case of MSI, it is possible that interrupts are received + * while NAPI poll is inprogress. So pending interrupts that are + * received after processing all copy engine pipes by NAPI poll + * will not be handled again. This is causing failure to + * complete boot sequence in x86 platform. So before enabling + * interrupts safer to check for pending interrupts for + * immediate servicing. + */ + if (CE_INTERRUPT_SUMMARY(ar)) { + napi_reschedule(ctx); + goto out; + } ath10k_pci_enable_legacy_irq(ar); + ath10k_pci_irq_msi_fw_unmask(ar); + } + +out: + return done; } static int ath10k_pci_request_irq_msi(struct ath10k *ar) @@ -2858,11 +2875,10 @@ static void ath10k_pci_free_irq(struct ath10k *ar) free_irq(ar_pci->pdev->irq, ar); } -void ath10k_pci_init_irq_tasklets(struct ath10k *ar) +void ath10k_pci_init_napi(struct ath10k *ar) { - struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); - - tasklet_init(&ar_pci->intr_tq, ath10k_pci_tasklet, (unsigned long)ar); + netif_napi_add(&ar->napi_dev, &ar->napi, ath10k_pci_napi_poll, + ATH10K_NAPI_BUDGET); } static int ath10k_pci_init_irq(struct ath10k *ar) @@ -2870,7 +2886,7 @@ static int ath10k_pci_init_irq(struct ath10k *ar) struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); int ret; - ath10k_pci_init_irq_tasklets(ar); + ath10k_pci_init_napi(ar); if (ath10k_pci_irq_mode != ATH10K_PCI_IRQ_AUTO) ath10k_info(ar, "limiting irq mode to: %d\n", @@ -3062,7 +3078,7 @@ static int ath10k_pci_claim(struct ath10k *ar) goto err_master; } - ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%p\n", ar_pci->mem); + ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot pci_mem 0x%pK\n", ar_pci->mem); return 0; err_master: @@ -3131,7 +3147,8 @@ int ath10k_pci_setup_resource(struct ath10k *ar) void ath10k_pci_release_resource(struct ath10k *ar) { - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); + netif_napi_del(&ar->napi); ath10k_pci_ce_deinit(ar); ath10k_pci_free_pipes(ar); } @@ -3297,7 +3314,7 @@ static int ath10k_pci_probe(struct pci_dev *pdev, err_free_irq: ath10k_pci_free_irq(ar); - ath10k_pci_kill_tasklet(ar); + ath10k_pci_rx_retry_sync(ar); err_deinit_irq: ath10k_pci_deinit_irq(ar); diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h index 6eca1df2ce60..9854ad56b2de 100644 --- a/drivers/net/wireless/ath/ath10k/pci.h +++ b/drivers/net/wireless/ath/ath10k/pci.h @@ -177,8 +177,6 @@ struct ath10k_pci { /* Operating interrupt mode */ enum ath10k_pci_irq_mode oper_irq_mode; - struct tasklet_struct intr_tq; - struct ath10k_pci_pipe pipe_info[CE_COUNT_MAX]; /* Copy Engine used for Diagnostic Accesses */ @@ -294,8 +292,7 @@ void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_free_pipes(struct ath10k *ar); void ath10k_pci_rx_replenish_retry(unsigned long ptr); void ath10k_pci_ce_deinit(struct ath10k *ar); -void ath10k_pci_init_irq_tasklets(struct ath10k *ar); -void ath10k_pci_kill_tasklet(struct ath10k *ar); +void ath10k_pci_init_napi(struct ath10k *ar); int ath10k_pci_init_pipes(struct ath10k *ar); int ath10k_pci_init_config(struct ath10k *ar); void ath10k_pci_rx_post(struct ath10k *ar); @@ -303,6 +300,7 @@ void ath10k_pci_flush(struct ath10k *ar); void ath10k_pci_enable_legacy_irq(struct ath10k *ar); bool ath10k_pci_irq_pending(struct ath10k *ar); void ath10k_pci_disable_and_clear_legacy_irq(struct ath10k *ar); +void ath10k_pci_irq_msi_fw_mask(struct ath10k *ar); int ath10k_pci_wait_for_target_init(struct ath10k *ar); int ath10k_pci_setup_resource(struct ath10k *ar); void ath10k_pci_release_resource(struct ath10k *ar); diff --git a/drivers/net/wireless/ath/ath10k/swap.c b/drivers/net/wireless/ath/ath10k/swap.c index 0c5f5863dac8..adf4592374b4 100644 --- a/drivers/net/wireless/ath/ath10k/swap.c +++ b/drivers/net/wireless/ath/ath10k/swap.c @@ -134,17 +134,18 @@ ath10k_swap_code_seg_alloc(struct ath10k *ar, size_t swap_bin_len) return seg_info; } -int ath10k_swap_code_seg_configure(struct ath10k *ar) +int ath10k_swap_code_seg_configure(struct ath10k *ar, + const struct ath10k_fw_file *fw_file) { int ret; struct ath10k_swap_code_seg_info *seg_info = NULL; - if (!ar->swap.firmware_swap_code_seg_info) + if (!fw_file->firmware_swap_code_seg_info) return 0; ath10k_dbg(ar, ATH10K_DBG_BOOT, "boot found firmware code swap binary\n"); - seg_info = ar->swap.firmware_swap_code_seg_info; + seg_info = fw_file->firmware_swap_code_seg_info; ret = ath10k_bmi_write_memory(ar, seg_info->target_addr, &seg_info->seg_hw_info, @@ -158,28 +159,29 @@ int ath10k_swap_code_seg_configure(struct ath10k *ar) return 0; } -void ath10k_swap_code_seg_release(struct ath10k *ar) +void ath10k_swap_code_seg_release(struct ath10k *ar, + struct ath10k_fw_file *fw_file) { - ath10k_swap_code_seg_free(ar, ar->swap.firmware_swap_code_seg_info); + ath10k_swap_code_seg_free(ar, fw_file->firmware_swap_code_seg_info); /* FIXME: these two assignments look to bein wrong place! Shouldn't * they be in ath10k_core_free_firmware_files() like the rest? */ - ar->normal_mode_fw.fw_file.codeswap_data = NULL; - ar->normal_mode_fw.fw_file.codeswap_len = 0; + fw_file->codeswap_data = NULL; + fw_file->codeswap_len = 0; - ar->swap.firmware_swap_code_seg_info = NULL; + fw_file->firmware_swap_code_seg_info = NULL; } -int ath10k_swap_code_seg_init(struct ath10k *ar) +int ath10k_swap_code_seg_init(struct ath10k *ar, struct ath10k_fw_file *fw_file) { int ret; struct ath10k_swap_code_seg_info *seg_info; const void *codeswap_data; size_t codeswap_len; - codeswap_data = ar->normal_mode_fw.fw_file.codeswap_data; - codeswap_len = ar->normal_mode_fw.fw_file.codeswap_len; + codeswap_data = fw_file->codeswap_data; + codeswap_len = fw_file->codeswap_len; if (!codeswap_len || !codeswap_data) return 0; @@ -200,7 +202,7 @@ int ath10k_swap_code_seg_init(struct ath10k *ar) return ret; } - ar->swap.firmware_swap_code_seg_info = seg_info; + fw_file->firmware_swap_code_seg_info = seg_info; return 0; } diff --git a/drivers/net/wireless/ath/ath10k/swap.h b/drivers/net/wireless/ath/ath10k/swap.h index 36991c7b07a0..f5dc0476493e 100644 --- a/drivers/net/wireless/ath/ath10k/swap.h +++ b/drivers/net/wireless/ath/ath10k/swap.h @@ -23,6 +23,8 @@ /* Currently only one swap segment is supported */ #define ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED 1 +struct ath10k_fw_file; + struct ath10k_swap_code_seg_tlv { __le32 address; __le32 length; @@ -58,8 +60,11 @@ struct ath10k_swap_code_seg_info { dma_addr_t paddr[ATH10K_SWAP_CODE_SEG_NUM_SUPPORTED]; }; -int ath10k_swap_code_seg_configure(struct ath10k *ar); -void ath10k_swap_code_seg_release(struct ath10k *ar); -int ath10k_swap_code_seg_init(struct ath10k *ar); +int ath10k_swap_code_seg_configure(struct ath10k *ar, + const struct ath10k_fw_file *fw_file); +void ath10k_swap_code_seg_release(struct ath10k *ar, + struct ath10k_fw_file *fw_file); +int ath10k_swap_code_seg_init(struct ath10k *ar, + struct ath10k_fw_file *fw_file); #endif diff --git a/drivers/net/wireless/ath/ath10k/targaddrs.h b/drivers/net/wireless/ath/ath10k/targaddrs.h index aaf53a81e78b..a47cab44d9c8 100644 --- a/drivers/net/wireless/ath/ath10k/targaddrs.h +++ b/drivers/net/wireless/ath/ath10k/targaddrs.h @@ -405,7 +405,7 @@ Fw Mode/SubMode Mask * 1. target firmware would check magic number and if it's a match, firmware * would consider the bits[0:15] are valid and base on that to calculate * the end of DRAM. Early allocation would be located at that area and - * may be reclaimed when necesary + * may be reclaimed when necessary * 2. if no magic number is found, early allocation would happen at "_end" * symbol of ROM which is located before the app-data and might NOT be * re-claimable. If this is adopted, link script should keep this in diff --git a/drivers/net/wireless/ath/ath10k/testmode.c b/drivers/net/wireless/ath/ath10k/testmode.c index 120f4234d3b0..ed85f938e3c0 100644 --- a/drivers/net/wireless/ath/ath10k/testmode.c +++ b/drivers/net/wireless/ath/ath10k/testmode.c @@ -23,6 +23,7 @@ #include "wmi.h" #include "hif.h" #include "hw.h" +#include "core.h" #include "testmode_i.h" @@ -45,7 +46,7 @@ bool ath10k_tm_event_wmi(struct ath10k *ar, u32 cmd_id, struct sk_buff *skb) int ret; ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode event wmi cmd_id %d skb %p skb->len %d\n", + "testmode event wmi cmd_id %d skb %pK skb->len %d\n", cmd_id, skb, skb->len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", skb->data, skb->len); @@ -240,6 +241,18 @@ static int ath10k_tm_cmd_utf_start(struct ath10k *ar, struct nlattr *tb[]) goto err; } + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) { + ret = ath10k_swap_code_seg_init(ar, + &ar->testmode.utf_mode_fw.fw_file); + if (ret) { + ath10k_warn(ar, + "failed to init utf code swap segment: %d\n", + ret); + goto err_release_utf_mode_fw; + } + } + spin_lock_bh(&ar->data_lock); ar->testmode.utf_monitor = true; spin_unlock_bh(&ar->data_lock); @@ -279,6 +292,11 @@ err_power_down: ath10k_hif_power_down(ar); err_release_utf_mode_fw: + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) + ath10k_swap_code_seg_release(ar, + &ar->testmode.utf_mode_fw.fw_file); + release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware); ar->testmode.utf_mode_fw.fw_file.firmware = NULL; @@ -301,6 +319,11 @@ static void __ath10k_tm_cmd_utf_stop(struct ath10k *ar) spin_unlock_bh(&ar->data_lock); + if (ar->testmode.utf_mode_fw.fw_file.codeswap_data && + ar->testmode.utf_mode_fw.fw_file.codeswap_len) + ath10k_swap_code_seg_release(ar, + &ar->testmode.utf_mode_fw.fw_file); + release_firmware(ar->testmode.utf_mode_fw.fw_file.firmware); ar->testmode.utf_mode_fw.fw_file.firmware = NULL; @@ -360,7 +383,7 @@ static int ath10k_tm_cmd_wmi(struct ath10k *ar, struct nlattr *tb[]) cmd_id = nla_get_u32(tb[ATH10K_TM_ATTR_WMI_CMDID]); ath10k_dbg(ar, ATH10K_DBG_TESTMODE, - "testmode cmd wmi cmd_id %d buf %p buf_len %d\n", + "testmode cmd wmi cmd_id %d buf %pK buf_len %d\n", cmd_id, buf, buf_len); ath10k_dbg_dump(ar, ATH10K_DBG_TESTMODE, NULL, "", buf, buf_len); diff --git a/drivers/net/wireless/ath/ath10k/thermal.c b/drivers/net/wireless/ath/ath10k/thermal.c index 444b52c7e4f3..0a47269be289 100644 --- a/drivers/net/wireless/ath/ath10k/thermal.c +++ b/drivers/net/wireless/ath/ath10k/thermal.c @@ -192,7 +192,7 @@ int ath10k_thermal_register(struct ath10k *ar) /* Avoid linking error on devm_hwmon_device_register_with_groups, I * guess linux/hwmon.h is missing proper stubs. */ - if (!config_enabled(CONFIG_HWMON)) + if (!IS_REACHABLE(CONFIG_HWMON)) return 0; hwmon_dev = devm_hwmon_device_register_with_groups(ar->dev, diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c index b29a86a26c13..9852c5d51139 100644 --- a/drivers/net/wireless/ath/ath10k/txrx.c +++ b/drivers/net/wireless/ath/ath10k/txrx.c @@ -44,7 +44,7 @@ static void ath10k_report_offchan_tx(struct ath10k *ar, struct sk_buff *skb) complete(&ar->offchan_tx_completed); ar->offchan_tx_skb = NULL; /* just for sanity */ - ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %p\n", skb); + ath10k_dbg(ar, ATH10K_DBG_HTT, "completed offchannel skb %pK\n", skb); out: spin_unlock_bh(&ar->data_lock); } @@ -119,8 +119,6 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt, ieee80211_tx_status(htt->ar->hw, msdu); /* we do not own the msdu anymore */ - ath10k_mac_tx_push_pending(ar); - return 0; } diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h index 64ebd304f907..c9a8bb1186f2 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-ops.h +++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h @@ -51,6 +51,8 @@ struct wmi_ops { struct wmi_roam_ev_arg *arg); int (*pull_wow_event)(struct ath10k *ar, struct sk_buff *skb, struct wmi_wow_ev_arg *arg); + int (*pull_echo_ev)(struct ath10k *ar, struct sk_buff *skb, + struct wmi_echo_ev_arg *arg); enum wmi_txbf_conf (*get_txbf_conf_scheme)(struct ath10k *ar); struct sk_buff *(*gen_pdev_suspend)(struct ath10k *ar, u32 suspend_opt); @@ -123,7 +125,7 @@ struct wmi_ops { enum wmi_force_fw_hang_type type, u32 delay_ms); struct sk_buff *(*gen_mgmt_tx)(struct ath10k *ar, struct sk_buff *skb); - struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u32 module_enable, + struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u64 module_enable, u32 log_level); struct sk_buff *(*gen_pktlog_enable)(struct ath10k *ar, u32 filter); struct sk_buff *(*gen_pktlog_disable)(struct ath10k *ar); @@ -194,6 +196,7 @@ struct wmi_ops { struct sk_buff *(*gen_pdev_bss_chan_info_req) (struct ath10k *ar, enum wmi_bss_survey_req_type type); + struct sk_buff *(*gen_echo)(struct ath10k *ar, u32 value); }; int ath10k_wmi_cmd_send(struct ath10k *ar, struct sk_buff *skb, u32 cmd_id); @@ -349,6 +352,16 @@ ath10k_wmi_pull_wow_event(struct ath10k *ar, struct sk_buff *skb, return ar->wmi.ops->pull_wow_event(ar, skb, arg); } +static inline int +ath10k_wmi_pull_echo_ev(struct ath10k *ar, struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + if (!ar->wmi.ops->pull_echo_ev) + return -EOPNOTSUPP; + + return ar->wmi.ops->pull_echo_ev(ar, skb, arg); +} + static inline enum wmi_txbf_conf ath10k_wmi_get_txbf_conf_scheme(struct ath10k *ar) { @@ -932,7 +945,7 @@ ath10k_wmi_force_fw_hang(struct ath10k *ar, } static inline int -ath10k_wmi_dbglog_cfg(struct ath10k *ar, u32 module_enable, u32 log_level) +ath10k_wmi_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct sk_buff *skb; @@ -1382,4 +1395,20 @@ ath10k_wmi_pdev_bss_chan_info_request(struct ath10k *ar, wmi->cmd->pdev_bss_chan_info_request_cmdid); } +static inline int +ath10k_wmi_echo(struct ath10k *ar, u32 value) +{ + struct ath10k_wmi *wmi = &ar->wmi; + struct sk_buff *skb; + + if (!wmi->ops->gen_echo) + return -EOPNOTSUPP; + + skb = wmi->ops->gen_echo(ar, value); + if (IS_ERR(skb)) + return PTR_ERR(skb); + + return ath10k_wmi_cmd_send(ar, skb, wmi->cmd->echo_cmdid); +} + #endif diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c index e09337ee7c96..e64f59300a7c 100644 --- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c +++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c @@ -1223,6 +1223,33 @@ ath10k_wmi_tlv_op_pull_wow_ev(struct ath10k *ar, struct sk_buff *skb, return 0; } +static int ath10k_wmi_tlv_op_pull_echo_ev(struct ath10k *ar, + struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + const void **tb; + const struct wmi_echo_event *ev; + int ret; + + tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, skb->len, GFP_ATOMIC); + if (IS_ERR(tb)) { + ret = PTR_ERR(tb); + ath10k_warn(ar, "failed to parse tlv: %d\n", ret); + return ret; + } + + ev = tb[WMI_TLV_TAG_STRUCT_ECHO_EVENT]; + if (!ev) { + kfree(tb); + return -EPROTO; + } + + arg->value = ev->value; + + kfree(tb); + return 0; +} + static struct sk_buff * ath10k_wmi_tlv_op_gen_pdev_suspend(struct ath10k *ar, u32 opt) { @@ -2441,7 +2468,7 @@ ath10k_wmi_tlv_op_gen_force_fw_hang(struct ath10k *ar, } static struct sk_buff * -ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, +ath10k_wmi_tlv_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_tlv_dbglog_cmd *cmd; struct wmi_tlv *tlv; @@ -3081,6 +3108,34 @@ ath10k_wmi_tlv_op_gen_adaptive_qcs(struct ath10k *ar, bool enable) return skb; } +static struct sk_buff * +ath10k_wmi_tlv_op_gen_echo(struct ath10k *ar, u32 value) +{ + struct wmi_echo_cmd *cmd; + struct wmi_tlv *tlv; + struct sk_buff *skb; + void *ptr; + size_t len; + + len = sizeof(*tlv) + sizeof(*cmd); + skb = ath10k_wmi_alloc_skb(ar, len); + if (!skb) + return ERR_PTR(-ENOMEM); + + ptr = (void *)skb->data; + tlv = ptr; + tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_ECHO_CMD); + tlv->len = __cpu_to_le16(sizeof(*cmd)); + cmd = (void *)tlv->value; + cmd->value = cpu_to_le32(value); + + ptr += sizeof(*tlv); + ptr += sizeof(*cmd); + + ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv echo value 0x%08x\n", value); + return skb; +} + /****************/ /* TLV mappings */ /****************/ @@ -3429,6 +3484,7 @@ static const struct wmi_ops wmi_tlv_ops = { .pull_fw_stats = ath10k_wmi_tlv_op_pull_fw_stats, .pull_roam_ev = ath10k_wmi_tlv_op_pull_roam_ev, .pull_wow_event = ath10k_wmi_tlv_op_pull_wow_ev, + .pull_echo_ev = ath10k_wmi_tlv_op_pull_echo_ev, .get_txbf_conf_scheme = ath10k_wmi_tlv_txbf_conf_scheme, .gen_pdev_suspend = ath10k_wmi_tlv_op_gen_pdev_suspend, @@ -3485,6 +3541,7 @@ static const struct wmi_ops wmi_tlv_ops = { .gen_adaptive_qcs = ath10k_wmi_tlv_op_gen_adaptive_qcs, .fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_tlv_op_gen_echo, }; static const struct wmi_peer_flags_map wmi_tlv_peer_flags_map = { diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index d2462886b75c..54df425bb0fc 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -29,6 +29,9 @@ #include "p2p.h" #include "hw.h" +#define ATH10K_WMI_BARRIER_ECHO_ID 0xBA991E9 +#define ATH10K_WMI_BARRIER_TIMEOUT_HZ (3 * HZ) + /* MAIN WMI cmd track */ static struct wmi_cmd_map wmi_cmd_map = { .init_cmdid = WMI_INIT_CMDID, @@ -1874,7 +1877,7 @@ ath10k_wmi_op_gen_mgmt_tx(struct ath10k *ar, struct sk_buff *msdu) ether_addr_copy(cmd->hdr.peer_macaddr.addr, ieee80211_get_DA(hdr)); memcpy(cmd->buf, msdu->data, msdu->len); - ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %p len %d ftype %02x stype %02x\n", + ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi mgmt tx skb %pK len %d ftype %02x stype %02x\n", msdu, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); trace_ath10k_tx_hdr(ar, skb->data, skb->len); @@ -2240,6 +2243,29 @@ static int ath10k_wmi_10_4_op_pull_mgmt_rx_ev(struct ath10k *ar, return 0; } +static bool ath10k_wmi_rx_is_decrypted(struct ath10k *ar, + struct ieee80211_hdr *hdr) +{ + if (!ieee80211_has_protected(hdr->frame_control)) + return false; + + /* FW delivers WEP Shared Auth frame with Protected Bit set and + * encrypted payload. However in case of PMF it delivers decrypted + * frames with Protected Bit set. + */ + if (ieee80211_is_auth(hdr->frame_control)) + return false; + + /* qca99x0 based FW delivers broadcast or multicast management frames + * (ex: group privacy action frames in mesh) as encrypted payload. + */ + if (is_multicast_ether_addr(ieee80211_get_DA(hdr)) && + ar->hw_params.sw_decrypt_mcast_mgmt) + return false; + + return true; +} + int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_mgmt_rx_ev_arg arg = {}; @@ -2326,11 +2352,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_wmi_handle_wep_reauth(ar, skb, status); - /* FW delivers WEP Shared Auth frame with Protected Bit set and - * encrypted payload. However in case of PMF it delivers decrypted - * frames with Protected Bit set. */ - if (ieee80211_has_protected(hdr->frame_control) && - !ieee80211_is_auth(hdr->frame_control)) { + if (ath10k_wmi_rx_is_decrypted(ar, hdr)) { status->flag |= RX_FLAG_DECRYPTED; if (!ieee80211_is_action(hdr->frame_control) && @@ -2347,7 +2369,7 @@ int ath10k_wmi_event_mgmt_rx(struct ath10k *ar, struct sk_buff *skb) ath10k_mac_handle_beacon(ar, skb); ath10k_dbg(ar, ATH10K_DBG_MGMT, - "event mgmt rx skb %p len %d ftype %02x stype %02x\n", + "event mgmt rx skb %pK len %d ftype %02x stype %02x\n", skb, skb->len, fc & IEEE80211_FCTL_FTYPE, fc & IEEE80211_FCTL_STYPE); @@ -2495,7 +2517,21 @@ exit: void ath10k_wmi_event_echo(struct ath10k *ar, struct sk_buff *skb) { - ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_ECHO_EVENTID\n"); + struct wmi_echo_ev_arg arg = {}; + int ret; + + ret = ath10k_wmi_pull_echo_ev(ar, skb, &arg); + if (ret) { + ath10k_warn(ar, "failed to parse echo: %d\n", ret); + return; + } + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi event echo value 0x%08x\n", + le32_to_cpu(arg.value)); + + if (le32_to_cpu(arg.value) == ATH10K_WMI_BARRIER_ECHO_ID) + complete(&ar->wmi.barrier); } int ath10k_wmi_event_debug_mesg(struct ath10k *ar, struct sk_buff *skb) @@ -3478,6 +3514,12 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) continue; } + /* mac80211 would have already asked us to stop beaconing and + * bring the vdev down, so continue in that case + */ + if (!arvif->is_up) + continue; + /* There are no completions for beacons so wait for next SWBA * before telling mac80211 to decrement CSA counter * @@ -3527,7 +3569,6 @@ void ath10k_wmi_event_host_swba(struct ath10k *ar, struct sk_buff *skb) ath10k_warn(ar, "failed to map beacon: %d\n", ret); dev_kfree_skb_any(bcn); - ret = -EIO; goto skip; } @@ -4792,6 +4833,17 @@ static int ath10k_wmi_op_pull_roam_ev(struct ath10k *ar, struct sk_buff *skb, return 0; } +static int ath10k_wmi_op_pull_echo_ev(struct ath10k *ar, + struct sk_buff *skb, + struct wmi_echo_ev_arg *arg) +{ + struct wmi_echo_event *ev = (void *)skb->data; + + arg->value = ev->value; + + return 0; +} + int ath10k_wmi_event_ready(struct ath10k *ar, struct sk_buff *skb) { struct wmi_rdy_ev_arg arg = {}; @@ -5124,6 +5176,7 @@ static void ath10k_wmi_10_2_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_10_2_event_id id; + bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); @@ -5133,6 +5186,18 @@ static void ath10k_wmi_10_2_op_rx(struct ath10k *ar, struct sk_buff *skb) trace_ath10k_wmi_event(ar, id, skb->data, skb->len); + consumed = ath10k_tm_event_wmi(ar, id, skb); + + /* Ready event must be handled normally also in UTF mode so that we + * know the UTF firmware has booted, others we are just bypass WMI + * events to testmode. + */ + if (consumed && id != WMI_10_2_READY_EVENTID) { + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi testmode consumed 0x%x\n", id); + goto out; + } + switch (id) { case WMI_10_2_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); @@ -5248,6 +5313,7 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) { struct wmi_cmd_hdr *cmd_hdr; enum wmi_10_4_event_id id; + bool consumed; cmd_hdr = (struct wmi_cmd_hdr *)skb->data; id = MS(__le32_to_cpu(cmd_hdr->cmd_id), WMI_CMD_HDR_CMD_ID); @@ -5257,6 +5323,18 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) trace_ath10k_wmi_event(ar, id, skb->data, skb->len); + consumed = ath10k_tm_event_wmi(ar, id, skb); + + /* Ready event must be handled normally also in UTF mode so that we + * know the UTF firmware has booted, others we are just bypass WMI + * events to testmode. + */ + if (consumed && id != WMI_10_4_READY_EVENTID) { + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi testmode consumed 0x%x\n", id); + goto out; + } + switch (id) { case WMI_10_4_MGMT_RX_EVENTID: ath10k_wmi_event_mgmt_rx(ar, skb); @@ -5306,6 +5384,7 @@ static void ath10k_wmi_10_4_op_rx(struct ath10k *ar, struct sk_buff *skb) break; case WMI_10_4_WOW_WAKEUP_HOST_EVENTID: case WMI_10_4_PEER_RATECODE_LIST_EVENTID: + case WMI_10_4_WDS_PEER_EVENTID: ath10k_dbg(ar, ATH10K_DBG_WMI, "received event id %d not implemented\n", id); break; @@ -6863,7 +6942,7 @@ ath10k_wmi_op_gen_force_fw_hang(struct ath10k *ar, } static struct sk_buff * -ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, +ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, u32 log_level) { struct wmi_dbglog_cfg_cmd *cmd; @@ -6900,6 +6979,44 @@ ath10k_wmi_op_gen_dbglog_cfg(struct ath10k *ar, u32 module_enable, return skb; } +static struct sk_buff * +ath10k_wmi_10_4_op_gen_dbglog_cfg(struct ath10k *ar, u64 module_enable, + u32 log_level) +{ + struct wmi_10_4_dbglog_cfg_cmd *cmd; + struct sk_buff *skb; + u32 cfg; + + skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); + if (!skb) + return ERR_PTR(-ENOMEM); + + cmd = (struct wmi_10_4_dbglog_cfg_cmd *)skb->data; + + if (module_enable) { + cfg = SM(log_level, + ATH10K_DBGLOG_CFG_LOG_LVL); + } else { + /* set back defaults, all modules with WARN level */ + cfg = SM(ATH10K_DBGLOG_LEVEL_WARN, + ATH10K_DBGLOG_CFG_LOG_LVL); + module_enable = ~0; + } + + cmd->module_enable = __cpu_to_le64(module_enable); + cmd->module_valid = __cpu_to_le64(~0); + cmd->config_enable = __cpu_to_le32(cfg); + cmd->config_valid = __cpu_to_le32(ATH10K_DBGLOG_CFG_LOG_LVL_MASK); + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi dbglog cfg modules 0x%016llx 0x%016llx config %08x %08x\n", + __le64_to_cpu(cmd->module_enable), + __le64_to_cpu(cmd->module_valid), + __le32_to_cpu(cmd->config_enable), + __le32_to_cpu(cmd->config_valid)); + return skb; +} + static struct sk_buff * ath10k_wmi_op_gen_pktlog_enable(struct ath10k *ar, u32 ev_bitmap) { @@ -7649,6 +7766,48 @@ ath10k_wmi_10_4_ext_resource_config(struct ath10k *ar, return skb; } +static struct sk_buff * +ath10k_wmi_op_gen_echo(struct ath10k *ar, u32 value) +{ + struct wmi_echo_cmd *cmd; + struct sk_buff *skb; + + skb = ath10k_wmi_alloc_skb(ar, sizeof(*cmd)); + if (!skb) + return ERR_PTR(-ENOMEM); + + cmd = (struct wmi_echo_cmd *)skb->data; + cmd->value = cpu_to_le32(value); + + ath10k_dbg(ar, ATH10K_DBG_WMI, + "wmi echo value 0x%08x\n", value); + return skb; +} + +int +ath10k_wmi_barrier(struct ath10k *ar) +{ + int ret; + int time_left; + + spin_lock_bh(&ar->data_lock); + reinit_completion(&ar->wmi.barrier); + spin_unlock_bh(&ar->data_lock); + + ret = ath10k_wmi_echo(ar, ATH10K_WMI_BARRIER_ECHO_ID); + if (ret) { + ath10k_warn(ar, "failed to submit wmi echo: %d\n", ret); + return ret; + } + + time_left = wait_for_completion_timeout(&ar->wmi.barrier, + ATH10K_WMI_BARRIER_TIMEOUT_HZ); + if (!time_left) + return -ETIMEDOUT; + + return 0; +} + static const struct wmi_ops wmi_ops = { .rx = ath10k_wmi_op_rx, .map_svc = wmi_main_svc_map, @@ -7665,6 +7824,7 @@ static const struct wmi_ops wmi_ops = { .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_fw_stats = ath10k_wmi_main_op_pull_fw_stats, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7709,6 +7869,7 @@ static const struct wmi_ops wmi_ops = { .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_main_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ @@ -7738,6 +7899,7 @@ static const struct wmi_ops wmi_10_1_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7777,6 +7939,7 @@ static const struct wmi_ops wmi_10_1_ops = { .gen_delba_send = ath10k_wmi_op_gen_delba_send, .fw_stats_fill = ath10k_wmi_10x_op_fw_stats_fill, .get_vdev_subtype = ath10k_wmi_op_get_vdev_subtype, + .gen_echo = ath10k_wmi_op_gen_echo, /* .gen_bcn_tmpl not implemented */ /* .gen_prb_tmpl not implemented */ /* .gen_p2p_go_bcn_ie not implemented */ @@ -7796,6 +7959,7 @@ static const struct wmi_ops wmi_10_2_ops = { .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, + .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, @@ -7807,6 +7971,7 @@ static const struct wmi_ops wmi_10_2_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7862,6 +8027,7 @@ static const struct wmi_ops wmi_10_2_4_ops = { .pull_svc_rdy = ath10k_wmi_10x_op_pull_svc_rdy_ev, .gen_pdev_set_rd = ath10k_wmi_10x_op_gen_pdev_set_rd, .gen_start_scan = ath10k_wmi_10x_op_gen_start_scan, + .gen_echo = ath10k_wmi_op_gen_echo, .pull_scan = ath10k_wmi_op_pull_scan_ev, .pull_mgmt_rx = ath10k_wmi_op_pull_mgmt_rx_ev, @@ -7873,6 +8039,7 @@ static const struct wmi_ops wmi_10_2_4_ops = { .pull_phyerr = ath10k_wmi_op_pull_phyerr_ev, .pull_rdy = ath10k_wmi_op_pull_rdy_ev, .pull_roam_ev = ath10k_wmi_op_pull_roam_ev, + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_pdev_suspend = ath10k_wmi_op_gen_pdev_suspend, .gen_pdev_resume = ath10k_wmi_op_gen_pdev_resume, @@ -7968,7 +8135,7 @@ static const struct wmi_ops wmi_10_4_ops = { .gen_pdev_set_wmm = ath10k_wmi_op_gen_pdev_set_wmm, .gen_force_fw_hang = ath10k_wmi_op_gen_force_fw_hang, .gen_mgmt_tx = ath10k_wmi_op_gen_mgmt_tx, - .gen_dbglog_cfg = ath10k_wmi_op_gen_dbglog_cfg, + .gen_dbglog_cfg = ath10k_wmi_10_4_op_gen_dbglog_cfg, .gen_pktlog_enable = ath10k_wmi_op_gen_pktlog_enable, .gen_pktlog_disable = ath10k_wmi_op_gen_pktlog_disable, .gen_pdev_set_quiet_mode = ath10k_wmi_op_gen_pdev_set_quiet_mode, @@ -7980,10 +8147,12 @@ static const struct wmi_ops wmi_10_4_ops = { .ext_resource_config = ath10k_wmi_10_4_ext_resource_config, /* shared with 10.2 */ + .pull_echo_ev = ath10k_wmi_op_pull_echo_ev, .gen_request_stats = ath10k_wmi_op_gen_request_stats, .gen_pdev_get_temperature = ath10k_wmi_10_2_op_gen_pdev_get_temperature, .get_vdev_subtype = ath10k_wmi_10_4_op_get_vdev_subtype, .gen_pdev_bss_chan_info_req = ath10k_wmi_10_2_op_gen_pdev_bss_chan_info, + .gen_echo = ath10k_wmi_op_gen_echo, }; int ath10k_wmi_attach(struct ath10k *ar) @@ -8036,6 +8205,7 @@ int ath10k_wmi_attach(struct ath10k *ar) init_completion(&ar->wmi.service_ready); init_completion(&ar->wmi.unified_ready); + init_completion(&ar->wmi.barrier); INIT_WORK(&ar->svc_rdy_work, ath10k_wmi_event_service_ready_work); diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h index 3ef468893b3f..1b243c899bef 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.h +++ b/drivers/net/wireless/ath/ath10k/wmi.h @@ -55,7 +55,7 @@ * type. * * 6. Comment each parameter part of the WMI command/event structure by - * using the 2 stars at the begining of C comment instead of one star to + * using the 2 stars at the beginning of C comment instead of one star to * enable HTML document generation using Doxygen. * */ @@ -180,6 +180,7 @@ enum wmi_service { WMI_SERVICE_MESH_NON_11S, WMI_SERVICE_PEER_STATS, WMI_SERVICE_RESTRT_CHNL_SUPPORT, + WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, WMI_SERVICE_TX_MODE_PUSH_ONLY, WMI_SERVICE_TX_MODE_PUSH_PULL, WMI_SERVICE_TX_MODE_DYNAMIC, @@ -305,6 +306,7 @@ enum wmi_10_4_service { WMI_10_4_SERVICE_RESTRT_CHNL_SUPPORT, WMI_10_4_SERVICE_PEER_STATS, WMI_10_4_SERVICE_MESH_11S, + WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY, WMI_10_4_SERVICE_TX_MODE_PUSH_PULL, WMI_10_4_SERVICE_TX_MODE_DYNAMIC, @@ -402,6 +404,7 @@ static inline char *wmi_service_name(int service_id) SVCSTR(WMI_SERVICE_MESH_NON_11S); SVCSTR(WMI_SERVICE_PEER_STATS); SVCSTR(WMI_SERVICE_RESTRT_CHNL_SUPPORT); + SVCSTR(WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT); SVCSTR(WMI_SERVICE_TX_MODE_PUSH_ONLY); SVCSTR(WMI_SERVICE_TX_MODE_PUSH_PULL); SVCSTR(WMI_SERVICE_TX_MODE_DYNAMIC); @@ -652,6 +655,8 @@ static inline void wmi_10_4_svc_map(const __le32 *in, unsigned long *out, WMI_SERVICE_PEER_STATS, len); SVCMAP(WMI_10_4_SERVICE_MESH_11S, WMI_SERVICE_MESH_11S, len); + SVCMAP(WMI_10_4_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, + WMI_SERVICE_PERIODIC_CHAN_STAT_SUPPORT, len); SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_ONLY, WMI_SERVICE_TX_MODE_PUSH_ONLY, len); SVCMAP(WMI_10_4_SERVICE_TX_MODE_PUSH_PULL, @@ -2082,7 +2087,7 @@ struct wmi_resource_config { * In offload mode target supports features like WOW, chatter and * other protocol offloads. In order to support them some * functionalities like reorder buffering, PN checking need to be - * done in target. This determines maximum number of peers suported + * done in target. This determines maximum number of peers supported * by target in offload mode */ __le32 num_offload_peers; @@ -2263,7 +2268,7 @@ struct wmi_resource_config { * Max. number of Tx fragments per MSDU * This parameter controls the max number of Tx fragments per MSDU. * This is sent by the target as part of the WMI_SERVICE_READY event - * and is overriden by the OS shim as required. + * and is overridden by the OS shim as required. */ __le32 max_frag_entries; } __packed; @@ -2445,7 +2450,7 @@ struct wmi_resource_config_10x { * Max. number of Tx fragments per MSDU * This parameter controls the max number of Tx fragments per MSDU. * This is sent by the target as part of the WMI_SERVICE_READY event - * and is overriden by the OS shim as required. + * and is overridden by the OS shim as required. */ __le32 max_frag_entries; } __packed; @@ -2739,7 +2744,7 @@ struct wmi_init_cmd { struct wmi_host_mem_chunks mem_chunks; } __packed; -/* _10x stucture is from 10.X FW API */ +/* _10x structure is from 10.X FW API */ struct wmi_init_cmd_10x { struct wmi_resource_config_10x resource_config; struct wmi_host_mem_chunks mem_chunks; @@ -3962,7 +3967,7 @@ struct wmi_pdev_stats_tx { /* illegal rate phy errors */ __le32 illgl_rate_phy_err; - /* wal pdev continous xretry */ + /* wal pdev continuous xretry */ __le32 pdev_cont_xretry; /* wal pdev continous xretry */ @@ -4217,10 +4222,10 @@ struct wmi_10_2_stats_event { */ struct wmi_pdev_stats_base { __le32 chan_nf; - __le32 tx_frame_count; - __le32 rx_frame_count; - __le32 rx_clear_count; - __le32 cycle_count; + __le32 tx_frame_count; /* Cycles spent transmitting frames */ + __le32 rx_frame_count; /* Cycles spent receiving frames */ + __le32 rx_clear_count; /* Total channel busy time, evidently */ + __le32 cycle_count; /* Total on-channel time */ __le32 phy_err_count; __le32 chan_tx_pwr; } __packed; @@ -4456,9 +4461,9 @@ struct wmi_vdev_start_request_cmd { __le32 flags; /* ssid field. Only valid for AP/GO/IBSS/BTAmp VDEV type. */ struct wmi_ssid ssid; - /* beacon/probe reponse xmit rate. Applicable for SoftAP. */ + /* beacon/probe response xmit rate. Applicable for SoftAP. */ __le32 bcn_tx_rate; - /* beacon/probe reponse xmit power. Applicable for SoftAP. */ + /* beacon/probe response xmit power. Applicable for SoftAP. */ __le32 bcn_tx_power; /* number of p2p NOA descriptor(s) from scan entry */ __le32 num_noa_descriptors; @@ -4686,7 +4691,7 @@ enum wmi_vdev_param { WMI_VDEV_PARAM_BEACON_INTERVAL, /* Listen interval in TUs */ WMI_VDEV_PARAM_LISTEN_INTERVAL, - /* muticast rate in Mbps */ + /* multicast rate in Mbps */ WMI_VDEV_PARAM_MULTICAST_RATE, /* management frame rate in Mbps */ WMI_VDEV_PARAM_MGMT_TX_RATE, @@ -4817,7 +4822,7 @@ enum wmi_10x_vdev_param { WMI_10X_VDEV_PARAM_BEACON_INTERVAL, /* Listen interval in TUs */ WMI_10X_VDEV_PARAM_LISTEN_INTERVAL, - /* muticast rate in Mbps */ + /* multicast rate in Mbps */ WMI_10X_VDEV_PARAM_MULTICAST_RATE, /* management frame rate in Mbps */ WMI_10X_VDEV_PARAM_MGMT_TX_RATE, @@ -5062,7 +5067,7 @@ struct wmi_vdev_simple_event { } __packed; /* VDEV start response status codes */ -/* VDEV succesfully started */ +/* VDEV successfully started */ #define WMI_INIFIED_VDEV_START_RESPONSE_STATUS_SUCCESS 0x0 /* requested VDEV not found */ @@ -5378,7 +5383,7 @@ enum wmi_sta_ps_param_pspoll_count { #define WMI_UAPSD_AC_TYPE_TRIG 1 #define WMI_UAPSD_AC_BIT_MASK(ac, type) \ - ((type == WMI_UAPSD_AC_TYPE_DELI) ? (1 << (ac << 1)) : (1 << ((ac << 1) + 1))) + (type == WMI_UAPSD_AC_TYPE_DELI ? 1 << (ac << 1) : 1 << ((ac << 1) + 1)) enum wmi_sta_ps_param_uapsd { WMI_STA_PS_UAPSD_AC0_DELIVERY_EN = (1 << 0), @@ -6169,6 +6174,20 @@ struct wmi_dbglog_cfg_cmd { __le32 config_valid; } __packed; +struct wmi_10_4_dbglog_cfg_cmd { + /* bitmask to hold mod id config*/ + __le64 module_enable; + + /* see ATH10K_DBGLOG_CFG_ */ + __le32 config_enable; + + /* mask of module id bits to be changed */ + __le64 module_valid; + + /* mask of config bits to be changed, see ATH10K_DBGLOG_CFG_ */ + __le32 config_valid; +} __packed; + enum wmi_roam_reason { WMI_ROAM_REASON_BETTER_AP = 1, WMI_ROAM_REASON_BEACON_MISS = 2, @@ -6296,6 +6315,10 @@ struct wmi_roam_ev_arg { __le32 rssi; }; +struct wmi_echo_ev_arg { + __le32 value; +}; + struct wmi_pdev_temperature_event { /* temperature value in Celcius degree */ __le32 temperature; @@ -6624,5 +6647,6 @@ void ath10k_wmi_10_4_op_fw_stats_fill(struct ath10k *ar, char *buf); int ath10k_wmi_op_get_vdev_subtype(struct ath10k *ar, enum wmi_vdev_subtype subtype); +int ath10k_wmi_barrier(struct ath10k *ar); #endif /* _WMI_H_ */ diff --git a/drivers/net/wireless/ath/ath5k/debug.c b/drivers/net/wireless/ath/ath5k/debug.c index 929d7ccc031c..4f8d9ed04f5e 100644 --- a/drivers/net/wireless/ath/ath5k/debug.c +++ b/drivers/net/wireless/ath/ath5k/debug.c @@ -909,7 +909,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) struct ath5k_hw *ah = inode->i_private; bool res; int i, ret; - u32 eesize; + u32 eesize; /* NB: in 16-bit words */ u16 val, *buf; /* Get eeprom size */ @@ -932,7 +932,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) /* Create buffer and read in eeprom */ - buf = vmalloc(eesize); + buf = vmalloc(eesize * 2); if (!buf) { ret = -ENOMEM; goto err; @@ -952,7 +952,7 @@ static int open_file_eeprom(struct inode *inode, struct file *file) } ep->buf = buf; - ep->len = i; + ep->len = eesize * 2; file->private_data = (void *)ep; diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 72e2ec67768d..b7fe0af4cb24 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1449,14 +1449,14 @@ static int ath6kl_cfg80211_get_txpower(struct wiphy *wiphy, return -EIO; if (test_bit(CONNECTED, &vif->flags)) { - ar->tx_pwr = 0; + ar->tx_pwr = 255; if (ath6kl_wmi_get_tx_pwr_cmd(ar->wmi, vif->fw_vif_idx) != 0) { ath6kl_err("ath6kl_wmi_get_tx_pwr_cmd failed\n"); return -EIO; } - wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 0, + wait_event_interruptible_timeout(ar->event_wq, ar->tx_pwr != 255, 5 * HZ); if (signal_pending(current)) { diff --git a/drivers/net/wireless/ath/ath6kl/hif.c b/drivers/net/wireless/ath/ath6kl/hif.c index 18c070850a09..d1942537ea10 100644 --- a/drivers/net/wireless/ath/ath6kl/hif.c +++ b/drivers/net/wireless/ath/ath6kl/hif.c @@ -64,7 +64,7 @@ int ath6kl_hif_rw_comp_handler(void *context, int status) } EXPORT_SYMBOL(ath6kl_hif_rw_comp_handler); -#define REG_DUMP_COUNT_AR6003 60 +#define REGISTER_DUMP_COUNT 60 #define REGISTER_DUMP_LEN_MAX 60 static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) @@ -73,9 +73,6 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) u32 i, address, regdump_addr = 0; int ret; - if (ar->target_type != TARGET_TYPE_AR6003) - return; - /* the reg dump pointer is copied to the host interest area */ address = ath6kl_get_hi_item_addr(ar, HI_ITEM(hi_failure_state)); address = TARG_VTOP(ar->target_type, address); @@ -95,7 +92,7 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) /* fetch register dump data */ ret = ath6kl_diag_read(ar, regdump_addr, (u8 *)®dump_val[0], - REG_DUMP_COUNT_AR6003 * (sizeof(u32))); + REGISTER_DUMP_COUNT * (sizeof(u32))); if (ret) { ath6kl_warn("failed to get register dump: %d\n", ret); return; @@ -105,9 +102,9 @@ static void ath6kl_hif_dump_fw_crash(struct ath6kl *ar) ath6kl_info("hw 0x%x fw %s\n", ar->wiphy->hw_version, ar->wiphy->fw_version); - BUILD_BUG_ON(REG_DUMP_COUNT_AR6003 % 4); + BUILD_BUG_ON(REGISTER_DUMP_COUNT % 4); - for (i = 0; i < REG_DUMP_COUNT_AR6003; i += 4) { + for (i = 0; i < REGISTER_DUMP_COUNT; i += 4) { ath6kl_info("%d: 0x%8.8x 0x%8.8x 0x%8.8x 0x%8.8x\n", i, le32_to_cpu(regdump_val[i]), diff --git a/drivers/net/wireless/ath/ath6kl/wmi.c b/drivers/net/wireless/ath/ath6kl/wmi.c index b8cf04d11975..3fd1cc98fd2f 100644 --- a/drivers/net/wireless/ath/ath6kl/wmi.c +++ b/drivers/net/wireless/ath/ath6kl/wmi.c @@ -3520,7 +3520,7 @@ int ath6kl_wmi_set_pvb_cmd(struct wmi *wmi, u8 if_idx, u16 aid, ret = ath6kl_wmi_cmd_send(wmi, if_idx, skb, WMI_AP_SET_PVB_CMDID, NO_SYNC_WMIFLAG); - return 0; + return ret; } int ath6kl_wmi_set_rx_frame_format_cmd(struct wmi *wmi, u8 if_idx, diff --git a/drivers/net/wireless/ath/ath9k/Kconfig b/drivers/net/wireless/ath/ath9k/Kconfig index f68cb00450e0..8f231c67dd51 100644 --- a/drivers/net/wireless/ath/ath9k/Kconfig +++ b/drivers/net/wireless/ath/ath9k/Kconfig @@ -180,7 +180,7 @@ config ATH9K_HTC_DEBUGFS config ATH9K_HWRNG bool "Random number generator support" depends on ATH9K && (HW_RANDOM = y || HW_RANDOM = ATH9K) - default y + default n ---help--- This option incorporates the ADC register output as a source of randomness into Linux entropy pool (/dev/urandom and /dev/random) diff --git a/drivers/net/wireless/ath/ath9k/ar5008_phy.c b/drivers/net/wireless/ath/ath9k/ar5008_phy.c index 1b271b99c49e..8eea8d22e72e 100644 --- a/drivers/net/wireless/ath/ath9k/ar5008_phy.c +++ b/drivers/net/wireless/ath/ath9k/ar5008_phy.c @@ -260,8 +260,8 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, int cur_bin; int upper, lower, cur_vit_mask; int i; - int8_t mask_m[123]; - int8_t mask_p[123]; + int8_t mask_m[123] = {0}; + int8_t mask_p[123] = {0}; int8_t mask_amt; int tmp_mask; static const int pilot_mask_reg[4] = { @@ -274,9 +274,6 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, }; static const int inc[4] = { 0, 100, 0, 0 }; - memset(&mask_m, 0, sizeof(int8_t) * 123); - memset(&mask_p, 0, sizeof(int8_t) * 123); - cur_bin = -6000; upper = bin + 100; lower = bin - 100; @@ -302,7 +299,7 @@ void ar5008_hw_cmn_spur_mitigate(struct ath_hw *ah, upper = bin + 120; lower = bin - 120; - for (i = 0; i < 123; i++) { + for (i = 0; i < ARRAY_SIZE(mask_m); i++) { if ((cur_vit_mask > lower) && (cur_vit_mask < upper)) { /* workaround for gcc bug #37014 */ volatile int tmp_v = abs(cur_vit_mask - bin); diff --git a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c index 5bd2cbaf582d..08607d7fdb56 100644 --- a/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c +++ b/drivers/net/wireless/ath/ath9k/ar9003_eeprom.c @@ -3252,7 +3252,8 @@ static int ar9300_eeprom_restore_flash(struct ath_hw *ah, u8 *mptr, int i; for (i = 0; i < mdata_size / 2; i++, data++) - ath9k_hw_nvram_read(ah, i, data); + if (!ath9k_hw_nvram_read(ah, i, data)) + return -EIO; return 0; } @@ -3282,7 +3283,8 @@ static int ar9300_eeprom_restore_internal(struct ath_hw *ah, if (ath9k_hw_use_flash(ah)) { u8 txrx; - ar9300_eeprom_restore_flash(ah, mptr, mdata_size); + if (ar9300_eeprom_restore_flash(ah, mptr, mdata_size)) + return -EIO; /* check if eeprom contains valid data */ eep = (struct ar9300_eeprom *) mptr; diff --git a/drivers/net/wireless/ath/ath9k/gpio.c b/drivers/net/wireless/ath/ath9k/gpio.c index 490f74d9ddf0..ddb28861e7fe 100644 --- a/drivers/net/wireless/ath/ath9k/gpio.c +++ b/drivers/net/wireless/ath/ath9k/gpio.c @@ -22,7 +22,7 @@ #ifdef CONFIG_MAC80211_LEDS -void ath_fill_led_pin(struct ath_softc *sc) +static void ath_fill_led_pin(struct ath_softc *sc) { struct ath_hw *ah = sc->sc_ah; diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index 7cb65c303f8d..e9f32b52fc8c 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -924,7 +924,7 @@ static void ath9k_vif_iter_set_beacon(struct ath9k_vif_iter_data *iter_data, } else { if (iter_data->primary_beacon_vif->type != NL80211_IFTYPE_AP && vif->type == NL80211_IFTYPE_AP) - iter_data->primary_beacon_vif = vif; + iter_data->primary_beacon_vif = vif; } iter_data->beacons = true; diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 8ddd604bd00c..52bfbb988611 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -50,9 +50,11 @@ static u16 bits_per_symbol[][2] = { static void ath_tx_send_normal(struct ath_softc *sc, struct ath_txq *txq, struct ath_atx_tid *tid, struct sk_buff *skb); static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, - int tx_flags, struct ath_txq *txq); + int tx_flags, struct ath_txq *txq, + struct ieee80211_sta *sta); static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, struct ath_txq *txq, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok); static void ath_tx_txqaddbuf(struct ath_softc *sc, struct ath_txq *txq, struct list_head *head, bool internal); @@ -77,6 +79,22 @@ enum { /* Aggregation logic */ /*********************/ +static void ath_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sta *sta = info->status.status_driver_data[0]; + + if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + ieee80211_tx_status(hw, skb); + return; + } + + if (sta) + ieee80211_tx_status_noskb(hw, sta, info); + + dev_kfree_skb(skb); +} + void ath_txq_lock(struct ath_softc *sc, struct ath_txq *txq) __acquires(&txq->axq_lock) { @@ -92,6 +110,7 @@ void ath_txq_unlock(struct ath_softc *sc, struct ath_txq *txq) void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq) __releases(&txq->axq_lock) { + struct ieee80211_hw *hw = sc->hw; struct sk_buff_head q; struct sk_buff *skb; @@ -100,7 +119,7 @@ void ath_txq_unlock_complete(struct ath_softc *sc, struct ath_txq *txq) spin_unlock_bh(&txq->axq_lock); while ((skb = __skb_dequeue(&q))) - ieee80211_tx_status(sc->hw, skb); + ath_tx_status(hw, skb); } static void ath_tx_queue_tid(struct ath_softc *sc, struct ath_txq *txq, @@ -253,7 +272,7 @@ static void ath_tx_flush_tid(struct ath_softc *sc, struct ath_atx_tid *tid) } list_add_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); } if (sendbar) { @@ -318,12 +337,12 @@ static void ath_tid_drain(struct ath_softc *sc, struct ath_txq *txq, bf = fi->bf; if (!bf) { - ath_tx_complete(sc, skb, ATH_TX_ERROR, txq); + ath_tx_complete(sc, skb, ATH_TX_ERROR, txq, NULL); continue; } list_add_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); } } @@ -426,15 +445,14 @@ static void ath_tx_count_frames(struct ath_softc *sc, struct ath_buf *bf, static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, struct ath_buf *bf, struct list_head *bf_q, + struct ieee80211_sta *sta, + struct ath_atx_tid *tid, struct ath_tx_status *ts, int txok) { struct ath_node *an = NULL; struct sk_buff *skb; - struct ieee80211_sta *sta; - struct ieee80211_hw *hw = sc->hw; struct ieee80211_hdr *hdr; struct ieee80211_tx_info *tx_info; - struct ath_atx_tid *tid = NULL; struct ath_buf *bf_next, *bf_last = bf->bf_lastbf; struct list_head bf_head; struct sk_buff_head bf_pending; @@ -460,12 +478,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, for (i = 0; i < ts->ts_rateindex; i++) retries += rates[i].count; - rcu_read_lock(); - - sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); if (!sta) { - rcu_read_unlock(); - INIT_LIST_HEAD(&bf_head); while (bf) { bf_next = bf->bf_next; @@ -473,7 +486,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, if (!bf->bf_state.stale || bf_next != NULL) list_move_tail(&bf->list, &bf_head); - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, ts, 0); bf = bf_next; } @@ -481,7 +494,6 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, } an = (struct ath_node *)sta->drv_priv; - tid = ath_get_skb_tid(sc, an, skb); seq_first = tid->seq_start; isba = ts->ts_flags & ATH9K_TX_BA; @@ -583,7 +595,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ts); } - ath_tx_complete_buf(sc, bf, txq, &bf_head, ts, + ath_tx_complete_buf(sc, bf, txq, &bf_head, sta, ts, !txfail); } else { if (tx_info->flags & IEEE80211_TX_STATUS_EOSP) { @@ -604,7 +616,8 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ath_tx_update_baw(sc, tid, seqno); ath_tx_complete_buf(sc, bf, txq, - &bf_head, ts, 0); + &bf_head, NULL, ts, + 0); bar_index = max_t(int, bar_index, ATH_BA_INDEX(seq_first, seqno)); break; @@ -648,8 +661,6 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, ath_txq_lock(sc, txq); } - rcu_read_unlock(); - if (needreset) ath9k_queue_reset(sc, RESET_TYPE_TX_ERROR); } @@ -664,7 +675,11 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, struct ath_tx_status *ts, struct ath_buf *bf, struct list_head *bf_head) { + struct ieee80211_hw *hw = sc->hw; struct ieee80211_tx_info *info; + struct ieee80211_sta *sta; + struct ieee80211_hdr *hdr; + struct ath_atx_tid *tid = NULL; bool txok, flush; txok = !(ts->ts_status & ATH9K_TXERR_MASK); @@ -677,6 +692,16 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, ts->duration = ath9k_hw_get_duration(sc->sc_ah, bf->bf_desc, ts->ts_rateindex); + + hdr = (struct ieee80211_hdr *) bf->bf_mpdu->data; + sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); + if (sta) { + struct ath_node *an = (struct ath_node *)sta->drv_priv; + tid = ath_get_skb_tid(sc, an, bf->bf_mpdu); + if (ts->ts_status & (ATH9K_TXERR_FILT | ATH9K_TXERR_XRETRY)) + tid->clear_ps_filter = true; + } + if (!bf_isampdu(bf)) { if (!flush) { info = IEEE80211_SKB_CB(bf->bf_mpdu); @@ -685,9 +710,9 @@ static void ath_tx_process_buffer(struct ath_softc *sc, struct ath_txq *txq, ath_tx_rc_status(sc, bf, ts, 1, txok ? 0 : 1, txok); ath_dynack_sample_tx_ts(sc->sc_ah, bf->bf_mpdu, ts); } - ath_tx_complete_buf(sc, bf, txq, bf_head, ts, txok); + ath_tx_complete_buf(sc, bf, txq, bf_head, sta, ts, txok); } else - ath_tx_complete_aggr(sc, txq, bf, bf_head, ts, txok); + ath_tx_complete_aggr(sc, txq, bf, bf_head, sta, tid, ts, txok); if (!flush) ath_txq_schedule(sc, txq); @@ -923,7 +948,7 @@ ath_tx_get_tid_subframe(struct ath_softc *sc, struct ath_txq *txq, list_add(&bf->list, &bf_head); __skb_unlink(skb, *q); ath_tx_update_baw(sc, tid, seqno); - ath_tx_complete_buf(sc, bf, txq, &bf_head, &ts, 0); + ath_tx_complete_buf(sc, bf, txq, &bf_head, NULL, &ts, 0); continue; } @@ -1832,6 +1857,7 @@ static void ath_drain_txq_list(struct ath_softc *sc, struct ath_txq *txq, */ void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq) { + rcu_read_lock(); ath_txq_lock(sc, txq); if (sc->sc_ah->caps.hw_caps & ATH9K_HW_CAP_EDMA) { @@ -1850,6 +1876,7 @@ void ath_draintxq(struct ath_softc *sc, struct ath_txq *txq) ath_drain_txq_list(sc, txq, &txq->axq_q); ath_txq_unlock_complete(sc, txq); + rcu_read_unlock(); } bool ath_drain_all_txq(struct ath_softc *sc) @@ -2472,7 +2499,8 @@ void ath_tx_cabq(struct ieee80211_hw *hw, struct ieee80211_vif *vif, /*****************/ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, - int tx_flags, struct ath_txq *txq) + int tx_flags, struct ath_txq *txq, + struct ieee80211_sta *sta) { struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); struct ath_common *common = ath9k_hw_common(sc->sc_ah); @@ -2492,15 +2520,17 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, tx_info->flags |= IEEE80211_TX_STAT_ACK; } - padpos = ieee80211_hdrlen(hdr->frame_control); - padsize = padpos & 3; - if (padsize && skb->len>padpos+padsize) { - /* - * Remove MAC header padding before giving the frame back to - * mac80211. - */ - memmove(skb->data + padsize, skb->data, padpos); - skb_pull(skb, padsize); + if (tx_info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) { + padpos = ieee80211_hdrlen(hdr->frame_control); + padsize = padpos & 3; + if (padsize && skb->len>padpos+padsize) { + /* + * Remove MAC header padding before giving the frame back to + * mac80211. + */ + memmove(skb->data + padsize, skb->data, padpos); + skb_pull(skb, padsize); + } } spin_lock_irqsave(&sc->sc_pm_lock, flags); @@ -2515,12 +2545,14 @@ static void ath_tx_complete(struct ath_softc *sc, struct sk_buff *skb, } spin_unlock_irqrestore(&sc->sc_pm_lock, flags); - __skb_queue_tail(&txq->complete_q, skb); ath_txq_skb_done(sc, txq, skb); + tx_info->status.status_driver_data[0] = sta; + __skb_queue_tail(&txq->complete_q, skb); } static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, struct ath_txq *txq, struct list_head *bf_q, + struct ieee80211_sta *sta, struct ath_tx_status *ts, int txok) { struct sk_buff *skb = bf->bf_mpdu; @@ -2548,7 +2580,7 @@ static void ath_tx_complete_buf(struct ath_softc *sc, struct ath_buf *bf, complete(&sc->paprd_complete); } else { ath_debug_stat_tx(sc, bf, ts, txq, tx_flags); - ath_tx_complete(sc, skb, tx_flags, txq); + ath_tx_complete(sc, skb, tx_flags, txq, sta); } skip_tx_complete: /* At this point, skb (bf->bf_mpdu) is consumed...make sure we don't @@ -2700,10 +2732,12 @@ void ath_tx_tasklet(struct ath_softc *sc) u32 qcumask = ((1 << ATH9K_NUM_TX_QUEUES) - 1) & ah->intr_txqs; int i; + rcu_read_lock(); for (i = 0; i < ATH9K_NUM_TX_QUEUES; i++) { if (ATH_TXQ_SETUP(sc, i) && (qcumask & (1 << i))) ath_tx_processq(sc, &sc->tx.txq[i]); } + rcu_read_unlock(); } void ath_tx_edma_tasklet(struct ath_softc *sc) @@ -2717,6 +2751,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) struct list_head *fifo_list; int status; + rcu_read_lock(); for (;;) { if (test_bit(ATH_OP_HW_RESET, &common->op_flags)) break; @@ -2787,6 +2822,7 @@ void ath_tx_edma_tasklet(struct ath_softc *sc) ath_tx_process_buffer(sc, txq, &ts, bf, &bf_head); ath_txq_unlock_complete(sc, txq); } + rcu_read_unlock(); } /*****************/ diff --git a/drivers/net/wireless/ath/carl9170/usb.c b/drivers/net/wireless/ath/carl9170/usb.c index 76842e6ca38e..99ab20334d21 100644 --- a/drivers/net/wireless/ath/carl9170/usb.c +++ b/drivers/net/wireless/ath/carl9170/usb.c @@ -670,6 +670,7 @@ int carl9170_exec_cmd(struct ar9170 *ar, const enum carl9170_cmd_oids cmd, ar->readlen = outlen; spin_unlock_bh(&ar->cmd_lock); + reinit_completion(&ar->cmd_wait); err = __carl9170_exec_cmd(ar, &ar->cmd, false); if (!(cmd & CARL9170_CMD_ASYNC_FLAG)) { @@ -778,10 +779,7 @@ void carl9170_usb_stop(struct ar9170 *ar) spin_lock_bh(&ar->cmd_lock); ar->readlen = 0; spin_unlock_bh(&ar->cmd_lock); - complete_all(&ar->cmd_wait); - - /* This is required to prevent an early completion on _start */ - reinit_completion(&ar->cmd_wait); + complete(&ar->cmd_wait); /* * Note: diff --git a/drivers/net/wireless/ath/dfs_pattern_detector.c b/drivers/net/wireless/ath/dfs_pattern_detector.c index 2f8136d50f78..4100ffd42a43 100644 --- a/drivers/net/wireless/ath/dfs_pattern_detector.c +++ b/drivers/net/wireless/ath/dfs_pattern_detector.c @@ -338,7 +338,7 @@ static bool dpd_set_domain(struct dfs_pattern_detector *dpd, return true; } -static struct dfs_pattern_detector default_dpd = { +static const struct dfs_pattern_detector default_dpd = { .exit = dpd_exit, .set_dfs_domain = dpd_set_domain, .add_pulse = dpd_add_pulse, diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index f0e1175fb76a..d117240d9a73 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -354,10 +354,13 @@ static int wil_cfg80211_scan(struct wiphy *wiphy, wil_dbg_misc(wil, "%s(), wdev=0x%p iftype=%d\n", __func__, wdev, wdev->iftype); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { wil_err(wil, "Already scanning\n"); + mutex_unlock(&wil->p2p_wdev_mutex); return -EAGAIN; } + mutex_unlock(&wil->p2p_wdev_mutex); /* check we are client side */ switch (wdev->iftype) { @@ -760,14 +763,11 @@ static enum wmi_key_usage wil_detect_key_usage(struct wil6210_priv *wil, return rc; } -static struct wil_tid_crypto_rx_single * -wil_find_crypto_ctx(struct wil6210_priv *wil, u8 key_index, - enum wmi_key_usage key_usage, const u8 *mac_addr) +static struct wil_sta_info * +wil_find_sta_by_key_usage(struct wil6210_priv *wil, + enum wmi_key_usage key_usage, const u8 *mac_addr) { int cid = -EINVAL; - int tid = 0; - struct wil_sta_info *s; - struct wil_tid_crypto_rx *c; if (key_usage == WMI_KEY_USE_TX_GROUP) return NULL; /* not needed */ @@ -778,18 +778,72 @@ wil_find_crypto_ctx(struct wil6210_priv *wil, u8 key_index, else if (key_usage == WMI_KEY_USE_RX_GROUP) cid = wil_find_cid_by_idx(wil, 0); if (cid < 0) { - wil_err(wil, "No CID for %pM %s[%d]\n", mac_addr, - key_usage_str[key_usage], key_index); + wil_err(wil, "No CID for %pM %s\n", mac_addr, + key_usage_str[key_usage]); return ERR_PTR(cid); } - s = &wil->sta[cid]; - if (key_usage == WMI_KEY_USE_PAIRWISE) - c = &s->tid_crypto_rx[tid]; - else - c = &s->group_crypto_rx; + return &wil->sta[cid]; +} - return &c->key_id[key_index]; +static void wil_set_crypto_rx(u8 key_index, enum wmi_key_usage key_usage, + struct wil_sta_info *cs, + struct key_params *params) +{ + struct wil_tid_crypto_rx_single *cc; + int tid; + + if (!cs) + return; + + switch (key_usage) { + case WMI_KEY_USE_PAIRWISE: + for (tid = 0; tid < WIL_STA_TID_NUM; tid++) { + cc = &cs->tid_crypto_rx[tid].key_id[key_index]; + if (params->seq) + memcpy(cc->pn, params->seq, + IEEE80211_GCMP_PN_LEN); + else + memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); + cc->key_set = true; + } + break; + case WMI_KEY_USE_RX_GROUP: + cc = &cs->group_crypto_rx.key_id[key_index]; + if (params->seq) + memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN); + else + memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); + cc->key_set = true; + break; + default: + break; + } +} + +static void wil_del_rx_key(u8 key_index, enum wmi_key_usage key_usage, + struct wil_sta_info *cs) +{ + struct wil_tid_crypto_rx_single *cc; + int tid; + + if (!cs) + return; + + switch (key_usage) { + case WMI_KEY_USE_PAIRWISE: + for (tid = 0; tid < WIL_STA_TID_NUM; tid++) { + cc = &cs->tid_crypto_rx[tid].key_id[key_index]; + cc->key_set = false; + } + break; + case WMI_KEY_USE_RX_GROUP: + cc = &cs->group_crypto_rx.key_id[key_index]; + cc->key_set = false; + break; + default: + break; + } } static int wil_cfg80211_add_key(struct wiphy *wiphy, @@ -801,24 +855,26 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, int rc; struct wil6210_priv *wil = wiphy_to_wil(wiphy); enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise); - struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil, - key_index, - key_usage, - mac_addr); + struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage, + mac_addr); + + if (!params) { + wil_err(wil, "NULL params\n"); + return -EINVAL; + } wil_dbg_misc(wil, "%s(%pM %s[%d] PN %*phN)\n", __func__, mac_addr, key_usage_str[key_usage], key_index, params->seq_len, params->seq); - if (IS_ERR(cc)) { + if (IS_ERR(cs)) { wil_err(wil, "Not connected, %s(%pM %s[%d] PN %*phN)\n", __func__, mac_addr, key_usage_str[key_usage], key_index, params->seq_len, params->seq); return -EINVAL; } - if (cc) - cc->key_set = false; + wil_del_rx_key(key_index, key_usage, cs); if (params->seq && params->seq_len != IEEE80211_GCMP_PN_LEN) { wil_err(wil, @@ -831,13 +887,8 @@ static int wil_cfg80211_add_key(struct wiphy *wiphy, rc = wmi_add_cipher_key(wil, key_index, mac_addr, params->key_len, params->key, key_usage); - if ((rc == 0) && cc) { - if (params->seq) - memcpy(cc->pn, params->seq, IEEE80211_GCMP_PN_LEN); - else - memset(cc->pn, 0, IEEE80211_GCMP_PN_LEN); - cc->key_set = true; - } + if (!rc) + wil_set_crypto_rx(key_index, key_usage, cs, params); return rc; } @@ -849,20 +900,18 @@ static int wil_cfg80211_del_key(struct wiphy *wiphy, { struct wil6210_priv *wil = wiphy_to_wil(wiphy); enum wmi_key_usage key_usage = wil_detect_key_usage(wil, pairwise); - struct wil_tid_crypto_rx_single *cc = wil_find_crypto_ctx(wil, - key_index, - key_usage, - mac_addr); + struct wil_sta_info *cs = wil_find_sta_by_key_usage(wil, key_usage, + mac_addr); wil_dbg_misc(wil, "%s(%pM %s[%d])\n", __func__, mac_addr, key_usage_str[key_usage], key_index); - if (IS_ERR(cc)) + if (IS_ERR(cs)) wil_info(wil, "Not connected, %s(%pM %s[%d])\n", __func__, mac_addr, key_usage_str[key_usage], key_index); - if (!IS_ERR_OR_NULL(cc)) - cc->key_set = false; + if (!IS_ERR_OR_NULL(cs)) + wil_del_rx_key(key_index, key_usage, cs); return wmi_del_cipher_key(wil, key_index, mac_addr, key_usage); } @@ -1363,23 +1412,16 @@ static void wil_cfg80211_stop_p2p_device(struct wiphy *wiphy, struct wireless_dev *wdev) { struct wil6210_priv *wil = wiphy_to_wil(wiphy); - u8 started; + struct wil_p2p_info *p2p = &wil->p2p; + + if (!p2p->p2p_dev_started) + return; wil_dbg_misc(wil, "%s: entered\n", __func__); mutex_lock(&wil->mutex); - started = wil_p2p_stop_discovery(wil); - if (started && wil->scan_request) { - struct cfg80211_scan_info info = { - .aborted = true, - }; - - cfg80211_scan_done(wil->scan_request, &info); - wil->scan_request = NULL; - wil->radio_wdev = wil->wdev; - } + wil_p2p_stop_radio_operations(wil); + p2p->p2p_dev_started = 0; mutex_unlock(&wil->mutex); - - wil->p2p.p2p_dev_started = 0; } static struct cfg80211_ops wil_cfg80211_ops = { @@ -1464,14 +1506,8 @@ struct wireless_dev *wil_cfg80211_init(struct device *dev) set_wiphy_dev(wdev->wiphy, dev); wil_wiphy_init(wdev->wiphy); - rc = wiphy_register(wdev->wiphy); - if (rc < 0) - goto out_failed_reg; - return wdev; -out_failed_reg: - wiphy_free(wdev->wiphy); out: kfree(wdev); @@ -1487,7 +1523,6 @@ void wil_wdev_free(struct wil6210_priv *wil) if (!wdev) return; - wiphy_unregister(wdev->wiphy); wiphy_free(wdev->wiphy); kfree(wdev); } @@ -1498,11 +1533,11 @@ void wil_p2p_wdev_free(struct wil6210_priv *wil) mutex_lock(&wil->p2p_wdev_mutex); p2p_wdev = wil->p2p_wdev; + wil->p2p_wdev = NULL; + wil->radio_wdev = wil_to_wdev(wil); + mutex_unlock(&wil->p2p_wdev_mutex); if (p2p_wdev) { - wil->p2p_wdev = NULL; - wil->radio_wdev = wil_to_wdev(wil); cfg80211_unregister_wdev(p2p_wdev); kfree(p2p_wdev); } - mutex_unlock(&wil->p2p_wdev_mutex); } diff --git a/drivers/net/wireless/ath/wil6210/debugfs.c b/drivers/net/wireless/ath/wil6210/debugfs.c index a8098b406cc0..5e4058a4037b 100644 --- a/drivers/net/wireless/ath/wil6210/debugfs.c +++ b/drivers/net/wireless/ath/wil6210/debugfs.c @@ -1553,6 +1553,56 @@ static const struct file_operations fops_led_blink_time = { .open = simple_open, }; +/*---------FW capabilities------------*/ +static int wil_fw_capabilities_debugfs_show(struct seq_file *s, void *data) +{ + struct wil6210_priv *wil = s->private; + + seq_printf(s, "fw_capabilities : %*pb\n", WMI_FW_CAPABILITY_MAX, + wil->fw_capabilities); + + return 0; +} + +static int wil_fw_capabilities_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, wil_fw_capabilities_debugfs_show, + inode->i_private); +} + +static const struct file_operations fops_fw_capabilities = { + .open = wil_fw_capabilities_seq_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, +}; + +/*---------FW version------------*/ +static int wil_fw_version_debugfs_show(struct seq_file *s, void *data) +{ + struct wil6210_priv *wil = s->private; + + if (wil->fw_version[0]) + seq_printf(s, "%s\n", wil->fw_version); + else + seq_puts(s, "N/A\n"); + + return 0; +} + +static int wil_fw_version_seq_open(struct inode *inode, struct file *file) +{ + return single_open(file, wil_fw_version_debugfs_show, + inode->i_private); +} + +static const struct file_operations fops_fw_version = { + .open = wil_fw_version_seq_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek, +}; + /*----------------*/ static void wil6210_debugfs_init_blobs(struct wil6210_priv *wil, struct dentry *dbg) @@ -1603,6 +1653,8 @@ static const struct { {"recovery", S_IRUGO | S_IWUSR, &fops_recovery}, {"led_cfg", S_IRUGO | S_IWUSR, &fops_led_cfg}, {"led_blink_time", S_IRUGO | S_IWUSR, &fops_led_blink_time}, + {"fw_capabilities", S_IRUGO, &fops_fw_capabilities}, + {"fw_version", S_IRUGO, &fops_fw_version}, }; static void wil6210_debugfs_init_files(struct wil6210_priv *wil, @@ -1643,7 +1695,6 @@ static void wil6210_debugfs_init_isr(struct wil6210_priv *wil, static const struct dbg_off dbg_wil_off[] = { WIL_FIELD(privacy, S_IRUGO, doff_u32), WIL_FIELD(status[0], S_IRUGO | S_IWUSR, doff_ulong), - WIL_FIELD(fw_version, S_IRUGO, doff_u32), WIL_FIELD(hw_version, S_IRUGO, doff_x32), WIL_FIELD(recovery_count, S_IRUGO, doff_u32), WIL_FIELD(ap_isolate, S_IRUGO, doff_u32), diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h index 7a2c6c129ad5..2f2b910501ba 100644 --- a/drivers/net/wireless/ath/wil6210/fw.h +++ b/drivers/net/wireless/ath/wil6210/fw.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014 Qualcomm Atheros, Inc. + * Copyright (c) 2014,2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -58,6 +58,15 @@ struct wil_fw_record_comment { /* type == wil_fw_type_comment */ u8 data[0]; /* free-form data [data_size], see above */ } __packed; +/* FW capabilities encoded inside a comment record */ +#define WIL_FW_CAPABILITIES_MAGIC (0xabcddcba) +struct wil_fw_record_capabilities { /* type == wil_fw_type_comment */ + /* identifies capabilities record */ + __le32 magic; + /* capabilities (variable size), see enum wmi_fw_capability */ + u8 capabilities[0]; +}; + /* perform action * data_size = @head.size - offsetof(struct wil_fw_record_action, data) */ @@ -93,6 +102,9 @@ struct wil_fw_record_verify { /* type == wil_fw_verify */ /* file header * First record of every file */ +/* the FW version prefix in the comment */ +#define WIL_FW_VERSION_PREFIX "FW version: " +#define WIL_FW_VERSION_PREFIX_LEN (sizeof(WIL_FW_VERSION_PREFIX) - 1) struct wil_fw_record_file_header { __le32 signature ; /* Wilocity signature */ __le32 reserved; diff --git a/drivers/net/wireless/ath/wil6210/fw_inc.c b/drivers/net/wireless/ath/wil6210/fw_inc.c index d30657ee7e83..8f40eb301924 100644 --- a/drivers/net/wireless/ath/wil6210/fw_inc.c +++ b/drivers/net/wireless/ath/wil6210/fw_inc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2014-2015 Qualcomm Atheros, Inc. + * Copyright (c) 2014-2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -118,6 +118,12 @@ static int wil_fw_verify(struct wil6210_priv *wil, const u8 *data, size_t size) return (int)dlen; } +static int fw_ignore_section(struct wil6210_priv *wil, const void *data, + size_t size) +{ + return 0; +} + static int fw_handle_comment(struct wil6210_priv *wil, const void *data, size_t size) { @@ -126,6 +132,27 @@ static int fw_handle_comment(struct wil6210_priv *wil, const void *data, return 0; } +static int +fw_handle_capabilities(struct wil6210_priv *wil, const void *data, + size_t size) +{ + const struct wil_fw_record_capabilities *rec = data; + size_t capa_size; + + if (size < sizeof(*rec) || + le32_to_cpu(rec->magic) != WIL_FW_CAPABILITIES_MAGIC) + return 0; + + capa_size = size - offsetof(struct wil_fw_record_capabilities, + capabilities); + bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX); + memcpy(wil->fw_capabilities, rec->capabilities, + min(sizeof(wil->fw_capabilities), capa_size)); + wil_hex_dump_fw("CAPA", DUMP_PREFIX_OFFSET, 16, 1, + rec->capabilities, capa_size, false); + return 0; +} + static int fw_handle_data(struct wil6210_priv *wil, const void *data, size_t size) { @@ -196,6 +223,13 @@ static int fw_handle_file_header(struct wil6210_priv *wil, const void *data, wil_hex_dump_fw("", DUMP_PREFIX_OFFSET, 16, 1, d->comment, sizeof(d->comment), true); + if (!memcmp(d->comment, WIL_FW_VERSION_PREFIX, + WIL_FW_VERSION_PREFIX_LEN)) + memcpy(wil->fw_version, + d->comment + WIL_FW_VERSION_PREFIX_LEN, + min(sizeof(d->comment) - WIL_FW_VERSION_PREFIX_LEN, + sizeof(wil->fw_version) - 1)); + return 0; } @@ -383,42 +417,51 @@ static int fw_handle_gateway_data4(struct wil6210_priv *wil, const void *data, static const struct { int type; - int (*handler)(struct wil6210_priv *wil, const void *data, size_t size); + int (*load_handler)(struct wil6210_priv *wil, const void *data, + size_t size); + int (*parse_handler)(struct wil6210_priv *wil, const void *data, + size_t size); } wil_fw_handlers[] = { - {wil_fw_type_comment, fw_handle_comment}, - {wil_fw_type_data, fw_handle_data}, - {wil_fw_type_fill, fw_handle_fill}, + {wil_fw_type_comment, fw_handle_comment, fw_handle_capabilities}, + {wil_fw_type_data, fw_handle_data, fw_ignore_section}, + {wil_fw_type_fill, fw_handle_fill, fw_ignore_section}, /* wil_fw_type_action */ /* wil_fw_type_verify */ - {wil_fw_type_file_header, fw_handle_file_header}, - {wil_fw_type_direct_write, fw_handle_direct_write}, - {wil_fw_type_gateway_data, fw_handle_gateway_data}, - {wil_fw_type_gateway_data4, fw_handle_gateway_data4}, + {wil_fw_type_file_header, fw_handle_file_header, + fw_handle_file_header}, + {wil_fw_type_direct_write, fw_handle_direct_write, fw_ignore_section}, + {wil_fw_type_gateway_data, fw_handle_gateway_data, fw_ignore_section}, + {wil_fw_type_gateway_data4, fw_handle_gateway_data4, + fw_ignore_section}, }; static int wil_fw_handle_record(struct wil6210_priv *wil, int type, - const void *data, size_t size) + const void *data, size_t size, bool load) { int i; - for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++) { + for (i = 0; i < ARRAY_SIZE(wil_fw_handlers); i++) if (wil_fw_handlers[i].type == type) - return wil_fw_handlers[i].handler(wil, data, size); - } + return load ? + wil_fw_handlers[i].load_handler( + wil, data, size) : + wil_fw_handlers[i].parse_handler( + wil, data, size); wil_err_fw(wil, "unknown record type: %d\n", type); return -EINVAL; } /** - * wil_fw_load - load FW into device - * - * Load the FW and uCode code and data to the corresponding device - * memory regions + * wil_fw_process - process section from FW file + * if load is true: Load the FW and uCode code and data to the + * corresponding device memory regions, + * otherwise only parse and look for capabilities * * Return error code */ -static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) +static int wil_fw_process(struct wil6210_priv *wil, const void *data, + size_t size, bool load) { int rc = 0; const struct wil_fw_record_head *hdr; @@ -437,7 +480,7 @@ static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) return -EINVAL; } rc = wil_fw_handle_record(wil, le16_to_cpu(hdr->type), - &hdr[1], hdr_sz); + &hdr[1], hdr_sz, load); if (rc) return rc; } @@ -456,13 +499,16 @@ static int wil_fw_load(struct wil6210_priv *wil, const void *data, size_t size) } /** - * wil_request_firmware - Request firmware and load to device + * wil_request_firmware - Request firmware * - * Request firmware image from the file and load it to device + * Request firmware image from the file + * If load is true, load firmware to device, otherwise + * only parse and extract capabilities * * Return error code */ -int wil_request_firmware(struct wil6210_priv *wil, const char *name) +int wil_request_firmware(struct wil6210_priv *wil, const char *name, + bool load) { int rc, rc1; const struct firmware *fw; @@ -482,7 +528,7 @@ int wil_request_firmware(struct wil6210_priv *wil, const char *name) rc = rc1; goto out; } - rc = wil_fw_load(wil, d, rc1); + rc = wil_fw_process(wil, d, rc1, load); if (rc < 0) goto out; } diff --git a/drivers/net/wireless/ath/wil6210/interrupt.c b/drivers/net/wireless/ath/wil6210/interrupt.c index 011e7412dcc0..64046e0bd0a2 100644 --- a/drivers/net/wireless/ath/wil6210/interrupt.c +++ b/drivers/net/wireless/ath/wil6210/interrupt.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2012-2015 Qualcomm Atheros, Inc. + * Copyright (c) 2012-2016 Qualcomm Atheros, Inc. * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -101,7 +101,7 @@ static void wil6210_mask_irq_misc(struct wil6210_priv *wil, bool mask_halp) mask_halp ? WIL6210_IRQ_DISABLE : WIL6210_IRQ_DISABLE_NO_HALP); } -static void wil6210_mask_halp(struct wil6210_priv *wil) +void wil6210_mask_halp(struct wil6210_priv *wil) { wil_dbg_irq(wil, "%s()\n", __func__); @@ -503,6 +503,13 @@ static int wil6210_debug_irq_mask(struct wil6210_priv *wil, u32 pseudo_cause) offsetof(struct RGF_ICR, ICR)); u32 imv_misc = wil_r(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, IMV)); + + /* HALP interrupt can be unmasked when misc interrupts are + * masked + */ + if (icr_misc & BIT_DMA_EP_MISC_ICR_HALP) + return 0; + wil_err(wil, "IRQ when it should be masked: pseudo 0x%08x\n" "Rx icm:icr:imv 0x%08x 0x%08x 0x%08x\n" "Tx icm:icr:imv 0x%08x 0x%08x 0x%08x\n" @@ -592,7 +599,7 @@ void wil6210_clear_irq(struct wil6210_priv *wil) void wil6210_set_halp(struct wil6210_priv *wil) { - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_irq(wil, "%s()\n", __func__); wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICS), BIT_DMA_EP_MISC_ICR_HALP); @@ -600,7 +607,7 @@ void wil6210_set_halp(struct wil6210_priv *wil) void wil6210_clear_halp(struct wil6210_priv *wil) { - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_irq(wil, "%s()\n", __func__); wil_w(wil, RGF_DMA_EP_MISC_ICR + offsetof(struct RGF_ICR, ICR), BIT_DMA_EP_MISC_ICR_HALP); diff --git a/drivers/net/wireless/ath/wil6210/main.c b/drivers/net/wireless/ath/wil6210/main.c index 4bc92e54984a..e7130b54d1d8 100644 --- a/drivers/net/wireless/ath/wil6210/main.c +++ b/drivers/net/wireless/ath/wil6210/main.c @@ -232,6 +232,9 @@ static void _wil6210_disconnect(struct wil6210_priv *wil, const u8 *bssid, struct net_device *ndev = wil_to_ndev(wil); struct wireless_dev *wdev = wil->wdev; + if (unlikely(!ndev)) + return; + might_sleep(); wil_info(wil, "%s(bssid=%pM, reason=%d, ev%s)\n", __func__, bssid, reason_code, from_event ? "+" : "-"); @@ -849,6 +852,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) bitmap_zero(wil->status, wil_status_last); mutex_unlock(&wil->wmi_mutex); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct cfg80211_scan_info info = { .aborted = true, @@ -860,6 +864,7 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) cfg80211_scan_done(wil->scan_request, &info); wil->scan_request = NULL; } + mutex_unlock(&wil->p2p_wdev_mutex); wil_mask_irq(wil); @@ -888,11 +893,12 @@ int wil_reset(struct wil6210_priv *wil, bool load_fw) WIL_FW2_NAME); wil_halt_cpu(wil); + memset(wil->fw_version, 0, sizeof(wil->fw_version)); /* Loading f/w from the file */ - rc = wil_request_firmware(wil, WIL_FW_NAME); + rc = wil_request_firmware(wil, WIL_FW_NAME, true); if (rc) return rc; - rc = wil_request_firmware(wil, WIL_FW2_NAME); + rc = wil_request_firmware(wil, WIL_FW2_NAME, true); if (rc) return rc; @@ -1035,10 +1041,10 @@ int wil_up(struct wil6210_priv *wil) int __wil_down(struct wil6210_priv *wil) { - int rc; - WARN_ON(!mutex_is_locked(&wil->mutex)); + set_bit(wil_status_resetting, wil->status); + if (wil->platform_ops.bus_request) wil->platform_ops.bus_request(wil->platform_handle, 0); @@ -1050,8 +1056,9 @@ int __wil_down(struct wil6210_priv *wil) } wil_enable_irq(wil); - (void)wil_p2p_stop_discovery(wil); + wil_p2p_stop_radio_operations(wil); + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct cfg80211_scan_info info = { .aborted = true, @@ -1063,18 +1070,7 @@ int __wil_down(struct wil6210_priv *wil) cfg80211_scan_done(wil->scan_request, &info); wil->scan_request = NULL; } - - if (test_bit(wil_status_fwconnected, wil->status) || - test_bit(wil_status_fwconnecting, wil->status)) { - - mutex_unlock(&wil->mutex); - rc = wmi_call(wil, WMI_DISCONNECT_CMDID, NULL, 0, - WMI_DISCONNECT_EVENTID, NULL, 0, - WIL6210_DISCONNECT_TO_MS); - mutex_lock(&wil->mutex); - if (rc) - wil_err(wil, "timeout waiting for disconnect\n"); - } + mutex_unlock(&wil->p2p_wdev_mutex); wil_reset(wil, false); @@ -1118,23 +1114,26 @@ void wil_halp_vote(struct wil6210_priv *wil) mutex_lock(&wil->halp.lock); - wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); if (++wil->halp.ref_cnt == 1) { wil6210_set_halp(wil); rc = wait_for_completion_timeout(&wil->halp.comp, to_jiffies); - if (!rc) + if (!rc) { wil_err(wil, "%s: HALP vote timed out\n", __func__); - else - wil_dbg_misc(wil, - "%s: HALP vote completed after %d ms\n", - __func__, - jiffies_to_msecs(to_jiffies - rc)); + /* Mask HALP as done in case the interrupt is raised */ + wil6210_mask_halp(wil); + } else { + wil_dbg_irq(wil, + "%s: HALP vote completed after %d ms\n", + __func__, + jiffies_to_msecs(to_jiffies - rc)); + } } - wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); mutex_unlock(&wil->halp.lock); } @@ -1145,16 +1144,16 @@ void wil_halp_unvote(struct wil6210_priv *wil) mutex_lock(&wil->halp.lock); - wil_dbg_misc(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: start, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); if (--wil->halp.ref_cnt == 0) { wil6210_clear_halp(wil); - wil_dbg_misc(wil, "%s: HALP unvote\n", __func__); + wil_dbg_irq(wil, "%s: HALP unvote\n", __func__); } - wil_dbg_misc(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, - wil->halp.ref_cnt); + wil_dbg_irq(wil, "%s: end, HALP ref_cnt (%d)\n", __func__, + wil->halp.ref_cnt); mutex_unlock(&wil->halp.lock); } diff --git a/drivers/net/wireless/ath/wil6210/netdev.c b/drivers/net/wireless/ath/wil6210/netdev.c index 098409753d5b..61de5e9f8ef0 100644 --- a/drivers/net/wireless/ath/wil6210/netdev.c +++ b/drivers/net/wireless/ath/wil6210/netdev.c @@ -179,13 +179,6 @@ void *wil_if_alloc(struct device *dev) SET_NETDEV_DEV(ndev, wiphy_dev(wdev->wiphy)); wdev->netdev = ndev; - netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, - WIL6210_NAPI_BUDGET); - netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, - WIL6210_NAPI_BUDGET); - - netif_tx_stop_all_queues(ndev); - return wil; out_priv: @@ -216,25 +209,48 @@ void wil_if_free(struct wil6210_priv *wil) int wil_if_add(struct wil6210_priv *wil) { + struct wireless_dev *wdev = wil_to_wdev(wil); + struct wiphy *wiphy = wdev->wiphy; struct net_device *ndev = wil_to_ndev(wil); int rc; - wil_dbg_misc(wil, "%s()\n", __func__); + wil_dbg_misc(wil, "entered"); + + strlcpy(wiphy->fw_version, wil->fw_version, sizeof(wiphy->fw_version)); + + rc = wiphy_register(wiphy); + if (rc < 0) { + wil_err(wil, "failed to register wiphy, err %d\n", rc); + return rc; + } + + netif_napi_add(ndev, &wil->napi_rx, wil6210_netdev_poll_rx, + WIL6210_NAPI_BUDGET); + netif_tx_napi_add(ndev, &wil->napi_tx, wil6210_netdev_poll_tx, + WIL6210_NAPI_BUDGET); + + netif_tx_stop_all_queues(ndev); rc = register_netdev(ndev); if (rc < 0) { dev_err(&ndev->dev, "Failed to register netdev: %d\n", rc); - return rc; + goto out_wiphy; } return 0; + +out_wiphy: + wiphy_unregister(wdev->wiphy); + return rc; } void wil_if_remove(struct wil6210_priv *wil) { struct net_device *ndev = wil_to_ndev(wil); + struct wireless_dev *wdev = wil_to_wdev(wil); wil_dbg_misc(wil, "%s()\n", __func__); unregister_netdev(ndev); + wiphy_unregister(wdev->wiphy); } diff --git a/drivers/net/wireless/ath/wil6210/p2p.c b/drivers/net/wireless/ath/wil6210/p2p.c index e0f8aa0ebfac..4087785d3090 100644 --- a/drivers/net/wireless/ath/wil6210/p2p.c +++ b/drivers/net/wireless/ath/wil6210/p2p.c @@ -263,3 +263,49 @@ void wil_p2p_search_expired(struct work_struct *work) mutex_unlock(&wil->p2p_wdev_mutex); } } + +void wil_p2p_stop_radio_operations(struct wil6210_priv *wil) +{ + struct wil_p2p_info *p2p = &wil->p2p; + struct cfg80211_scan_info info = { + .aborted = true, + }; + + lockdep_assert_held(&wil->mutex); + + mutex_lock(&wil->p2p_wdev_mutex); + + if (wil->radio_wdev != wil->p2p_wdev) + goto out; + + if (!p2p->discovery_started) { + /* Regular scan on the p2p device */ + if (wil->scan_request && + wil->scan_request->wdev == wil->p2p_wdev) { + cfg80211_scan_done(wil->scan_request, &info); + wil->scan_request = NULL; + } + goto out; + } + + /* Search or listen on p2p device */ + mutex_unlock(&wil->p2p_wdev_mutex); + wil_p2p_stop_discovery(wil); + mutex_lock(&wil->p2p_wdev_mutex); + + if (wil->scan_request) { + /* search */ + cfg80211_scan_done(wil->scan_request, &info); + wil->scan_request = NULL; + } else { + /* listen */ + cfg80211_remain_on_channel_expired(wil->radio_wdev, + p2p->cookie, + &p2p->listen_chan, + GFP_KERNEL); + } + +out: + wil->radio_wdev = wil->wdev; + mutex_unlock(&wil->p2p_wdev_mutex); +} diff --git a/drivers/net/wireless/ath/wil6210/pcie_bus.c b/drivers/net/wireless/ath/wil6210/pcie_bus.c index 7b5c4222bc33..44746ca0d2e6 100644 --- a/drivers/net/wireless/ath/wil6210/pcie_bus.c +++ b/drivers/net/wireless/ath/wil6210/pcie_bus.c @@ -20,6 +20,7 @@ #include #include #include "wil6210.h" +#include static bool use_msi = true; module_param(use_msi, bool, S_IRUGO); @@ -38,6 +39,7 @@ void wil_set_capabilities(struct wil6210_priv *wil) u32 rev_id = wil_r(wil, RGF_USER_JTAG_DEV_ID); bitmap_zero(wil->hw_capabilities, hw_capability_last); + bitmap_zero(wil->fw_capabilities, WMI_FW_CAPABILITY_MAX); switch (rev_id) { case JTAG_DEV_ID_SPARROW_B0: @@ -51,6 +53,9 @@ void wil_set_capabilities(struct wil6210_priv *wil) } wil_info(wil, "Board hardware is %s\n", wil->hw_name); + + /* extract FW capabilities from file without loading the FW */ + wil_request_firmware(wil, WIL_FW_NAME, false); } void wil_disable_irq(struct wil6210_priv *wil) @@ -293,6 +298,9 @@ static void wil_pcie_remove(struct pci_dev *pdev) #endif /* CONFIG_PM */ wil6210_debugfs_remove(wil); + rtnl_lock(); + wil_p2p_wdev_free(wil); + rtnl_unlock(); wil_if_remove(wil); wil_if_pcie_disable(wil); pci_iounmap(pdev, csr); @@ -300,7 +308,6 @@ static void wil_pcie_remove(struct pci_dev *pdev) pci_disable_device(pdev); if (wil->platform_ops.uninit) wil->platform_ops.uninit(wil->platform_handle); - wil_p2p_wdev_free(wil); wil_if_free(wil); } diff --git a/drivers/net/wireless/ath/wil6210/txrx.c b/drivers/net/wireless/ath/wil6210/txrx.c index f2f6a404d3d1..4c38520d4dd2 100644 --- a/drivers/net/wireless/ath/wil6210/txrx.c +++ b/drivers/net/wireless/ath/wil6210/txrx.c @@ -873,9 +873,12 @@ int wil_vring_init_tx(struct wil6210_priv *wil, int id, int size, rc = -EINVAL; goto out_free; } - vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); + spin_lock_bh(&txdata->lock); + vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); txdata->enabled = 1; + spin_unlock_bh(&txdata->lock); + if (txdata->dot1x_open && (agg_wsize >= 0)) wil_addba_tx_request(wil, id, agg_wsize); @@ -950,9 +953,11 @@ int wil_vring_init_bcast(struct wil6210_priv *wil, int id, int size) rc = -EINVAL; goto out_free; } - vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); + spin_lock_bh(&txdata->lock); + vring->hwtail = le32_to_cpu(reply.cmd.tx_vring_tail_ptr); txdata->enabled = 1; + spin_unlock_bh(&txdata->lock); return 0; out_free: diff --git a/drivers/net/wireless/ath/wil6210/wil6210.h b/drivers/net/wireless/ath/wil6210/wil6210.h index ecab4af90602..a949cd62bc4e 100644 --- a/drivers/net/wireless/ath/wil6210/wil6210.h +++ b/drivers/net/wireless/ath/wil6210/wil6210.h @@ -17,6 +17,7 @@ #ifndef __WIL6210_H__ #define __WIL6210_H__ +#include #include #include #include @@ -576,10 +577,11 @@ struct wil6210_priv { struct wireless_dev *wdev; void __iomem *csr; DECLARE_BITMAP(status, wil_status_last); - u32 fw_version; + u8 fw_version[ETHTOOL_FWVERS_LEN]; u32 hw_version; const char *hw_name; DECLARE_BITMAP(hw_capabilities, hw_capability_last); + DECLARE_BITMAP(fw_capabilities, WMI_FW_CAPABILITY_MAX); u8 n_mids; /* number of additional MIDs as reported by FW */ u32 recovery_count; /* num of FW recovery attempts in a short time */ u32 recovery_state; /* FW recovery state machine */ @@ -657,7 +659,7 @@ struct wil6210_priv { /* P2P_DEVICE vif */ struct wireless_dev *p2p_wdev; - struct mutex p2p_wdev_mutex; /* protect @p2p_wdev */ + struct mutex p2p_wdev_mutex; /* protect @p2p_wdev and @scan_request */ struct wireless_dev *radio_wdev; /* High Access Latency Policy voting */ @@ -828,6 +830,7 @@ void wil_unmask_irq(struct wil6210_priv *wil); void wil_configure_interrupt_moderation(struct wil6210_priv *wil); void wil_disable_irq(struct wil6210_priv *wil); void wil_enable_irq(struct wil6210_priv *wil); +void wil6210_mask_halp(struct wil6210_priv *wil); /* P2P */ bool wil_p2p_is_social_scan(struct cfg80211_scan_request *request); @@ -840,6 +843,7 @@ u8 wil_p2p_stop_discovery(struct wil6210_priv *wil); int wil_p2p_cancel_listen(struct wil6210_priv *wil, u64 cookie); void wil_p2p_listen_expired(struct work_struct *work); void wil_p2p_search_expired(struct work_struct *work); +void wil_p2p_stop_radio_operations(struct wil6210_priv *wil); /* WMI for P2P */ int wmi_p2p_cfg(struct wil6210_priv *wil, int channel, int bi); @@ -893,7 +897,8 @@ void wil6210_unmask_irq_rx(struct wil6210_priv *wil); int wil_iftype_nl2wmi(enum nl80211_iftype type); int wil_ioctl(struct wil6210_priv *wil, void __user *data, int cmd); -int wil_request_firmware(struct wil6210_priv *wil, const char *name); +int wil_request_firmware(struct wil6210_priv *wil, const char *name, + bool load); int wil_can_suspend(struct wil6210_priv *wil, bool is_runtime); int wil_suspend(struct wil6210_priv *wil, bool is_runtime); diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 4d92541913c0..fae4f1285d08 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -312,14 +312,14 @@ static void wmi_evt_ready(struct wil6210_priv *wil, int id, void *d, int len) struct wireless_dev *wdev = wil->wdev; struct wmi_ready_event *evt = d; - wil->fw_version = le32_to_cpu(evt->sw_version); wil->n_mids = evt->numof_additional_mids; - wil_info(wil, "FW ver. %d; MAC %pM; %d MID's\n", wil->fw_version, + wil_info(wil, "FW ver. %s(SW %d); MAC %pM; %d MID's\n", + wil->fw_version, le32_to_cpu(evt->sw_version), evt->mac, wil->n_mids); /* ignore MAC address, we already have it from the boot loader */ - snprintf(wdev->wiphy->fw_version, sizeof(wdev->wiphy->fw_version), - "%d", wil->fw_version); + strlcpy(wdev->wiphy->fw_version, wil->fw_version, + sizeof(wdev->wiphy->fw_version)); wil_set_recovery_state(wil, fw_recovery_idle); set_bit(wil_status_fwready, wil->status); @@ -424,6 +424,7 @@ static void wmi_evt_tx_mgmt(struct wil6210_priv *wil, int id, void *d, int len) static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id, void *d, int len) { + mutex_lock(&wil->p2p_wdev_mutex); if (wil->scan_request) { struct wmi_scan_complete_event *data = d; struct cfg80211_scan_info info = { @@ -435,14 +436,13 @@ static void wmi_evt_scan_complete(struct wil6210_priv *wil, int id, wil->scan_request, info.aborted); del_timer_sync(&wil->scan_timer); - mutex_lock(&wil->p2p_wdev_mutex); cfg80211_scan_done(wil->scan_request, &info); wil->radio_wdev = wil->wdev; - mutex_unlock(&wil->p2p_wdev_mutex); wil->scan_request = NULL; } else { wil_err(wil, "SCAN_COMPLETE while not scanning\n"); } + mutex_unlock(&wil->p2p_wdev_mutex); } static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len) diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h index 685fe0ddea26..f430e8a80603 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.h +++ b/drivers/net/wireless/ath/wil6210/wmi.h @@ -46,6 +46,16 @@ enum wmi_mid { MID_BROADCAST = 0xFF, }; +/* FW capability IDs + * Each ID maps to a bit in a 32-bit bitmask value provided by the FW to + * the host + */ +enum wmi_fw_capability { + WMI_FW_CAPABILITY_FTM = 0, + WMI_FW_CAPABILITY_PS_CONFIG = 1, + WMI_FW_CAPABILITY_MAX, +}; + /* WMI_CMD_HDR */ struct wmi_cmd_hdr { u8 mid; @@ -120,6 +130,8 @@ enum wmi_command_id { WMI_BF_SM_MGMT_CMDID = 0x838, WMI_BF_RXSS_MGMT_CMDID = 0x839, WMI_BF_TRIG_CMDID = 0x83A, + WMI_LINK_MAINTAIN_CFG_WRITE_CMDID = 0x842, + WMI_LINK_MAINTAIN_CFG_READ_CMDID = 0x843, WMI_SET_SECTORS_CMDID = 0x849, WMI_MAINTAIN_PAUSE_CMDID = 0x850, WMI_MAINTAIN_RESUME_CMDID = 0x851, @@ -134,10 +146,15 @@ enum wmi_command_id { WMI_BF_CTRL_CMDID = 0x862, WMI_NOTIFY_REQ_CMDID = 0x863, WMI_GET_STATUS_CMDID = 0x864, + WMI_GET_RF_STATUS_CMDID = 0x866, + WMI_GET_BASEBAND_TYPE_CMDID = 0x867, WMI_UNIT_TEST_CMDID = 0x900, WMI_HICCUP_CMDID = 0x901, WMI_FLASH_READ_CMDID = 0x902, WMI_FLASH_WRITE_CMDID = 0x903, + /* Power management */ + WMI_TRAFFIC_DEFERRAL_CMDID = 0x904, + WMI_TRAFFIC_RESUME_CMDID = 0x905, /* P2P */ WMI_P2P_CFG_CMDID = 0x910, WMI_PORT_ALLOCATE_CMDID = 0x911, @@ -150,6 +167,26 @@ enum wmi_command_id { WMI_PCP_START_CMDID = 0x918, WMI_PCP_STOP_CMDID = 0x919, WMI_GET_PCP_FACTOR_CMDID = 0x91B, + /* Power Save Configuration Commands */ + WMI_PS_DEV_PROFILE_CFG_CMDID = 0x91C, + /* Not supported yet */ + WMI_PS_DEV_CFG_CMDID = 0x91D, + /* Not supported yet */ + WMI_PS_DEV_CFG_READ_CMDID = 0x91E, + /* Per MAC Power Save Configuration commands + * Not supported yet + */ + WMI_PS_MID_CFG_CMDID = 0x91F, + /* Not supported yet */ + WMI_PS_MID_CFG_READ_CMDID = 0x920, + WMI_RS_CFG_CMDID = 0x921, + WMI_GET_DETAILED_RS_RES_CMDID = 0x922, + WMI_AOA_MEAS_CMDID = 0x923, + WMI_TOF_SESSION_START_CMDID = 0x991, + WMI_TOF_GET_CAPABILITIES_CMDID = 0x992, + WMI_TOF_SET_LCR_CMDID = 0x993, + WMI_TOF_SET_LCI_CMDID = 0x994, + WMI_TOF_CHANNEL_INFO_CMDID = 0x995, WMI_SET_MAC_ADDRESS_CMDID = 0xF003, WMI_ABORT_SCAN_CMDID = 0xF007, WMI_SET_PROMISCUOUS_MODE_CMDID = 0xF041, @@ -291,9 +328,8 @@ enum wmi_scan_type { /* WMI_START_SCAN_CMDID */ struct wmi_start_scan_cmd { u8 direct_scan_mac_addr[WMI_MAC_LEN]; - /* DMG Beacon frame is transmitted during active scanning */ + /* run scan with discovery beacon. Relevant for ACTIVE scan only. */ u8 discovery_mode; - /* reserved */ u8 reserved; /* Max duration in the home channel(ms) */ __le32 dwell_time; @@ -453,6 +489,12 @@ struct wmi_port_delete_cmd { u8 reserved[3]; } __packed; +/* WMI_TRAFFIC_DEFERRAL_CMDID */ +struct wmi_traffic_deferral_cmd { + /* Bit vector: bit[0] - wake on Unicast, bit[1] - wake on Broadcast */ + u8 wakeup_trigger; +} __packed; + /* WMI_P2P_CFG_CMDID */ enum wmi_discovery_mode { WMI_DISCOVERY_MODE_NON_OFFLOAD = 0x00, @@ -818,85 +860,193 @@ struct wmi_pmc_cmd { __le64 mem_base; } __packed; +enum wmi_aoa_meas_type { + WMI_AOA_PHASE_MEAS = 0x00, + WMI_AOA_PHASE_AMP_MEAS = 0x01, +}; + +/* WMI_AOA_MEAS_CMDID */ +struct wmi_aoa_meas_cmd { + u8 mac_addr[WMI_MAC_LEN]; + /* channels IDs: + * 0 - 58320 MHz + * 1 - 60480 MHz + * 2 - 62640 MHz + */ + u8 channel; + /* enum wmi_aoa_meas_type */ + u8 aoa_meas_type; + __le32 meas_rf_mask; +} __packed; + +enum wmi_tof_burst_duration { + WMI_TOF_BURST_DURATION_250_USEC = 2, + WMI_TOF_BURST_DURATION_500_USEC = 3, + WMI_TOF_BURST_DURATION_1_MSEC = 4, + WMI_TOF_BURST_DURATION_2_MSEC = 5, + WMI_TOF_BURST_DURATION_4_MSEC = 6, + WMI_TOF_BURST_DURATION_8_MSEC = 7, + WMI_TOF_BURST_DURATION_16_MSEC = 8, + WMI_TOF_BURST_DURATION_32_MSEC = 9, + WMI_TOF_BURST_DURATION_64_MSEC = 10, + WMI_TOF_BURST_DURATION_128_MSEC = 11, + WMI_TOF_BURST_DURATION_NO_PREFERENCES = 15, +}; + +enum wmi_tof_session_start_flags { + WMI_TOF_SESSION_START_FLAG_SECURED = 0x1, + WMI_TOF_SESSION_START_FLAG_ASAP = 0x2, + WMI_TOF_SESSION_START_FLAG_LCI_REQ = 0x4, + WMI_TOF_SESSION_START_FLAG_LCR_REQ = 0x8, +}; + +/* WMI_TOF_SESSION_START_CMDID */ +struct wmi_ftm_dest_info { + u8 channel; + /* wmi_tof_session_start_flags_e */ + u8 flags; + u8 initial_token; + u8 num_of_ftm_per_burst; + u8 num_of_bursts_exp; + /* wmi_tof_burst_duration_e */ + u8 burst_duration; + /* Burst Period indicate interval between two consecutive burst + * instances, in units of 100 ms + */ + __le16 burst_period; + u8 dst_mac[WMI_MAC_LEN]; + __le16 reserved; +} __packed; + +/* WMI_TOF_SESSION_START_CMDID */ +struct wmi_tof_session_start_cmd { + __le32 session_id; + u8 num_of_aoa_measures; + u8 aoa_type; + __le16 num_of_dest; + u8 reserved[4]; + struct wmi_ftm_dest_info ftm_dest_info[0]; +} __packed; + +enum wmi_tof_channel_info_report_type { + WMI_TOF_CHANNEL_INFO_TYPE_CIR = 0x1, + WMI_TOF_CHANNEL_INFO_TYPE_RSSI = 0x2, + WMI_TOF_CHANNEL_INFO_TYPE_SNR = 0x4, + WMI_TOF_CHANNEL_INFO_TYPE_DEBUG_DATA = 0x8, + WMI_TOF_CHANNEL_INFO_TYPE_VENDOR_SPECIFIC = 0x10, +}; + +/* WMI_TOF_CHANNEL_INFO_CMDID */ +struct wmi_tof_channel_info_cmd { + /* wmi_tof_channel_info_report_type_e */ + __le32 channel_info_report_request; +} __packed; + /* WMI Events * List of Events (target to host) */ enum wmi_event_id { - WMI_READY_EVENTID = 0x1001, - WMI_CONNECT_EVENTID = 0x1002, - WMI_DISCONNECT_EVENTID = 0x1003, - WMI_SCAN_COMPLETE_EVENTID = 0x100A, - WMI_REPORT_STATISTICS_EVENTID = 0x100B, - WMI_RD_MEM_RSP_EVENTID = 0x1800, - WMI_FW_READY_EVENTID = 0x1801, - WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID = 0x200, - WMI_ECHO_RSP_EVENTID = 0x1803, - WMI_FS_TUNE_DONE_EVENTID = 0x180A, - WMI_CORR_MEASURE_EVENTID = 0x180B, - WMI_READ_RSSI_EVENTID = 0x180C, - WMI_TEMP_SENSE_DONE_EVENTID = 0x180E, - WMI_DC_CALIB_DONE_EVENTID = 0x180F, - WMI_IQ_TX_CALIB_DONE_EVENTID = 0x1811, - WMI_IQ_RX_CALIB_DONE_EVENTID = 0x1812, - WMI_SET_WORK_MODE_DONE_EVENTID = 0x1815, - WMI_LO_LEAKAGE_CALIB_DONE_EVENTID = 0x1816, - WMI_MARLON_R_READ_DONE_EVENTID = 0x1818, - WMI_MARLON_R_WRITE_DONE_EVENTID = 0x1819, - WMI_MARLON_R_TXRX_SEL_DONE_EVENTID = 0x181A, - WMI_SILENT_RSSI_CALIB_DONE_EVENTID = 0x181D, - WMI_RF_RX_TEST_DONE_EVENTID = 0x181E, - WMI_CFG_RX_CHAIN_DONE_EVENTID = 0x1820, - WMI_VRING_CFG_DONE_EVENTID = 0x1821, - WMI_BA_STATUS_EVENTID = 0x1823, - WMI_RCP_ADDBA_REQ_EVENTID = 0x1824, - WMI_RCP_ADDBA_RESP_SENT_EVENTID = 0x1825, - WMI_DELBA_EVENTID = 0x1826, - WMI_GET_SSID_EVENTID = 0x1828, - WMI_GET_PCP_CHANNEL_EVENTID = 0x182A, - WMI_SW_TX_COMPLETE_EVENTID = 0x182B, - WMI_READ_MAC_RXQ_EVENTID = 0x1830, - WMI_READ_MAC_TXQ_EVENTID = 0x1831, - WMI_WRITE_MAC_RXQ_EVENTID = 0x1832, - WMI_WRITE_MAC_TXQ_EVENTID = 0x1833, - WMI_WRITE_MAC_XQ_FIELD_EVENTID = 0x1834, - WMI_BEAMFORMING_MGMT_DONE_EVENTID = 0x1836, - WMI_BF_TXSS_MGMT_DONE_EVENTID = 0x1837, - WMI_BF_RXSS_MGMT_DONE_EVENTID = 0x1839, - WMI_RS_MGMT_DONE_EVENTID = 0x1852, - WMI_RF_MGMT_STATUS_EVENTID = 0x1853, - WMI_THERMAL_THROTTLING_STATUS_EVENTID = 0x1855, - WMI_BF_SM_MGMT_DONE_EVENTID = 0x1838, - WMI_RX_MGMT_PACKET_EVENTID = 0x1840, - WMI_TX_MGMT_PACKET_EVENTID = 0x1841, - WMI_OTP_READ_RESULT_EVENTID = 0x1856, - WMI_LED_CFG_DONE_EVENTID = 0x1858, + WMI_READY_EVENTID = 0x1001, + WMI_CONNECT_EVENTID = 0x1002, + WMI_DISCONNECT_EVENTID = 0x1003, + WMI_SCAN_COMPLETE_EVENTID = 0x100A, + WMI_REPORT_STATISTICS_EVENTID = 0x100B, + WMI_RD_MEM_RSP_EVENTID = 0x1800, + WMI_FW_READY_EVENTID = 0x1801, + WMI_EXIT_FAST_MEM_ACC_MODE_EVENTID = 0x200, + WMI_ECHO_RSP_EVENTID = 0x1803, + WMI_FS_TUNE_DONE_EVENTID = 0x180A, + WMI_CORR_MEASURE_EVENTID = 0x180B, + WMI_READ_RSSI_EVENTID = 0x180C, + WMI_TEMP_SENSE_DONE_EVENTID = 0x180E, + WMI_DC_CALIB_DONE_EVENTID = 0x180F, + WMI_IQ_TX_CALIB_DONE_EVENTID = 0x1811, + WMI_IQ_RX_CALIB_DONE_EVENTID = 0x1812, + WMI_SET_WORK_MODE_DONE_EVENTID = 0x1815, + WMI_LO_LEAKAGE_CALIB_DONE_EVENTID = 0x1816, + WMI_MARLON_R_READ_DONE_EVENTID = 0x1818, + WMI_MARLON_R_WRITE_DONE_EVENTID = 0x1819, + WMI_MARLON_R_TXRX_SEL_DONE_EVENTID = 0x181A, + WMI_SILENT_RSSI_CALIB_DONE_EVENTID = 0x181D, + WMI_RF_RX_TEST_DONE_EVENTID = 0x181E, + WMI_CFG_RX_CHAIN_DONE_EVENTID = 0x1820, + WMI_VRING_CFG_DONE_EVENTID = 0x1821, + WMI_BA_STATUS_EVENTID = 0x1823, + WMI_RCP_ADDBA_REQ_EVENTID = 0x1824, + WMI_RCP_ADDBA_RESP_SENT_EVENTID = 0x1825, + WMI_DELBA_EVENTID = 0x1826, + WMI_GET_SSID_EVENTID = 0x1828, + WMI_GET_PCP_CHANNEL_EVENTID = 0x182A, + WMI_SW_TX_COMPLETE_EVENTID = 0x182B, + WMI_READ_MAC_RXQ_EVENTID = 0x1830, + WMI_READ_MAC_TXQ_EVENTID = 0x1831, + WMI_WRITE_MAC_RXQ_EVENTID = 0x1832, + WMI_WRITE_MAC_TXQ_EVENTID = 0x1833, + WMI_WRITE_MAC_XQ_FIELD_EVENTID = 0x1834, + WMI_BEAMFORMING_MGMT_DONE_EVENTID = 0x1836, + WMI_BF_TXSS_MGMT_DONE_EVENTID = 0x1837, + WMI_BF_RXSS_MGMT_DONE_EVENTID = 0x1839, + WMI_RS_MGMT_DONE_EVENTID = 0x1852, + WMI_RF_MGMT_STATUS_EVENTID = 0x1853, + WMI_THERMAL_THROTTLING_STATUS_EVENTID = 0x1855, + WMI_BF_SM_MGMT_DONE_EVENTID = 0x1838, + WMI_RX_MGMT_PACKET_EVENTID = 0x1840, + WMI_TX_MGMT_PACKET_EVENTID = 0x1841, + WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID = 0x1842, + WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENTID = 0x1843, + WMI_OTP_READ_RESULT_EVENTID = 0x1856, + WMI_LED_CFG_DONE_EVENTID = 0x1858, /* Performance monitoring events */ - WMI_DATA_PORT_OPEN_EVENTID = 0x1860, - WMI_WBE_LINK_DOWN_EVENTID = 0x1861, - WMI_BF_CTRL_DONE_EVENTID = 0x1862, - WMI_NOTIFY_REQ_DONE_EVENTID = 0x1863, - WMI_GET_STATUS_DONE_EVENTID = 0x1864, - WMI_VRING_EN_EVENTID = 0x1865, - WMI_UNIT_TEST_EVENTID = 0x1900, - WMI_FLASH_READ_DONE_EVENTID = 0x1902, - WMI_FLASH_WRITE_DONE_EVENTID = 0x1903, + WMI_DATA_PORT_OPEN_EVENTID = 0x1860, + WMI_WBE_LINK_DOWN_EVENTID = 0x1861, + WMI_BF_CTRL_DONE_EVENTID = 0x1862, + WMI_NOTIFY_REQ_DONE_EVENTID = 0x1863, + WMI_GET_STATUS_DONE_EVENTID = 0x1864, + WMI_VRING_EN_EVENTID = 0x1865, + WMI_GET_RF_STATUS_EVENTID = 0x1866, + WMI_GET_BASEBAND_TYPE_EVENTID = 0x1867, + WMI_UNIT_TEST_EVENTID = 0x1900, + WMI_FLASH_READ_DONE_EVENTID = 0x1902, + WMI_FLASH_WRITE_DONE_EVENTID = 0x1903, + /* Power management */ + WMI_TRAFFIC_DEFERRAL_EVENTID = 0x1904, + WMI_TRAFFIC_RESUME_EVENTID = 0x1905, /* P2P */ - WMI_P2P_CFG_DONE_EVENTID = 0x1910, - WMI_PORT_ALLOCATED_EVENTID = 0x1911, - WMI_PORT_DELETED_EVENTID = 0x1912, - WMI_LISTEN_STARTED_EVENTID = 0x1914, - WMI_SEARCH_STARTED_EVENTID = 0x1915, - WMI_DISCOVERY_STARTED_EVENTID = 0x1916, - WMI_DISCOVERY_STOPPED_EVENTID = 0x1917, - WMI_PCP_STARTED_EVENTID = 0x1918, - WMI_PCP_STOPPED_EVENTID = 0x1919, - WMI_PCP_FACTOR_EVENTID = 0x191A, - WMI_SET_CHANNEL_EVENTID = 0x9000, - WMI_ASSOC_REQ_EVENTID = 0x9001, - WMI_EAPOL_RX_EVENTID = 0x9002, - WMI_MAC_ADDR_RESP_EVENTID = 0x9003, - WMI_FW_VER_EVENTID = 0x9004, - WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID = 0x9005, + WMI_P2P_CFG_DONE_EVENTID = 0x1910, + WMI_PORT_ALLOCATED_EVENTID = 0x1911, + WMI_PORT_DELETED_EVENTID = 0x1912, + WMI_LISTEN_STARTED_EVENTID = 0x1914, + WMI_SEARCH_STARTED_EVENTID = 0x1915, + WMI_DISCOVERY_STARTED_EVENTID = 0x1916, + WMI_DISCOVERY_STOPPED_EVENTID = 0x1917, + WMI_PCP_STARTED_EVENTID = 0x1918, + WMI_PCP_STOPPED_EVENTID = 0x1919, + WMI_PCP_FACTOR_EVENTID = 0x191A, + /* Power Save Configuration Events */ + WMI_PS_DEV_PROFILE_CFG_EVENTID = 0x191C, + /* Not supported yet */ + WMI_PS_DEV_CFG_EVENTID = 0x191D, + /* Not supported yet */ + WMI_PS_DEV_CFG_READ_EVENTID = 0x191E, + /* Not supported yet */ + WMI_PS_MID_CFG_EVENTID = 0x191F, + /* Not supported yet */ + WMI_PS_MID_CFG_READ_EVENTID = 0x1920, + WMI_RS_CFG_DONE_EVENTID = 0x1921, + WMI_GET_DETAILED_RS_RES_EVENTID = 0x1922, + WMI_AOA_MEAS_EVENTID = 0x1923, + WMI_TOF_SESSION_END_EVENTID = 0x1991, + WMI_TOF_GET_CAPABILITIES_EVENTID = 0x1992, + WMI_TOF_SET_LCR_EVENTID = 0x1993, + WMI_TOF_SET_LCI_EVENTID = 0x1994, + WMI_TOF_FTM_PER_DEST_RES_EVENTID = 0x1995, + WMI_TOF_CHANNEL_INFO_EVENTID = 0x1996, + WMI_SET_CHANNEL_EVENTID = 0x9000, + WMI_ASSOC_REQ_EVENTID = 0x9001, + WMI_EAPOL_RX_EVENTID = 0x9002, + WMI_MAC_ADDR_RESP_EVENTID = 0x9003, + WMI_FW_VER_EVENTID = 0x9004, + WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENTID = 0x9005, }; /* Events data structures */ @@ -943,10 +1093,85 @@ struct wmi_get_status_done_event { /* WMI_FW_VER_EVENTID */ struct wmi_fw_ver_event { - u8 major; - u8 minor; - __le16 subminor; - __le16 build; + /* FW image version */ + __le32 fw_major; + __le32 fw_minor; + __le32 fw_subminor; + __le32 fw_build; + /* FW image build time stamp */ + __le32 hour; + __le32 minute; + __le32 second; + __le32 day; + __le32 month; + __le32 year; + /* Boot Loader image version */ + __le32 bl_major; + __le32 bl_minor; + __le32 bl_subminor; + __le32 bl_build; + /* The number of entries in the FW capabilies array */ + u8 fw_capabilities_len; + u8 reserved[3]; + /* FW capabilities info + * Must be the last member of the struct + */ + __le32 fw_capabilities[0]; +} __packed; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum rf_type { + RF_UNKNOWN = 0x00, + RF_MARLON = 0x01, + RF_SPARROW = 0x02, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum board_file_rf_type { + BF_RF_MARLON = 0x00, + BF_RF_SPARROW = 0x01, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +enum rf_status { + RF_OK = 0x00, + RF_NO_COMM = 0x01, + RF_WRONG_BOARD_FILE = 0x02, +}; + +/* WMI_GET_RF_STATUS_EVENTID */ +struct wmi_get_rf_status_event { + /* enum rf_type */ + __le32 rf_type; + /* attached RFs bit vector */ + __le32 attached_rf_vector; + /* enabled RFs bit vector */ + __le32 enabled_rf_vector; + /* enum rf_status, refers to enabled RFs */ + u8 rf_status[32]; + /* enum board file RF type */ + __le32 board_file_rf_type; + /* board file platform type */ + __le32 board_file_platform_type; + /* board file version */ + __le32 board_file_version; + __le32 reserved[2]; +} __packed; + +/* WMI_GET_BASEBAND_TYPE_EVENTID */ +enum baseband_type { + BASEBAND_UNKNOWN = 0x00, + BASEBAND_SPARROW_M_A0 = 0x03, + BASEBAND_SPARROW_M_A1 = 0x04, + BASEBAND_SPARROW_M_B0 = 0x05, + BASEBAND_SPARROW_M_C0 = 0x06, + BASEBAND_SPARROW_M_D0 = 0x07, +}; + +/* WMI_GET_BASEBAND_TYPE_EVENTID */ +struct wmi_get_baseband_type_event { + /* enum baseband_type */ + __le32 baseband_type; } __packed; /* WMI_MAC_ADDR_RESP_EVENTID */ @@ -1410,4 +1635,553 @@ struct wmi_led_cfg_done_event { __le32 status; } __packed; +#define WMI_NUM_MCS (13) + +/* Rate search parameters configuration per connection */ +struct wmi_rs_cfg { + /* The maximal allowed PER for each MCS + * MCS will be considered as failed if PER during RS is higher + */ + u8 per_threshold[WMI_NUM_MCS]; + /* Number of MPDUs for each MCS + * this is the minimal statistic required to make an educated + * decision + */ + u8 min_frame_cnt[WMI_NUM_MCS]; + /* stop threshold [0-100] */ + u8 stop_th; + /* MCS1 stop threshold [0-100] */ + u8 mcs1_fail_th; + u8 max_back_failure_th; + /* Debug feature for disabling internal RS trigger (which is + * currently triggered by BF Done) + */ + u8 dbg_disable_internal_trigger; + __le32 back_failure_mask; + __le32 mcs_en_vec; +} __packed; + +/* WMI_RS_CFG_CMDID */ +struct wmi_rs_cfg_cmd { + /* connection id */ + u8 cid; + /* enable or disable rate search */ + u8 rs_enable; + /* rate search configuration */ + struct wmi_rs_cfg rs_cfg; +} __packed; + +/* WMI_RS_CFG_DONE_EVENTID */ +struct wmi_rs_cfg_done_event { + u8 cid; + /* enum wmi_fw_status */ + u8 status; + u8 reserved[2]; +} __packed; + +/* WMI_GET_DETAILED_RS_RES_CMDID */ +struct wmi_get_detailed_rs_res_cmd { + /* connection id */ + u8 cid; + u8 reserved[3]; +} __packed; + +/* RS results status */ +enum wmi_rs_results_status { + WMI_RS_RES_VALID = 0x00, + WMI_RS_RES_INVALID = 0x01, +}; + +/* Rate search results */ +struct wmi_rs_results { + /* number of sent MPDUs */ + u8 num_of_tx_pkt[WMI_NUM_MCS]; + /* number of non-acked MPDUs */ + u8 num_of_non_acked_pkt[WMI_NUM_MCS]; + /* RS timestamp */ + __le32 tsf; + /* RS selected MCS */ + u8 mcs; +} __packed; + +/* WMI_GET_DETAILED_RS_RES_EVENTID */ +struct wmi_get_detailed_rs_res_event { + u8 cid; + /* enum wmi_rs_results_status */ + u8 status; + /* detailed rs results */ + struct wmi_rs_results rs_results; + u8 reserved[3]; +} __packed; + +/* broadcast connection ID */ +#define WMI_LINK_MAINTAIN_CFG_CID_BROADCAST (0xFFFFFFFF) + +/* Types wmi_link_maintain_cfg presets for WMI_LINK_MAINTAIN_CFG_WRITE_CMD */ +enum wmi_link_maintain_cfg_type { + /* AP/PCP default normal (non-FST) configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_AP = 0x00, + /* AP/PCP default FST configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_AP = 0x01, + /* STA default normal (non-FST) configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_NORMAL_STA = 0x02, + /* STA default FST configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_DEFAULT_FST_STA = 0x03, + /* custom configuration settings */ + WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM = 0x04, + /* number of defined configuration types */ + WMI_LINK_MAINTAIN_CFG_TYPES_NUM = 0x05, +}; + +/* Response status codes for WMI_LINK_MAINTAIN_CFG_WRITE/READ commands */ +enum wmi_link_maintain_cfg_response_status { + /* WMI_LINK_MAINTAIN_CFG_WRITE/READ command successfully accomplished + */ + WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_OK = 0x00, + /* ERROR due to bad argument in WMI_LINK_MAINTAIN_CFG_WRITE/READ + * command request + */ + WMI_LINK_MAINTAIN_CFG_RESPONSE_STATUS_BAD_ARGUMENT = 0x01, +}; + +/* Link Loss and Keep Alive configuration */ +struct wmi_link_maintain_cfg { + /* link_loss_enable_detectors_vec */ + __le32 link_loss_enable_detectors_vec; + /* detectors check period usec */ + __le32 check_link_loss_period_usec; + /* max allowed tx ageing */ + __le32 tx_ageing_threshold_usec; + /* keep alive period for high SNR */ + __le32 keep_alive_period_usec_high_snr; + /* keep alive period for low SNR */ + __le32 keep_alive_period_usec_low_snr; + /* lower snr limit for keep alive period update */ + __le32 keep_alive_snr_threshold_low_db; + /* upper snr limit for keep alive period update */ + __le32 keep_alive_snr_threshold_high_db; + /* num of successive bad bcons causing link-loss */ + __le32 bad_beacons_num_threshold; + /* SNR limit for bad_beacons_detector */ + __le32 bad_beacons_snr_threshold_db; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_WRITE_CMDID */ +struct wmi_link_maintain_cfg_write_cmd { + /* enum wmi_link_maintain_cfg_type_e - type of requested default + * configuration to be applied + */ + __le32 cfg_type; + /* requested connection ID or WMI_LINK_MAINTAIN_CFG_CID_BROADCAST */ + __le32 cid; + /* custom configuration settings to be applied (relevant only if + * cfg_type==WMI_LINK_MAINTAIN_CFG_TYPE_CUSTOM) + */ + struct wmi_link_maintain_cfg lm_cfg; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_READ_CMDID */ +struct wmi_link_maintain_cfg_read_cmd { + /* connection ID which configuration settings are requested */ + __le32 cid; +} __packed; + +/* WMI_LINK_MAINTAIN_CFG_WRITE_DONE_EVENTID */ +struct wmi_link_maintain_cfg_write_done_event { + /* requested connection ID */ + __le32 cid; + /* wmi_link_maintain_cfg_response_status_e - write status */ + __le32 status; +} __packed; + +/* \WMI_LINK_MAINTAIN_CFG_READ_DONE_EVENT */ +struct wmi_link_maintain_cfg_read_done_event { + /* requested connection ID */ + __le32 cid; + /* wmi_link_maintain_cfg_response_status_e - read status */ + __le32 status; + /* Retrieved configuration settings */ + struct wmi_link_maintain_cfg lm_cfg; +} __packed; + +enum wmi_traffic_deferral_status { + WMI_TRAFFIC_DEFERRAL_APPROVED = 0x0, + WMI_TRAFFIC_DEFERRAL_REJECTED = 0x1, +}; + +/* WMI_TRAFFIC_DEFERRAL_EVENTID */ +struct wmi_traffic_deferral_event { + /* enum wmi_traffic_deferral_status_e */ + u8 status; +} __packed; + +enum wmi_traffic_resume_status { + WMI_TRAFFIC_RESUME_SUCCESS = 0x0, + WMI_TRAFFIC_RESUME_FAILED = 0x1, +}; + +/* WMI_TRAFFIC_RESUME_EVENTID */ +struct wmi_traffic_resume_event { + /* enum wmi_traffic_resume_status_e */ + u8 status; +} __packed; + +/* Power Save command completion status codes */ +enum wmi_ps_cfg_cmd_status { + WMI_PS_CFG_CMD_STATUS_SUCCESS = 0x00, + WMI_PS_CFG_CMD_STATUS_BAD_PARAM = 0x01, + /* other error */ + WMI_PS_CFG_CMD_STATUS_ERROR = 0x02, +}; + +/* Device Power Save Profiles */ +enum wmi_ps_profile_type { + WMI_PS_PROFILE_TYPE_DEFAULT = 0x00, + WMI_PS_PROFILE_TYPE_PS_DISABLED = 0x01, + WMI_PS_PROFILE_TYPE_MAX_PS = 0x02, + WMI_PS_PROFILE_TYPE_LOW_LATENCY_PS = 0x03, +}; + +/* WMI_PS_DEV_PROFILE_CFG_CMDID + * + * Power save profile to be used by the device + * + * Returned event: + * - WMI_PS_DEV_PROFILE_CFG_EVENTID + */ +struct wmi_ps_dev_profile_cfg_cmd { + /* wmi_ps_profile_type_e */ + u8 ps_profile; + u8 reserved[3]; +} __packed; + +/* WMI_PS_DEV_PROFILE_CFG_EVENTID */ +struct wmi_ps_dev_profile_cfg_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +enum wmi_ps_level { + WMI_PS_LEVEL_DEEP_SLEEP = 0x00, + WMI_PS_LEVEL_SHALLOW_SLEEP = 0x01, + /* awake = all PS mechanisms are disabled */ + WMI_PS_LEVEL_AWAKE = 0x02, +}; + +enum wmi_ps_deep_sleep_clk_level { + /* 33k */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC = 0x00, + /* 10k */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_OSC = 0x01, + /* @RTC Low latency */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_RTC_LT = 0x02, + WMI_PS_DEEP_SLEEP_CLK_LEVEL_XTAL = 0x03, + WMI_PS_DEEP_SLEEP_CLK_LEVEL_SYSCLK = 0x04, + /* Not Applicable */ + WMI_PS_DEEP_SLEEP_CLK_LEVEL_N_A = 0xFF, +}; + +/* Response by the FW to a D3 entry request */ +enum wmi_ps_d3_resp_policy { + WMI_PS_D3_RESP_POLICY_DEFAULT = 0x00, + /* debug -D3 req is always denied */ + WMI_PS_D3_RESP_POLICY_DENIED = 0x01, + /* debug -D3 req is always approved */ + WMI_PS_D3_RESP_POLICY_APPROVED = 0x02, +}; + +/* Device common power save configurations */ +struct wmi_ps_dev_cfg { + /* lowest level of PS allowed while unassociated, enum wmi_ps_level_e + */ + u8 ps_unassoc_min_level; + /* lowest deep sleep clock level while nonassoc, enum + * wmi_ps_deep_sleep_clk_level_e + */ + u8 ps_unassoc_deep_sleep_min_level; + /* lowest level of PS allowed while associated, enum wmi_ps_level_e */ + u8 ps_assoc_min_level; + /* lowest deep sleep clock level while assoc, enum + * wmi_ps_deep_sleep_clk_level_e + */ + u8 ps_assoc_deep_sleep_min_level; + /* enum wmi_ps_deep_sleep_clk_level_e */ + u8 ps_assoc_low_latency_ds_min_level; + /* enum wmi_ps_d3_resp_policy_e */ + u8 ps_D3_response_policy; + /* BOOL */ + u8 ps_D3_pm_pme_enabled; + /* BOOL */ + u8 ps_halp_enable; + u8 ps_deep_sleep_enter_thresh_msec; + /* BOOL */ + u8 ps_voltage_scaling_en; +} __packed; + +/* WMI_PS_DEV_CFG_CMDID + * + * Configure common Power Save parameters of the device and all MIDs. + * + * Returned event: + * - WMI_PS_DEV_CFG_EVENTID + */ +struct wmi_ps_dev_cfg_cmd { + /* Device Power Save configuration to be applied */ + struct wmi_ps_dev_cfg ps_dev_cfg; + /* alignment to 32b */ + u8 reserved[2]; +} __packed; + +/* WMI_PS_DEV_CFG_EVENTID */ +struct wmi_ps_dev_cfg_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +/* WMI_PS_DEV_CFG_READ_CMDID + * + * request to retrieve device Power Save configuration + * (WMI_PS_DEV_CFG_CMD params) + * + * Returned event: + * - WMI_PS_DEV_CFG_READ_EVENTID + */ +struct wmi_ps_dev_cfg_read_cmd { + __le32 reserved; +} __packed; + +/* WMI_PS_DEV_CFG_READ_EVENTID */ +struct wmi_ps_dev_cfg_read_event { + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; + /* Retrieved device Power Save configuration (WMI_PS_DEV_CFG_CMD + * params) + */ + struct wmi_ps_dev_cfg dev_ps_cfg; + /* alignment to 32b */ + u8 reserved[2]; +} __packed; + +/* Per Mac Power Save configurations */ +struct wmi_ps_mid_cfg { + /* Low power RX in BTI is enabled, BOOL */ + u8 beacon_lprx_enable; + /* Sync to sector ID enabled, BOOL */ + u8 beacon_sync_to_sectorId_enable; + /* Low power RX in DTI is enabled, BOOL */ + u8 frame_exchange_lprx_enable; + /* Sleep Cycle while in scheduled PS, 1-31 */ + u8 scheduled_sleep_cycle_pow2; + /* Stay Awake for k BIs every (sleep_cycle - k) BIs, 1-31 */ + u8 scheduled_num_of_awake_bis; + u8 am_to_traffic_load_thresh_mbp; + u8 traffic_to_am_load_thresh_mbps; + u8 traffic_to_am_num_of_no_traffic_bis; + /* BOOL */ + u8 continuous_traffic_psm; + __le16 no_traffic_to_min_usec; + __le16 no_traffic_to_max_usec; + __le16 snoozing_sleep_interval_milisec; + u8 max_no_data_awake_events; + /* Trigger WEB after k failed beacons */ + u8 num_of_failed_beacons_rx_to_trigger_web; + /* Trigger BF after k failed beacons */ + u8 num_of_failed_beacons_rx_to_trigger_bf; + /* Trigger SOB after k successful beacons */ + u8 num_of_successful_beacons_rx_to_trigger_sob; +} __packed; + +/* WMI_PS_MID_CFG_CMDID + * + * Configure Power Save parameters of a specific MID. + * These parameters are relevant for the specific BSS this MID belongs to. + * + * Returned event: + * - WMI_PS_MID_CFG_EVENTID + */ +struct wmi_ps_mid_cfg_cmd { + /* MAC ID */ + u8 mid; + /* mid PS configuration to be applied */ + struct wmi_ps_mid_cfg ps_mid_cfg; +} __packed; + +/* WMI_PS_MID_CFG_EVENTID */ +struct wmi_ps_mid_cfg_event { + /* MAC ID */ + u8 mid; + /* alignment to 32b */ + u8 reserved[3]; + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +/* WMI_PS_MID_CFG_READ_CMDID + * + * request to retrieve Power Save configuration of mid + * (WMI_PS_MID_CFG_CMD params) + * + * Returned event: + * - WMI_PS_MID_CFG_READ_EVENTID + */ +struct wmi_ps_mid_cfg_read_cmd { + /* MAC ID */ + u8 mid; + /* alignment to 32b */ + u8 reserved[3]; +} __packed; + +/* WMI_PS_MID_CFG_READ_EVENTID */ +struct wmi_ps_mid_cfg_read_event { + /* MAC ID */ + u8 mid; + /* Retrieved MID Power Save configuration(WMI_PS_MID_CFG_CMD params) */ + struct wmi_ps_mid_cfg mid_ps_cfg; + /* wmi_ps_cfg_cmd_status_e */ + __le32 status; +} __packed; + +#define WMI_AOA_MAX_DATA_SIZE (128) + +enum wmi_aoa_meas_status { + WMI_AOA_MEAS_SUCCESS = 0x00, + WMI_AOA_MEAS_PEER_INCAPABLE = 0x01, + WMI_AOA_MEAS_FAILURE = 0x02, +}; + +/* WMI_AOA_MEAS_EVENTID */ +struct wmi_aoa_meas_event { + u8 mac_addr[WMI_MAC_LEN]; + /* channels IDs: + * 0 - 58320 MHz + * 1 - 60480 MHz + * 2 - 62640 MHz + */ + u8 channel; + /* enum wmi_aoa_meas_type */ + u8 aoa_meas_type; + /* Measurments are from RFs, defined by the mask */ + __le32 meas_rf_mask; + /* enum wmi_aoa_meas_status */ + u8 meas_status; + u8 reserved; + /* Length of meas_data in bytes */ + __le16 length; + u8 meas_data[WMI_AOA_MAX_DATA_SIZE]; +} __packed; + +/* WMI_TOF_GET_CAPABILITIES_EVENTID */ +struct wmi_tof_get_capabilities_event { + u8 ftm_capability; + /* maximum supported number of destination to start TOF */ + u8 max_num_of_dest; + /* maximum supported number of measurements per burst */ + u8 max_num_of_meas_per_burst; + u8 reserved; + /* maximum supported multi bursts */ + __le16 max_multi_bursts_sessions; + /* maximum supported FTM burst duration , wmi_tof_burst_duration_e */ + __le16 max_ftm_burst_duration; + /* AOA supported types */ + __le32 aoa_supported_types; +} __packed; + +enum wmi_tof_session_end_status { + WMI_TOF_SESSION_END_NO_ERROR = 0x00, + WMI_TOF_SESSION_END_FAIL = 0x01, + WMI_TOF_SESSION_END_PARAMS_ERROR = 0x02, + WMI_TOF_SESSION_END_ABORTED = 0x03, +}; + +/* WMI_TOF_SESSION_END_EVENTID */ +struct wmi_tof_session_end_event { + /* FTM session ID */ + __le32 session_id; + /* wmi_tof_session_end_status_e */ + u8 status; + u8 reserved[3]; +} __packed; + +/* Responder FTM Results */ +struct wmi_responder_ftm_res { + u8 t1[6]; + u8 t2[6]; + u8 t3[6]; + u8 t4[6]; + __le16 tod_err; + __le16 toa_err; + __le16 tod_err_initiator; + __le16 toa_err_initiator; +} __packed; + +enum wmi_tof_ftm_per_dest_res_status { + WMI_PER_DEST_RES_NO_ERROR = 0x00, + WMI_PER_DEST_RES_TX_RX_FAIL = 0x01, + WMI_PER_DEST_RES_PARAM_DONT_MATCH = 0x02, +}; + +enum wmi_tof_ftm_per_dest_res_flags { + WMI_PER_DEST_RES_REQ_START = 0x01, + WMI_PER_DEST_RES_BURST_REPORT_END = 0x02, + WMI_PER_DEST_RES_REQ_END = 0x04, + WMI_PER_DEST_RES_PARAM_UPDATE = 0x08, +}; + +/* WMI_TOF_FTM_PER_DEST_RES_EVENTID */ +struct wmi_tof_ftm_per_dest_res_event { + /* FTM session ID */ + __le32 session_id; + /* destination MAC address */ + u8 dst_mac[WMI_MAC_LEN]; + /* wmi_tof_ftm_per_dest_res_flags_e */ + u8 flags; + /* wmi_tof_ftm_per_dest_res_status_e */ + u8 status; + /* responder ASAP */ + u8 responder_asap; + /* responder number of FTM per burst */ + u8 responder_num_ftm_per_burst; + /* responder number of FTM burst exponent */ + u8 responder_num_ftm_bursts_exp; + /* responder burst duration ,wmi_tof_burst_duration_e */ + u8 responder_burst_duration; + /* responder burst period, indicate interval between two consecutive + * burst instances, in units of 100 ms + */ + __le16 responder_burst_period; + /* receive burst counter */ + __le16 bursts_cnt; + /* tsf of responder start burst */ + __le32 tsf_sync; + /* actual received ftm per burst */ + u8 actual_ftm_per_burst; + u8 reserved0[7]; + struct wmi_responder_ftm_res responder_ftm_res[0]; +} __packed; + +enum wmi_tof_channel_info_type { + WMI_TOF_CHANNEL_INFO_AOA = 0x00, + WMI_TOF_CHANNEL_INFO_LCI = 0x01, + WMI_TOF_CHANNEL_INFO_LCR = 0x02, + WMI_TOF_CHANNEL_INFO_VENDOR_SPECIFIC = 0x03, + WMI_TOF_CHANNEL_INFO_CIR = 0x04, + WMI_TOF_CHANNEL_INFO_RSSI = 0x05, + WMI_TOF_CHANNEL_INFO_SNR = 0x06, + WMI_TOF_CHANNEL_INFO_DEBUG = 0x07, +}; + +/* WMI_TOF_CHANNEL_INFO_EVENTID */ +struct wmi_tof_channel_info_event { + /* FTM session ID */ + __le32 session_id; + /* destination MAC address */ + u8 dst_mac[WMI_MAC_LEN]; + /* wmi_tof_channel_info_type_e */ + u8 type; + /* data report length */ + u8 len; + /* data report payload */ + u8 report[0]; +} __packed; + #endif /* __WILOCITY_WMI_H__ */ diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c index d1bc51f92686..038a960c5104 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcdc.c @@ -194,7 +194,7 @@ retry: } /* Check info buffer */ - info = (void *)&msg[1]; + info = (void *)&bcdc->buf[0]; /* Copy info buffer */ if (buf) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c index f549c25608d6..72139b579b18 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c @@ -420,7 +420,7 @@ u8 brcmf_sdiod_regrb(struct brcmf_sdio_dev *sdiodev, u32 addr, int *ret) u32 brcmf_sdiod_regrl(struct brcmf_sdio_dev *sdiodev, u32 addr, int *ret) { - u32 data; + u32 data = 0; int retval; brcmf_dbg(SDIO, "addr:0x%08x\n", addr); @@ -1101,6 +1101,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339), + BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4345), BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4354), diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c index b8aec5e5ef93..b777e1b2f87a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c @@ -1595,15 +1595,9 @@ static s32 brcmf_set_auth_type(struct net_device *ndev, val = 1; brcmf_dbg(CONN, "shared key\n"); break; - case NL80211_AUTHTYPE_AUTOMATIC: - val = 2; - brcmf_dbg(CONN, "automatic\n"); - break; - case NL80211_AUTHTYPE_NETWORK_EAP: - brcmf_dbg(CONN, "network eap\n"); default: val = 2; - brcmf_err("invalid auth type (%d)\n", sme->auth_type); + brcmf_dbg(CONN, "automatic, auth type (%d)\n", sme->auth_type); break; } @@ -2533,7 +2527,7 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si) WL_BSS_INFO_MAX); if (err) { brcmf_err("Failed to get bss info (%d)\n", err); - return; + goto out_kfree; } si->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); si->bss_param.beacon_interval = le16_to_cpu(buf->bss_le.beacon_period); @@ -2545,6 +2539,9 @@ static void brcmf_fill_bss_param(struct brcmf_if *ifp, struct station_info *si) si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_PREAMBLE; if (capability & WLAN_CAPABILITY_SHORT_SLOT_TIME) si->bss_param.flags |= BSS_PARAM_FLAGS_SHORT_SLOT_TIME; + +out_kfree: + kfree(buf); } static s32 @@ -3703,6 +3700,7 @@ static void brcmf_configure_wowl(struct brcmf_cfg80211_info *cfg, struct cfg80211_wowlan *wowl) { u32 wowl_config; + struct brcmf_wowl_wakeind_le wowl_wakeind; u32 i; brcmf_dbg(TRACE, "Suspend, wowl config.\n"); @@ -3744,8 +3742,9 @@ static void brcmf_configure_wowl(struct brcmf_cfg80211_info *cfg, if (!test_bit(BRCMF_VIF_STATUS_CONNECTED, &ifp->vif->sme_state)) wowl_config |= BRCMF_WOWL_UNASSOC; - brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", "clear", - sizeof(struct brcmf_wowl_wakeind_le)); + memcpy(&wowl_wakeind, "clear", 6); + brcmf_fil_iovar_data_set(ifp, "wowl_wakeind", &wowl_wakeind, + sizeof(wowl_wakeind)); brcmf_fil_iovar_int_set(ifp, "wowl", wowl_config); brcmf_fil_iovar_int_set(ifp, "wowl_activate", 1); brcmf_bus_wowl_config(cfg->pub->bus_if, true); @@ -3884,11 +3883,11 @@ brcmf_cfg80211_del_pmksa(struct wiphy *wiphy, struct net_device *ndev, if (!check_vif_up(ifp->vif)) return -EIO; - brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", &pmksa->bssid); + brcmf_dbg(CONN, "del_pmksa - PMK bssid = %pM\n", pmksa->bssid); npmk = le32_to_cpu(cfg->pmk_list.npmk); for (i = 0; i < npmk; i++) - if (!memcmp(&pmksa->bssid, &pmk[i].bssid, ETH_ALEN)) + if (!memcmp(pmksa->bssid, pmk[i].bssid, ETH_ALEN)) break; if ((npmk > 0) && (i < npmk)) { @@ -4502,6 +4501,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, u16 chanspec = chandef_to_chanspec(&cfg->d11inf, &settings->chandef); bool mbss; int is_11d; + bool supports_11d; brcmf_dbg(TRACE, "ctrlchn=%d, center=%d, bw=%d, beacon_interval=%d, dtim_period=%d,\n", settings->chandef.chan->hw_value, @@ -4514,11 +4514,16 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, mbss = ifp->vif->mbss; /* store current 11d setting */ - brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, &ifp->vif->is_11d); - country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, - settings->beacon.tail_len, - WLAN_EID_COUNTRY); - is_11d = country_ie ? 1 : 0; + if (brcmf_fil_cmd_int_get(ifp, BRCMF_C_GET_REGULATORY, + &ifp->vif->is_11d)) { + supports_11d = false; + } else { + country_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail, + settings->beacon.tail_len, + WLAN_EID_COUNTRY); + is_11d = country_ie ? 1 : 0; + supports_11d = true; + } memset(&ssid_le, 0, sizeof(ssid_le)); if (settings->ssid == NULL || settings->ssid_len == 0) { @@ -4577,7 +4582,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, /* Parameters shared by all radio interfaces */ if (!mbss) { - if (is_11d != ifp->vif->is_11d) { + if ((supports_11d) && (is_11d != ifp->vif->is_11d)) { err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, is_11d); if (err < 0) { @@ -4619,7 +4624,7 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev, brcmf_err("SET INFRA error %d\n", err); goto exit; } - } else if (WARN_ON(is_11d != ifp->vif->is_11d)) { + } else if (WARN_ON(supports_11d && (is_11d != ifp->vif->is_11d))) { /* Multiple-BSS should use same 11d configuration */ err = -EINVAL; goto exit; @@ -4753,11 +4758,8 @@ static int brcmf_cfg80211_stop_ap(struct wiphy *wiphy, struct net_device *ndev) brcmf_err("setting INFRA mode failed %d\n", err); if (brcmf_feat_is_enabled(ifp, BRCMF_FEAT_MBSS)) brcmf_fil_iovar_int_set(ifp, "mbss", 0); - err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, - ifp->vif->is_11d); - if (err < 0) - brcmf_err("restoring REGULATORY setting failed %d\n", - err); + brcmf_fil_cmd_int_set(ifp, BRCMF_C_SET_REGULATORY, + ifp->vif->is_11d); /* Bring device back up so it can be used again */ err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_UP, 1); if (err < 0) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c index 65e8c8766441..5eaac13e2317 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c @@ -136,27 +136,6 @@ static void _brcmf_set_multicast_list(struct work_struct *work) err); } -static void -_brcmf_set_mac_address(struct work_struct *work) -{ - struct brcmf_if *ifp; - s32 err; - - ifp = container_of(work, struct brcmf_if, setmacaddr_work); - - brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); - - err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", ifp->mac_addr, - ETH_ALEN); - if (err < 0) { - brcmf_err("Setting cur_etheraddr failed, %d\n", err); - } else { - brcmf_dbg(TRACE, "MAC address updated to %pM\n", - ifp->mac_addr); - memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN); - } -} - #if IS_ENABLED(CONFIG_IPV6) static void _brcmf_update_ndtable(struct work_struct *work) { @@ -190,10 +169,20 @@ static int brcmf_netdev_set_mac_address(struct net_device *ndev, void *addr) { struct brcmf_if *ifp = netdev_priv(ndev); struct sockaddr *sa = (struct sockaddr *)addr; + int err; - memcpy(&ifp->mac_addr, sa->sa_data, ETH_ALEN); - schedule_work(&ifp->setmacaddr_work); - return 0; + brcmf_dbg(TRACE, "Enter, bsscfgidx=%d\n", ifp->bsscfgidx); + + err = brcmf_fil_iovar_data_set(ifp, "cur_etheraddr", sa->sa_data, + ETH_ALEN); + if (err < 0) { + brcmf_err("Setting cur_etheraddr failed, %d\n", err); + } else { + brcmf_dbg(TRACE, "updated to %pM\n", sa->sa_data); + memcpy(ifp->mac_addr, sa->sa_data, ETH_ALEN); + memcpy(ifp->ndev->dev_addr, ifp->mac_addr, ETH_ALEN); + } + return err; } static void brcmf_netdev_set_multicast_list(struct net_device *ndev) @@ -519,13 +508,9 @@ int brcmf_net_attach(struct brcmf_if *ifp, bool rtnl_locked) ndev->needed_headroom += drvr->hdrlen; ndev->ethtool_ops = &brcmf_ethtool_ops; - drvr->rxsz = ndev->mtu + ndev->hard_header_len + - drvr->hdrlen; - /* set the mac address */ memcpy(ndev->dev_addr, ifp->mac_addr, ETH_ALEN); - INIT_WORK(&ifp->setmacaddr_work, _brcmf_set_mac_address); INIT_WORK(&ifp->multicast_work, _brcmf_set_multicast_list); INIT_WORK(&ifp->ndoffload_work, _brcmf_update_ndtable); @@ -730,7 +715,6 @@ static void brcmf_del_if(struct brcmf_pub *drvr, s32 bsscfgidx, } if (ifp->ndev->netdev_ops == &brcmf_netdev_ops_pri) { - cancel_work_sync(&ifp->setmacaddr_work); cancel_work_sync(&ifp->multicast_work); cancel_work_sync(&ifp->ndoffload_work); } @@ -886,9 +870,12 @@ static int brcmf_inet6addr_changed(struct notifier_block *nb, } break; case NETDEV_DOWN: - if (i < NDOL_MAX_ENTRIES) - for (; i < ifp->ipv6addr_idx; i++) + if (i < NDOL_MAX_ENTRIES) { + for (; i < ifp->ipv6addr_idx - 1; i++) table[i] = table[i + 1]; + memset(&table[i], 0, sizeof(table[i])); + ifp->ipv6addr_idx--; + } break; default: break; @@ -1061,8 +1048,7 @@ fail: brcmf_fws_del_interface(ifp); brcmf_fws_deinit(drvr); } - if (ifp) - brcmf_net_detach(ifp->ndev, false); + brcmf_net_detach(ifp->ndev, false); if (p2p_ifp) brcmf_net_detach(p2p_ifp->ndev, false); drvr->iflist[0] = NULL; @@ -1169,7 +1155,8 @@ int brcmf_netdev_wait_pend8021x(struct brcmf_if *ifp) !brcmf_get_pend_8021x_cnt(ifp), MAX_WAIT_FOR_8021X_TX); - WARN_ON(!err); + if (!err) + brcmf_err("Timed out waiting for no pending 802.1x packets\n"); return !err; } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h index 8fa34cad5a96..c94dcab260d0 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.h @@ -112,15 +112,11 @@ struct brcmf_pub { /* Internal brcmf items */ uint hdrlen; /* Total BRCMF header length (proto + bus) */ - uint rxsz; /* Rx buffer size bus module should use */ /* Dongle media info */ char fwver[BRCMF_DRIVER_FIRMWARE_VERSION_LEN]; u8 mac[ETH_ALEN]; /* MAC address obtained from dongle */ - /* Multicast data packets sent to dongle */ - unsigned long tx_multicast; - struct mac_address addresses[BRCMF_MAX_IFS]; struct brcmf_if *iflist[BRCMF_MAX_IFS]; @@ -176,7 +172,6 @@ enum brcmf_netif_stop_reason { * @vif: points to cfg80211 specific interface information. * @ndev: associated network device. * @stats: interface specific network statistics. - * @setmacaddr_work: worker object for setting mac address. * @multicast_work: worker object for multicast provisioning. * @ndoffload_work: worker object for neighbor discovery offload configuration. * @fws_desc: interface specific firmware-signalling descriptor. @@ -193,7 +188,6 @@ struct brcmf_if { struct brcmf_cfg80211_vif *vif; struct net_device *ndev; struct net_device_stats stats; - struct work_struct setmacaddr_work; struct work_struct multicast_work; struct work_struct ndoffload_work; struct brcmf_fws_mac_descriptor *fws_desc; diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c index 7e269f9aa607..d0b738da2458 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c @@ -234,13 +234,20 @@ static void brcmf_flowring_block(struct brcmf_flowring *flow, u16 flowid, void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid) { + struct brcmf_bus *bus_if = dev_get_drvdata(flow->dev); struct brcmf_flowring_ring *ring; + struct brcmf_if *ifp; u16 hash_idx; + u8 ifidx; struct sk_buff *skb; ring = flow->rings[flowid]; if (!ring) return; + + ifidx = brcmf_flowring_ifidx_get(flow, flowid); + ifp = brcmf_get_ifp(bus_if->drvr, ifidx); + brcmf_flowring_block(flow, flowid, false); hash_idx = ring->hash_id; flow->hash[hash_idx].ifidx = BRCMF_FLOWRING_INVALID_IFIDX; @@ -249,7 +256,7 @@ void brcmf_flowring_delete(struct brcmf_flowring *flow, u16 flowid) skb = skb_dequeue(&ring->skblist); while (skb) { - brcmu_pkt_buf_free_skb(skb); + brcmf_txfinalize(ifp, skb, false); skb = skb_dequeue(&ring->skblist); } @@ -495,14 +502,18 @@ void brcmf_flowring_add_tdls_peer(struct brcmf_flowring *flow, int ifidx, } else { search = flow->tdls_entry; if (memcmp(search->mac, peer, ETH_ALEN) == 0) - return; + goto free_entry; while (search->next) { search = search->next; if (memcmp(search->mac, peer, ETH_ALEN) == 0) - return; + goto free_entry; } search->next = tdls_entry; } flow->tdls_active = true; + return; + +free_entry: + kfree(tdls_entry); } diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c index 9f9024a7bd64..a190f535efc9 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c @@ -2104,8 +2104,6 @@ int brcmf_fws_process_skb(struct brcmf_if *ifp, struct sk_buff *skb) if ((skb->priority == 0) || (skb->priority > 7)) skb->priority = cfg80211_classify8021d(skb, NULL); - drvr->tx_multicast += !!multicast; - if (fws->avoid_queueing) { rc = brcmf_proto_txdata(drvr, ifp->ifidx, 0, skb); if (rc < 0) diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index 68ab3ac15650..b892dac70f4b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -313,6 +313,7 @@ struct rte_console { #define KSO_WAIT_US 50 #define MAX_KSO_ATTEMPTS (PMU_MAX_TRANSITION_DLY/KSO_WAIT_US) +#define BRCMF_SDIO_MAX_ACCESS_ERRORS 5 /* * Conversion of 802.1D priority to precedence level @@ -677,6 +678,7 @@ brcmf_sdio_kso_control(struct brcmf_sdio *bus, bool on) { u8 wr_val = 0, rd_val, cmp_val, bmask; int err = 0; + int err_cnt = 0; int try_cnt = 0; brcmf_dbg(TRACE, "Enter: on=%d\n", on); @@ -712,9 +714,14 @@ brcmf_sdio_kso_control(struct brcmf_sdio *bus, bool on) */ rd_val = brcmf_sdiod_regrb(bus->sdiodev, SBSDIO_FUNC1_SLEEPCSR, &err); - if (((rd_val & bmask) == cmp_val) && !err) + if (!err) { + if ((rd_val & bmask) == cmp_val) + break; + err_cnt = 0; + } + /* bail out upon subsequent access errors */ + if (err && (err_cnt++ > BRCMF_SDIO_MAX_ACCESS_ERRORS)) break; - udelay(KSO_WAIT_US); brcmf_sdiod_regwb(bus->sdiodev, SBSDIO_FUNC1_SLEEPCSR, wr_val, &err); @@ -3757,7 +3764,8 @@ static u32 brcmf_sdio_buscore_read32(void *ctx, u32 addr) u32 val, rev; val = brcmf_sdiod_regrl(sdiodev, addr, NULL); - if (sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 && + if ((sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4335_4339 || + sdiodev->func[0]->device == SDIO_DEVICE_ID_BROADCOM_4339) && addr == CORE_CC_REG(SI_ENUM_BASE, chipid)) { rev = (val & CID_REV_MASK) >> CID_REV_SHIFT; if (rev >= 2) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c index a10f35c5eb3d..fe6755944b7b 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/tracepoint.c @@ -19,6 +19,7 @@ #ifndef __CHECKER__ #define CREATE_TRACE_POINTS #include "tracepoint.h" +#include "debug.h" void __brcmf_err(const char *func, const char *fmt, ...) { diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c index 98b15a9a2779..2f978a39b58a 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/usb.c @@ -1099,15 +1099,11 @@ struct brcmf_usbdev *brcmf_usb_attach(struct brcmf_usbdev_info *devinfo, devinfo->tx_freecount = ntxq; devinfo->ctl_urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!devinfo->ctl_urb) { - brcmf_err("usb_alloc_urb (ctl) failed\n"); + if (!devinfo->ctl_urb) goto error; - } devinfo->bulk_urb = usb_alloc_urb(0, GFP_ATOMIC); - if (!devinfo->bulk_urb) { - brcmf_err("usb_alloc_urb (bulk) failed\n"); + if (!devinfo->bulk_urb) goto error; - } return &devinfo->bus_pub; @@ -1462,11 +1458,15 @@ static int brcmf_usb_reset_resume(struct usb_interface *intf) #define BRCMF_USB_DEVICE(dev_id) \ { USB_DEVICE(BRCM_USB_VENDOR_ID_BROADCOM, dev_id) } +#define LINKSYS_USB_DEVICE(dev_id) \ + { USB_DEVICE(BRCM_USB_VENDOR_ID_LINKSYS, dev_id) } + static struct usb_device_id brcmf_usb_devid_table[] = { BRCMF_USB_DEVICE(BRCM_USB_43143_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43236_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43242_DEVICE_ID), BRCMF_USB_DEVICE(BRCM_USB_43569_DEVICE_ID), + LINKSYS_USB_DEVICE(BRCM_USB_43235_LINKSYS_DEVICE_ID), { USB_DEVICE(BRCM_USB_VENDOR_ID_LG, BRCM_USB_43242_LG_DEVICE_ID) }, /* special entry for device with firmware loaded and running */ BRCMF_USB_DEVICE(BRCM_USB_BCMFW_DEVICE_ID), diff --git a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h index 3cc42bef6245..d0407d9ad782 100644 --- a/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/broadcom/brcm80211/include/brcm_hw_ids.h @@ -22,6 +22,7 @@ #define BRCM_USB_VENDOR_ID_BROADCOM 0x0a5c #define BRCM_USB_VENDOR_ID_LG 0x043e +#define BRCM_USB_VENDOR_ID_LINKSYS 0x13b1 #define BRCM_PCIE_VENDOR_ID_BROADCOM PCI_VENDOR_ID_BROADCOM /* Chipcommon Core Chip IDs */ @@ -58,6 +59,7 @@ /* USB Device IDs */ #define BRCM_USB_43143_DEVICE_ID 0xbd1e +#define BRCM_USB_43235_LINKSYS_DEVICE_ID 0x0039 #define BRCM_USB_43236_DEVICE_ID 0xbd17 #define BRCM_USB_43242_DEVICE_ID 0xbd1f #define BRCM_USB_43242_LG_DEVICE_ID 0x3101 diff --git a/drivers/net/wireless/intel/iwlegacy/3945.c b/drivers/net/wireless/intel/iwlegacy/3945.c index 209dc9988455..4db327a95414 100644 --- a/drivers/net/wireless/intel/iwlegacy/3945.c +++ b/drivers/net/wireless/intel/iwlegacy/3945.c @@ -2671,7 +2671,7 @@ const struct il_ops il3945_ops = { .send_led_cmd = il3945_send_led_cmd, }; -static struct il_cfg il3945_bg_cfg = { +static const struct il_cfg il3945_bg_cfg = { .name = "3945BG", .fw_name_pre = IL3945_FW_PRE, .ucode_api_max = IL3945_UCODE_API_MAX, @@ -2700,7 +2700,7 @@ static struct il_cfg il3945_bg_cfg = { }, }; -static struct il_cfg il3945_abg_cfg = { +static const struct il_cfg il3945_abg_cfg = { .name = "3945ABG", .fw_name_pre = IL3945_FW_PRE, .ucode_api_max = IL3945_UCODE_API_MAX, diff --git a/drivers/net/wireless/intel/iwlegacy/common.h b/drivers/net/wireless/intel/iwlegacy/common.h index 726ede391cb9..3bba521d2cd9 100644 --- a/drivers/net/wireless/intel/iwlegacy/common.h +++ b/drivers/net/wireless/intel/iwlegacy/common.h @@ -1320,7 +1320,7 @@ struct il_priv { u64 timestamp; union { -#if defined(CONFIG_IWL3945) || defined(CONFIG_IWL3945_MODULE) +#if IS_ENABLED(CONFIG_IWL3945) struct { void *shared_virt; dma_addr_t shared_phys; @@ -1351,7 +1351,7 @@ struct il_priv { } _3945; #endif -#if defined(CONFIG_IWL4965) || defined(CONFIG_IWL4965_MODULE) +#if IS_ENABLED(CONFIG_IWL4965) struct { struct il_rx_phy_res last_phy_res; bool last_phy_res_valid; diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c index b662cf35b033..c7509c51e9d9 100644 --- a/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c +++ b/drivers/net/wireless/intel/iwlwifi/dvm/ucode.c @@ -46,15 +46,6 @@ * ******************************************************************************/ -static inline const struct fw_img * -iwl_get_ucode_image(struct iwl_priv *priv, enum iwl_ucode_type ucode_type) -{ - if (ucode_type >= IWL_UCODE_TYPE_MAX) - return NULL; - - return &priv->fw->img[ucode_type]; -} - /* * Calibration */ @@ -330,7 +321,7 @@ int iwl_load_ucode_wait_alive(struct iwl_priv *priv, enum iwl_ucode_type old_type; static const u16 alive_cmd[] = { REPLY_ALIVE }; - fw = iwl_get_ucode_image(priv, ucode_type); + fw = iwl_get_ucode_image(priv->fw, ucode_type); if (WARN_ON(!fw)) return -EINVAL; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c index 64690c14ff4d..d4b73dedf89b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-7000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-7000.c @@ -73,13 +73,13 @@ /* Highest firmware API version supported */ #define IWL7260_UCODE_API_MAX 17 #define IWL7265_UCODE_API_MAX 17 -#define IWL7265D_UCODE_API_MAX 24 -#define IWL3168_UCODE_API_MAX 24 +#define IWL7265D_UCODE_API_MAX 26 +#define IWL3168_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL7260_UCODE_API_MIN 16 -#define IWL7265_UCODE_API_MIN 16 -#define IWL7265D_UCODE_API_MIN 16 +#define IWL7260_UCODE_API_MIN 17 +#define IWL7265_UCODE_API_MIN 17 +#define IWL7265D_UCODE_API_MIN 17 #define IWL3168_UCODE_API_MIN 20 /* NVM versions */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c index 6c6725e808d4..d02ca1491d16 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-8000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-8000.c @@ -70,11 +70,11 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL8000_UCODE_API_MAX 24 -#define IWL8265_UCODE_API_MAX 24 +#define IWL8000_UCODE_API_MAX 26 +#define IWL8265_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL8000_UCODE_API_MIN 16 +#define IWL8000_UCODE_API_MIN 17 #define IWL8265_UCODE_API_MIN 20 /* NVM versions */ @@ -212,6 +212,17 @@ const struct iwl_cfg iwl8265_2ac_cfg = { .vht_mu_mimo_supported = true, }; +const struct iwl_cfg iwl8275_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 8275", + .fw_name_pre = IWL8265_FW_PRE, + IWL_DEVICE_8265, + .ht_params = &iwl8000_ht_params, + .nvm_ver = IWL8000_NVM_VERSION, + .nvm_calib_ver = IWL8000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .vht_mu_mimo_supported = true, +}; + const struct iwl_cfg iwl4165_2ac_cfg = { .name = "Intel(R) Dual Band Wireless AC 4165", .fw_name_pre = IWL8000_FW_PRE, diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c index fbaf705f3fa7..ff850410d897 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-9000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-9000.c @@ -55,10 +55,10 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL9000_UCODE_API_MAX 24 +#define IWL9000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ -#define IWL9000_UCODE_API_MIN 16 +#define IWL9000_UCODE_API_MIN 17 /* NVM versions */ #define IWL9000_NVM_VERSION 0x0a1d @@ -72,15 +72,15 @@ #define IWL9000_SMEM_OFFSET 0x400000 #define IWL9000_SMEM_LEN 0x68000 -#define IWL9000_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-" +#define IWL9000_FW_PRE "iwlwifi-9000-pu-a0-jf-a0-" #define IWL9260_FW_PRE "iwlwifi-9260-th-a0-jf-a0-" -#define IWL9260LC_FW_PRE "iwlwifi-9260-th-a0-lc-a0-" +#define IWL9000LC_FW_PRE "iwlwifi-9000-pu-a0-lc-a0-" #define IWL9000_MODULE_FIRMWARE(api) \ IWL9000_FW_PRE "-" __stringify(api) ".ucode" #define IWL9260_MODULE_FIRMWARE(api) \ IWL9260_FW_PRE "-" __stringify(api) ".ucode" -#define IWL9260LC_MODULE_FIRMWARE(api) \ - IWL9260LC_FW_PRE "-" __stringify(api) ".ucode" +#define IWL9000LC_MODULE_FIRMWARE(api) \ + IWL9000LC_FW_PRE "-" __stringify(api) ".ucode" #define NVM_HW_SECTION_NUM_FAMILY_9000 10 @@ -146,41 +146,73 @@ static const struct iwl_tt_params iwl9000_tt_params = { .mac_addr_from_csr = true, \ .rf_id = true +const struct iwl_cfg iwl9160_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9160", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + const struct iwl_cfg iwl9260_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9260", - .fw_name_pre = IWL9260_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .name = "Intel(R) Dual Band Wireless AC 9260", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + +const struct iwl_cfg iwl9270_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9270", + .fw_name_pre = IWL9260_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, +}; + +const struct iwl_cfg iwl9460_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9460", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, +}; + +const struct iwl_cfg iwl9560_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9560", + .fw_name_pre = IWL9000_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, }; /* * TODO the struct below is for internal testing only this should be * removed by EO 2016~ */ -const struct iwl_cfg iwl9260lc_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 9260", - .fw_name_pre = IWL9260LC_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, -}; - -const struct iwl_cfg iwl5165_2ac_cfg = { - .name = "Intel(R) Dual Band Wireless AC 5165", - .fw_name_pre = IWL9000_FW_PRE, - IWL_DEVICE_9000, - .ht_params = &iwl9000_ht_params, - .nvm_ver = IWL9000_NVM_VERSION, - .nvm_calib_ver = IWL9000_TX_POWER_VERSION, - .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, - .integrated = true, +const struct iwl_cfg iwl9000lc_2ac_cfg = { + .name = "Intel(R) Dual Band Wireless AC 9000", + .fw_name_pre = IWL9000LC_FW_PRE, + IWL_DEVICE_9000, + .ht_params = &iwl9000_ht_params, + .nvm_ver = IWL9000_NVM_VERSION, + .nvm_calib_ver = IWL9000_TX_POWER_VERSION, + .max_ht_ampdu_exponent = IEEE80211_HT_MAX_AMPDU_64K, + .integrated = true, }; MODULE_FIRMWARE(IWL9000_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); MODULE_FIRMWARE(IWL9260_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); -MODULE_FIRMWARE(IWL9260LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); +MODULE_FIRMWARE(IWL9000LC_MODULE_FIRMWARE(IWL9000_UCODE_API_MAX)); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c index 4d78232c8afe..ea1618525878 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-a000.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-a000.c @@ -55,7 +55,7 @@ #include "iwl-agn-hw.h" /* Highest firmware API version supported */ -#define IWL_A000_UCODE_API_MAX 24 +#define IWL_A000_UCODE_API_MAX 26 /* Lowest firmware API version supported */ #define IWL_A000_UCODE_API_MIN 24 diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h index 423b23320d4f..2660cc4b9f8c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h @@ -359,7 +359,6 @@ struct iwl_cfg { high_temp:1, mac_addr_from_csr:1, lp_xtal_workaround:1, - no_power_up_nic_in_init:1, disable_dummy_notification:1, apmg_not_supported:1, mq_rx_supported:1, @@ -445,13 +444,17 @@ extern const struct iwl_cfg iwl7265d_n_cfg; extern const struct iwl_cfg iwl8260_2n_cfg; extern const struct iwl_cfg iwl8260_2ac_cfg; extern const struct iwl_cfg iwl8265_2ac_cfg; +extern const struct iwl_cfg iwl8275_2ac_cfg; extern const struct iwl_cfg iwl4165_2ac_cfg; extern const struct iwl_cfg iwl8260_2ac_sdio_cfg; extern const struct iwl_cfg iwl8265_2ac_sdio_cfg; extern const struct iwl_cfg iwl4165_2ac_sdio_cfg; +extern const struct iwl_cfg iwl9000lc_2ac_cfg; +extern const struct iwl_cfg iwl9160_2ac_cfg; extern const struct iwl_cfg iwl9260_2ac_cfg; -extern const struct iwl_cfg iwl9260lc_2ac_cfg; -extern const struct iwl_cfg iwl5165_2ac_cfg; +extern const struct iwl_cfg iwl9270_2ac_cfg; +extern const struct iwl_cfg iwl9460_2ac_cfg; +extern const struct iwl_cfg iwl9560_2ac_cfg; extern const struct iwl_cfg iwla000_2ac_cfg; #endif /* CONFIG_IWLMVM */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h index 871ad02fdb17..d73e9d436027 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-csr.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-csr.h @@ -589,6 +589,8 @@ enum dtd_diode_reg { * Causes for the FH register interrupts */ enum msix_fh_int_causes { + MSIX_FH_INT_CAUSES_Q0 = BIT(0), + MSIX_FH_INT_CAUSES_Q1 = BIT(1), MSIX_FH_INT_CAUSES_D2S_CH0_NUM = BIT(16), MSIX_FH_INT_CAUSES_D2S_CH1_NUM = BIT(17), MSIX_FH_INT_CAUSES_S2D = BIT(19), diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c index 1d9dd153ef1c..50510fb6ab8c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-devtrace.c @@ -33,9 +33,6 @@ #define CREATE_TRACE_POINTS #include "iwl-devtrace.h" -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite8); -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ioread32); -EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_iowrite32); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_event); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_error); EXPORT_TRACEPOINT_SYMBOL(iwlwifi_dev_ucode_cont_event); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h index 1d6f5d21a663..33ef5372d195 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h @@ -77,7 +77,6 @@ */ #define FH_MEM_LOWER_BOUND (0x1000) #define FH_MEM_UPPER_BOUND (0x2000) -#define TFH_MEM_LOWER_BOUND (0xA06000) /** * Keep-Warm (KW) buffer base address. @@ -120,7 +119,7 @@ #define FH_MEM_CBBC_20_31_LOWER_BOUND (FH_MEM_LOWER_BOUND + 0xB20) #define FH_MEM_CBBC_20_31_UPPER_BOUND (FH_MEM_LOWER_BOUND + 0xB80) /* a000 TFD table address, 64 bit */ -#define TFH_TFDQ_CBB_TABLE (TFH_MEM_LOWER_BOUND + 0x1C00) +#define TFH_TFDQ_CBB_TABLE (0x1C00) /* Find TFD CB base pointer for given queue */ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, @@ -156,7 +155,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * In case of DRAM read address which is not aligned to 128B, the TFH will * enable transfer size which doesn't cross 64B DRAM address boundary. */ -#define TFH_TRANSFER_MODE (TFH_MEM_LOWER_BOUND + 0x1F40) +#define TFH_TRANSFER_MODE (0x1F40) #define TFH_TRANSFER_MAX_PENDING_REQ 0xc #define TFH_CHUNK_SIZE_128 BIT(8) #define TFH_CHUNK_SPLIT_MODE BIT(10) @@ -167,7 +166,7 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * the start of the TFD first TB. * In case of a DRAM Tx CMD update the TFH will update PN and Key ID */ -#define TFH_TXCMD_UPDATE_CFG (TFH_MEM_LOWER_BOUND + 0x1F48) +#define TFH_TXCMD_UPDATE_CFG (0x1F48) /* * Controls TX DMA operation * @@ -181,22 +180,22 @@ static inline unsigned int FH_MEM_CBBC_QUEUE(struct iwl_trans *trans, * set to 1 - interrupt is sent to the driver * Bit 0: Indicates the snoop configuration */ -#define TFH_SRV_DMA_CHNL0_CTRL (TFH_MEM_LOWER_BOUND + 0x1F60) +#define TFH_SRV_DMA_CHNL0_CTRL (0x1F60) #define TFH_SRV_DMA_SNOOP BIT(0) #define TFH_SRV_DMA_TO_DRIVER BIT(24) #define TFH_SRV_DMA_START BIT(31) /* Defines the DMA SRAM write start address to transfer a data block */ -#define TFH_SRV_DMA_CHNL0_SRAM_ADDR (TFH_MEM_LOWER_BOUND + 0x1F64) +#define TFH_SRV_DMA_CHNL0_SRAM_ADDR (0x1F64) /* Defines the 64bits DRAM start address to read the DMA data block from */ -#define TFH_SRV_DMA_CHNL0_DRAM_ADDR (TFH_MEM_LOWER_BOUND + 0x1F68) +#define TFH_SRV_DMA_CHNL0_DRAM_ADDR (0x1F68) /* * Defines the number of bytes to transfer from DRAM to SRAM. * Note that this register may be configured with non-dword aligned size. */ -#define TFH_SRV_DMA_CHNL0_BC (TFH_MEM_LOWER_BOUND + 0x1F70) +#define TFH_SRV_DMA_CHNL0_BC (0x1F70) /** * Rx SRAM Control and Status Registers (RSCSR) @@ -644,6 +643,7 @@ struct iwl_rb_status { #define TFD_QUEUE_BC_SIZE (TFD_QUEUE_SIZE_MAX + TFD_QUEUE_SIZE_BC_DUP) #define IWL_TX_DMA_MASK DMA_BIT_MASK(36) #define IWL_NUM_OF_TBS 20 +#define IWL_TFH_NUM_TBS 25 static inline u8 iwl_get_dma_hi_addr(dma_addr_t addr) { @@ -665,25 +665,29 @@ struct iwl_tfd_tb { } __packed; /** - * struct iwl_tfd + * struct iwl_tfh_tb transmit buffer descriptor within transmit frame descriptor * - * Transmit Frame Descriptor (TFD) - * - * @ __reserved1[3] reserved - * @ num_tbs 0-4 number of active tbs - * 5 reserved - * 6-7 padding (not used) - * @ tbs[20] transmit frame buffer descriptors - * @ __pad padding + * This structure contains dma address and length of transmission address * + * @tb_len length of the tx buffer + * @addr 64 bits dma address + */ +struct iwl_tfh_tb { + __le16 tb_len; + __le64 addr; +} __packed; + +/** * Each Tx queue uses a circular buffer of 256 TFDs stored in host DRAM. * Both driver and device share these circular buffers, each of which must be - * contiguous 256 TFDs x 128 bytes-per-TFD = 32 KBytes + * contiguous 256 TFDs. + * For pre a000 HW it is 256 x 128 bytes-per-TFD = 32 KBytes + * For a000 HW and on it is 256 x 256 bytes-per-TFD = 65 KBytes * * Driver must indicate the physical address of the base of each * circular buffer via the FH_MEM_CBBC_QUEUE registers. * - * Each TFD contains pointer/size information for up to 20 data buffers + * Each TFD contains pointer/size information for up to 20 / 25 data buffers * in host DRAM. These buffers collectively contain the (one) frame described * by the TFD. Each buffer must be a single contiguous block of memory within * itself, but buffers may be scattered in host DRAM. Each buffer has max size @@ -692,6 +696,16 @@ struct iwl_tfd_tb { * * A maximum of 255 (not 256!) TFDs may be on a queue waiting for Tx. */ + +/** + * struct iwl_tfd - Transmit Frame Descriptor (TFD) + * @ __reserved1[3] reserved + * @ num_tbs 0-4 number of active tbs + * 5 reserved + * 6-7 padding (not used) + * @ tbs[20] transmit frame buffer descriptors + * @ __pad padding + */ struct iwl_tfd { u8 __reserved1[3]; u8 num_tbs; @@ -699,6 +713,19 @@ struct iwl_tfd { __le32 __pad; } __packed; +/** + * struct iwl_tfh_tfd - Transmit Frame Descriptor (TFD) + * @ num_tbs 0-4 number of active tbs + * 5 -15 reserved + * @ tbs[25] transmit frame buffer descriptors + * @ __pad padding + */ +struct iwl_tfh_tfd { + __le16 num_tbs; + struct iwl_tfh_tb tbs[IWL_TFH_NUM_TBS]; + __le32 __pad; +} __packed; + /* Keep Warm Size */ #define IWL_KW_SIZE 0x1000 /* 4k */ @@ -707,8 +734,13 @@ struct iwl_tfd { /** * struct iwlagn_schedq_bc_tbl scheduler byte count table * base physical address provided by SCD_DRAM_BASE_ADDR + * For devices up to a000: * @tfd_offset 0-12 - tx command byte count - * 12-16 - station index + * 12-16 - station index + * For a000 and on: + * @tfd_offset 0-12 - tx command byte count + * 12-13 - number of 64 byte chunks + * 14-16 - reserved */ struct iwlagn_scd_bc_tbl { __le16 tfd_offset[TFD_QUEUE_BC_SIZE]; diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h index 1b1e045f8907..ceec5ca2b1ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw-file.h @@ -199,8 +199,6 @@ struct iwl_ucode_capa { * @IWL_UCODE_TLV_FLAGS_NEWSCAN: new uCode scan behavior on hidden SSID, * treats good CRC threshold as a boolean * @IWL_UCODE_TLV_FLAGS_MFP: This uCode image supports MFP (802.11w). - * @IWL_UCODE_TLV_FLAGS_P2P: This uCode image supports P2P. - * @IWL_UCODE_TLV_FLAGS_DW_BC_TABLE: The SCD byte count table is in DWORDS * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: This uCode image supports uAPSD * @IWL_UCODE_TLV_FLAGS_SHORT_BL: 16 entries of black list instead of 64 in scan * offload profile config command. @@ -210,36 +208,24 @@ struct iwl_ucode_capa { * from the probe request template. * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL: new NS offload (small version) * @IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE: new NS offload (large version) - * @IWL_UCODE_TLV_FLAGS_P2P_PM: P2P client supports PM as a stand alone MAC - * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_DCM: support power save on BSS station and - * P2P client interfaces simultaneously if they are in different bindings. - * @IWL_UCODE_TLV_FLAGS_P2P_BSS_PS_SCM: support power save on BSS station and - * P2P client interfaces simultaneously if they are in same bindings. * @IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT: General support for uAPSD * @IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD: P2P client supports uAPSD power save * @IWL_UCODE_TLV_FLAGS_BCAST_FILTERING: uCode supports broadcast filtering. - * @IWL_UCODE_TLV_FLAGS_GO_UAPSD: AP/GO interfaces support uAPSD clients * @IWL_UCODE_TLV_FLAGS_EBS_SUPPORT: this uCode image supports EBS. */ enum iwl_ucode_tlv_flag { IWL_UCODE_TLV_FLAGS_PAN = BIT(0), IWL_UCODE_TLV_FLAGS_NEWSCAN = BIT(1), IWL_UCODE_TLV_FLAGS_MFP = BIT(2), - IWL_UCODE_TLV_FLAGS_P2P = BIT(3), - IWL_UCODE_TLV_FLAGS_DW_BC_TABLE = BIT(4), IWL_UCODE_TLV_FLAGS_SHORT_BL = BIT(7), IWL_UCODE_TLV_FLAGS_D3_6_IPV6_ADDRS = BIT(10), IWL_UCODE_TLV_FLAGS_NO_BASIC_SSID = BIT(12), IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_SMALL = BIT(15), IWL_UCODE_TLV_FLAGS_NEW_NSOFFL_LARGE = BIT(16), - IWL_UCODE_TLV_FLAGS_P2P_PM = BIT(21), - IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM = BIT(22), - IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM = BIT(23), IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT = BIT(24), IWL_UCODE_TLV_FLAGS_EBS_SUPPORT = BIT(25), IWL_UCODE_TLV_FLAGS_P2P_PS_UAPSD = BIT(26), IWL_UCODE_TLV_FLAGS_BCAST_FILTERING = BIT(29), - IWL_UCODE_TLV_FLAGS_GO_UAPSD = BIT(30), }; typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; @@ -249,24 +235,21 @@ typedef unsigned int __bitwise__ iwl_ucode_tlv_api_t; * @IWL_UCODE_TLV_API_FRAGMENTED_SCAN: This ucode supports active dwell time * longer than the passive one, which is essential for fragmented scan. * @IWL_UCODE_TLV_API_WIFI_MCC_UPDATE: ucode supports MCC updates with source. - * @IWL_UCODE_TLV_API_WIDE_CMD_HDR: ucode supports wide command header * @IWL_UCODE_TLV_API_LQ_SS_PARAMS: Configure STBC/BFER via LQ CMD ss_params * @IWL_UCODE_TLV_API_NEW_VERSION: new versioning format - * @IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY: scan APIs use 8-level priority - * instead of 3. - * @IWL_UCODE_TLV_API_TX_POWER_CHAIN: TX power API has larger command size - * (command version 3) that supports per-chain limits + * @IWL_UCODE_TLV_API_SCAN_TSF_REPORT: Scan start time reported in scan + * iteration complete notification, and the timestamp reported for RX + * received during scan, are reported in TSF of the mac specified in the + * scan request. * * @NUM_IWL_UCODE_TLV_API: number of bits used */ enum iwl_ucode_tlv_api { IWL_UCODE_TLV_API_FRAGMENTED_SCAN = (__force iwl_ucode_tlv_api_t)8, IWL_UCODE_TLV_API_WIFI_MCC_UPDATE = (__force iwl_ucode_tlv_api_t)9, - IWL_UCODE_TLV_API_WIDE_CMD_HDR = (__force iwl_ucode_tlv_api_t)14, IWL_UCODE_TLV_API_LQ_SS_PARAMS = (__force iwl_ucode_tlv_api_t)18, - IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, - IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY = (__force iwl_ucode_tlv_api_t)24, - IWL_UCODE_TLV_API_TX_POWER_CHAIN = (__force iwl_ucode_tlv_api_t)27, + IWL_UCODE_TLV_API_NEW_VERSION = (__force iwl_ucode_tlv_api_t)20, + IWL_UCODE_TLV_API_SCAN_TSF_REPORT = (__force iwl_ucode_tlv_api_t)28, NUM_IWL_UCODE_TLV_API #ifdef __CHECKER__ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h index 74ea68d1063c..5f229556339a 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-fw.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-fw.h @@ -329,4 +329,13 @@ iwl_fw_dbg_conf_usniffer(const struct iwl_fw *fw, u8 id) return conf_tlv->usniffer; } +static inline const struct fw_img * +iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type) +{ + if (ucode_type >= IWL_UCODE_TYPE_MAX) + return NULL; + + return &fw->img[ucode_type]; +} + #endif /* __iwl_fw_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-io.c b/drivers/net/wireless/intel/iwlwifi/iwl-io.c index 92c8b5f9a9cb..a9f69fdd170b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-io.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-io.c @@ -267,7 +267,7 @@ static const char *get_rfh_string(int cmd) IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_WIDX, i); IWL_CMD_MQ(cmd, RFH_Q_FRBDCB_RIDX, i); IWL_CMD_MQ(cmd, RFH_Q_URBD_STTS_WPTR_LSB, i); - }; + } switch (cmd) { IWL_CMD(RFH_RXF_DMA_CFG); diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c index 8aa1f2b7fdfc..88f260db3744 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-notif-wait.c @@ -99,8 +99,12 @@ void iwl_notification_wait_notify(struct iwl_notif_wait_data *notif_wait, continue; for (i = 0; i < w->n_cmds; i++) { - if (w->cmds[i] == - WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd)) { + u16 rec_id = WIDE_ID(pkt->hdr.group_id, + pkt->hdr.cmd); + + if (w->cmds[i] == rec_id || + (!iwl_cmd_groupid(w->cmds[i]) && + DEF_ID(w->cmds[i]) == rec_id)) { found = true; break; } diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c index 43f8f7d45ddb..3bd6fc1b76d4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c @@ -67,6 +67,7 @@ #include #include #include +#include #include "iwl-drv.h" #include "iwl-modparams.h" #include "iwl-nvm-parse.h" @@ -564,11 +565,16 @@ static void iwl_set_hw_address_from_csr(struct iwl_trans *trans, __le32 mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_STRAP)); __le32 mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_STRAP)); - /* If OEM did not fuse address - get it from OTP */ - if (!mac_addr0 && !mac_addr1) { - mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP)); - mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP)); - } + iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr); + /* + * If the OEM fused a valid address, use it instead of the one in the + * OTP + */ + if (is_valid_ether_addr(data->hw_addr)) + return; + + mac_addr0 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR0_OTP)); + mac_addr1 = cpu_to_le32(iwl_read32(trans, CSR_MAC_ADDR1_OTP)); iwl_flip_hw_address(mac_addr0, mac_addr1, data->hw_addr); } @@ -899,3 +905,91 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, return regd; } IWL_EXPORT_SYMBOL(iwl_parse_nvm_mcc_info); + +#ifdef CONFIG_ACPI +#define WRDD_METHOD "WRDD" +#define WRDD_WIFI (0x07) +#define WRDD_WIGIG (0x10) + +static u32 iwl_wrdd_get_mcc(struct device *dev, union acpi_object *wrdd) +{ + union acpi_object *mcc_pkg, *domain_type, *mcc_value; + u32 i; + + if (wrdd->type != ACPI_TYPE_PACKAGE || + wrdd->package.count < 2 || + wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || + wrdd->package.elements[0].integer.value != 0) { + IWL_DEBUG_EEPROM(dev, "Unsupported wrdd structure\n"); + return 0; + } + + for (i = 1 ; i < wrdd->package.count ; ++i) { + mcc_pkg = &wrdd->package.elements[i]; + + if (mcc_pkg->type != ACPI_TYPE_PACKAGE || + mcc_pkg->package.count < 2 || + mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || + mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { + mcc_pkg = NULL; + continue; + } + + domain_type = &mcc_pkg->package.elements[0]; + if (domain_type->integer.value == WRDD_WIFI) + break; + + mcc_pkg = NULL; + } + + if (mcc_pkg) { + mcc_value = &mcc_pkg->package.elements[1]; + return mcc_value->integer.value; + } + + return 0; +} + +int iwl_get_bios_mcc(struct device *dev, char *mcc) +{ + acpi_handle root_handle; + acpi_handle handle; + struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; + acpi_status status; + u32 mcc_val; + + root_handle = ACPI_HANDLE(dev); + if (!root_handle) { + IWL_DEBUG_EEPROM(dev, + "Could not retrieve root port ACPI handle\n"); + return -ENOENT; + } + + /* Get the method's handle */ + status = acpi_get_handle(root_handle, (acpi_string)WRDD_METHOD, + &handle); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_EEPROM(dev, "WRD method not found\n"); + return -ENOENT; + } + + /* Call WRDD with no arguments */ + status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); + if (ACPI_FAILURE(status)) { + IWL_DEBUG_EEPROM(dev, "WRDC invocation failed (0x%x)\n", + status); + return -ENOENT; + } + + mcc_val = iwl_wrdd_get_mcc(dev, wrdd.pointer); + kfree(wrdd.pointer); + if (!mcc_val) + return -ENOENT; + + mcc[0] = (mcc_val >> 8) & 0xff; + mcc[1] = mcc_val & 0xff; + mcc[2] = '\0'; + return 0; +} +IWL_EXPORT_SYMBOL(iwl_get_bios_mcc); +#endif diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h index d704d52aa7ec..7249e5b403f4 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.h @@ -5,7 +5,8 @@ * * GPL LICENSE SUMMARY * - * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved. + * Copyright(c) 2008 - 2015 Intel Corporation. All rights reserved. + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -93,4 +94,21 @@ struct ieee80211_regdomain * iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg, int num_of_ch, __le32 *channels, u16 fw_mcc); +#ifdef CONFIG_ACPI +/** + * iwl_get_bios_mcc - read MCC from BIOS, if available + * + * @dev: the struct device + * @mcc: output buffer (3 bytes) that will get the MCC + * + * This function tries to read the current MCC from ACPI if available. + */ +int iwl_get_bios_mcc(struct device *dev, char *mcc); +#else +static inline int iwl_get_bios_mcc(struct device *dev, char *mcc) +{ + return -ENOENT; +} +#endif + #endif /* __iwl_nvm_parse_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c index 7beba9ae5617..2893826d7d2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-phy-db.c @@ -110,7 +110,7 @@ enum iwl_phy_db_section_type { IWL_PHY_DB_MAX }; -#define PHY_DB_CMD 0x6c /* TEMP API - The actual is 0x8c */ +#define PHY_DB_CMD 0x6c /* * phy db - configure operational ucode diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h index 459bf736fd5b..406ef301b8ab 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h @@ -302,22 +302,17 @@ #define OSC_CLK_FORCE_CONTROL (0x8) #define FH_UCODE_LOAD_STATUS (0x1AF0) -#define CSR_UCODE_LOAD_STATUS_ADDR (0x1E70) -enum secure_load_status_reg { - LMPM_CPU_UCODE_LOADING_STARTED = 0x00000001, - LMPM_CPU_HDRS_LOADING_COMPLETED = 0x00000003, - LMPM_CPU_UCODE_LOADING_COMPLETED = 0x00000007, - LMPM_CPU_STATUS_NUM_OF_LAST_COMPLETED = 0x000000F8, - LMPM_CPU_STATUS_NUM_OF_LAST_LOADED_BLOCK = 0x0000FF00, -}; -#define LMPM_SECURE_INSPECTOR_CODE_ADDR (0x1E38) -#define LMPM_SECURE_INSPECTOR_DATA_ADDR (0x1E3C) +/* + * Replacing FH_UCODE_LOAD_STATUS + * This register is writen by driver and is read by uCode during boot flow. + * Note this address is cleared after MAC reset. + */ +#define UREG_UCODE_LOAD_STATUS (0xa05c40) + #define LMPM_SECURE_UCODE_LOAD_CPU1_HDR_ADDR (0x1E78) #define LMPM_SECURE_UCODE_LOAD_CPU2_HDR_ADDR (0x1E7C) -#define LMPM_SECURE_INSPECTOR_CODE_MEM_SPACE (0x400000) -#define LMPM_SECURE_INSPECTOR_DATA_MEM_SPACE (0x402000) #define LMPM_SECURE_CPU1_HDR_MEM_SPACE (0x420000) #define LMPM_SECURE_CPU2_HDR_MEM_SPACE (0x420400) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index 6069a9ff53fa..d42cab291025 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -65,6 +65,7 @@ #include "iwl-trans.h" #include "iwl-drv.h" +#include "iwl-fh.h" struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, struct device *dev, @@ -77,7 +78,7 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, static struct lock_class_key __key; #endif - trans = kzalloc(sizeof(*trans) + priv_size, GFP_KERNEL); + trans = devm_kzalloc(dev, sizeof(*trans) + priv_size, GFP_KERNEL); if (!trans) return NULL; @@ -102,18 +103,14 @@ struct iwl_trans *iwl_trans_alloc(unsigned int priv_size, SLAB_HWCACHE_ALIGN, NULL); if (!trans->dev_cmd_pool) - goto free; + return NULL; return trans; - free: - kfree(trans); - return NULL; } void iwl_trans_free(struct iwl_trans *trans) { kmem_cache_destroy(trans->dev_cmd_pool); - kfree(trans); } int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd) @@ -139,6 +136,9 @@ int iwl_trans_send_cmd(struct iwl_trans *trans, struct iwl_host_cmd *cmd) if (!(cmd->flags & CMD_ASYNC)) lock_map_acquire_read(&trans->sync_cmd_lockdep_map); + if (trans->wide_cmd_header && !iwl_cmd_groupid(cmd->id)) + cmd->id = DEF_ID(cmd->id); + ret = trans->ops->send_cmd(trans, cmd); if (!(cmd->flags & CMD_ASYNC)) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index 5535e2238da3..0296124a7f9c 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -153,6 +153,7 @@ static inline u32 iwl_cmd_id(u8 opcode, u8 groupid, u8 version) /* make u16 wide id out of u8 group and opcode */ #define WIDE_ID(grp, opcode) ((grp << 8) | opcode) +#define DEF_ID(opcode) ((1 << 8) | (opcode)) /* due to the conversion, this group is special; new groups * should be defined in the appropriate fw-api header files @@ -262,8 +263,6 @@ static inline u32 iwl_rx_packet_payload_len(const struct iwl_rx_packet *pkt) * (i.e. mark it as non-idle). * @CMD_WANT_ASYNC_CALLBACK: the op_mode's async callback function must be * called after this command completes. Valid only with CMD_ASYNC. - * @CMD_TB_BITMAP_POS: Position of the first bit for the TB bitmap. We need to - * check that we leave enough room for the TBs bitmap which needs 20 bits. */ enum CMD_MODE { CMD_ASYNC = BIT(0), @@ -274,8 +273,6 @@ enum CMD_MODE { CMD_MAKE_TRANS_IDLE = BIT(5), CMD_WAKE_UP_TRANS = BIT(6), CMD_WANT_ASYNC_CALLBACK = BIT(7), - - CMD_TB_BITMAP_POS = 11, }; #define DEF_CMD_PAYLOAD_SIZE 320 @@ -488,7 +485,6 @@ struct iwl_hcmd_arr { * @bc_table_dword: set to true if the BC table expects the byte count to be * in DWORD (as opposed to bytes) * @scd_set_active: should the transport configure the SCD for HCMD queue - * @wide_cmd_header: firmware supports wide host command header * @sw_csum_tx: transport should compute the TCP checksum * @command_groups: array of command groups, each member is an array of the * commands in the group; for debugging only @@ -510,7 +506,6 @@ struct iwl_trans_config { enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; bool scd_set_active; - bool wide_cmd_header; bool sw_csum_tx; const struct iwl_hcmd_arr *command_groups; int command_groups_size; @@ -649,6 +644,8 @@ struct iwl_trans_ops { void (*txq_set_shared_mode)(struct iwl_trans *trans, u32 txq_id, bool shared); + dma_addr_t (*get_txq_byte_table)(struct iwl_trans *trans, int txq_id); + int (*wait_tx_queue_empty)(struct iwl_trans *trans, u32 txq_bm); void (*freeze_txq_timer)(struct iwl_trans *trans, unsigned long txqs, bool freeze); @@ -772,6 +769,7 @@ enum iwl_plat_pm_mode { * @hw_id_str: a string with info about HW ID. Set during transport allocation. * @pm_support: set to true in start_hw if link pm is supported * @ltr_enabled: set to true if the LTR is enabled + * @wide_cmd_header: true when ucode supports wide command header format * @num_rx_queues: number of RX queues allocated by the transport; * the transport must set this before calling iwl_drv_start() * @dev_cmd_pool: pool for Tx cmd allocation - for internal use only. @@ -823,6 +821,7 @@ struct iwl_trans { const struct iwl_hcmd_arr *command_groups; int command_groups_size; + bool wide_cmd_header; u8 num_rx_queues; @@ -1073,6 +1072,15 @@ static inline void iwl_trans_txq_set_shared_mode(struct iwl_trans *trans, trans->ops->txq_set_shared_mode(trans, queue, shared_mode); } +static inline dma_addr_t iwl_trans_get_txq_byte_table(struct iwl_trans *trans, + int queue) +{ + /* we should never be called if the trans doesn't support it */ + BUG_ON(!trans->ops->get_txq_byte_table); + + return trans->ops->get_txq_byte_table(trans, queue); +} + static inline void iwl_trans_txq_enable(struct iwl_trans *trans, int queue, int fifo, int sta_id, int tid, int frame_limit, u16 ssn, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c index b23271755daf..2d6f44fbaf62 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs-vif.c @@ -504,6 +504,28 @@ static inline char *iwl_dbgfs_is_match(char *name, char *buf) return !strncmp(name, buf, len) ? buf + len : NULL; } +static ssize_t iwl_dbgfs_os_device_timediff_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct ieee80211_vif *vif = file->private_data; + struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); + struct iwl_mvm *mvm = mvmvif->mvm; + u32 curr_gp2; + u64 curr_os; + s64 diff; + char buf[64]; + const size_t bufsz = sizeof(buf); + int pos = 0; + + iwl_mvm_get_sync_time(mvm, &curr_gp2, &curr_os); + do_div(curr_os, NSEC_PER_USEC); + diff = curr_os - curr_gp2; + pos += scnprintf(buf + pos, bufsz - pos, "diff=%lld\n", diff); + + return simple_read_from_buffer(user_buf, count, ppos, buf, pos); +} + static ssize_t iwl_dbgfs_tof_enable_write(struct ieee80211_vif *vif, char *buf, size_t count, loff_t *ppos) @@ -1530,6 +1552,8 @@ MVM_DEBUGFS_READ_FILE_OPS(tof_range_response); MVM_DEBUGFS_READ_WRITE_FILE_OPS(tof_responder_params, 32); MVM_DEBUGFS_READ_WRITE_FILE_OPS(quota_min, 32); MVM_DEBUGFS_WRITE_FILE_OPS(lqm_send_cmd, 64); +MVM_DEBUGFS_READ_FILE_OPS(os_device_timediff); + void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) { @@ -1554,8 +1578,7 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) if (iwlmvm_mod_params.power_scheme != IWL_POWER_SCHEME_CAM && ((vif->type == NL80211_IFTYPE_STATION && !vif->p2p) || - (vif->type == NL80211_IFTYPE_STATION && vif->p2p && - mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM))) + (vif->type == NL80211_IFTYPE_STATION && vif->p2p))) MVM_DEBUGFS_ADD_FILE_VIF(pm_params, mvmvif->dbgfs_dir, S_IWUSR | S_IRUSR); @@ -1570,6 +1593,8 @@ void iwl_mvm_vif_dbgfs_register(struct iwl_mvm *mvm, struct ieee80211_vif *vif) MVM_DEBUGFS_ADD_FILE_VIF(quota_min, mvmvif->dbgfs_dir, S_IRUSR | S_IWUSR); MVM_DEBUGFS_ADD_FILE_VIF(lqm_send_cmd, mvmvif->dbgfs_dir, S_IWUSR); + MVM_DEBUGFS_ADD_FILE_VIF(os_device_timediff, + mvmvif->dbgfs_dir, S_IRUSR); if (vif->type == NL80211_IFTYPE_STATION && !vif->p2p && mvmvif == mvm->bf_allowed_vif) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c index b34489817c70..539d718df797 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c @@ -917,6 +917,59 @@ static ssize_t iwl_dbgfs_indirection_tbl_write(struct iwl_mvm *mvm, return ret ?: count; } +static ssize_t iwl_dbgfs_inject_packet_write(struct iwl_mvm *mvm, + char *buf, size_t count, + loff_t *ppos) +{ + struct iwl_rx_cmd_buffer rxb = { + ._rx_page_order = 0, + .truesize = 0, /* not used */ + ._offset = 0, + }; + struct iwl_rx_packet *pkt; + struct iwl_rx_mpdu_desc *desc; + int bin_len = count / 2; + int ret = -EINVAL; + + /* supporting only 9000 descriptor */ + if (!mvm->trans->cfg->mq_rx_supported) + return -ENOTSUPP; + + rxb._page = alloc_pages(GFP_ATOMIC, 0); + if (!rxb._page) + return -ENOMEM; + pkt = rxb_addr(&rxb); + + ret = hex2bin(page_address(rxb._page), buf, bin_len); + if (ret) + goto out; + + /* avoid invalid memory access */ + if (bin_len < sizeof(*pkt) + sizeof(*desc)) + goto out; + + /* check this is RX packet */ + if (WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd) != + WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD)) + goto out; + + /* check the length in metadata matches actual received length */ + desc = (void *)pkt->data; + if (le16_to_cpu(desc->mpdu_len) != + (bin_len - sizeof(*desc) - sizeof(*pkt))) + goto out; + + local_bh_disable(); + iwl_mvm_rx_mpdu_mq(mvm, NULL, &rxb, 0); + local_bh_enable(); + ret = 0; + +out: + iwl_free_rxb(&rxb); + + return ret ?: count; +} + static ssize_t iwl_dbgfs_fw_dbg_conf_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) @@ -1454,6 +1507,7 @@ MVM_DEBUGFS_WRITE_FILE_OPS(cont_recording, 8); MVM_DEBUGFS_WRITE_FILE_OPS(max_amsdu_len, 8); MVM_DEBUGFS_WRITE_FILE_OPS(indirection_tbl, (IWL_RSS_INDIRECTION_TABLE_SIZE * 2)); +MVM_DEBUGFS_WRITE_FILE_OPS(inject_packet, 512); #ifdef CONFIG_IWLWIFI_BCAST_FILTERING MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters, 256); @@ -1464,6 +1518,132 @@ MVM_DEBUGFS_READ_WRITE_FILE_OPS(bcast_filters_macs, 256); MVM_DEBUGFS_READ_WRITE_FILE_OPS(d3_sram, 8); #endif +static ssize_t iwl_dbgfs_mem_read(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct iwl_mvm *mvm = file->private_data; + struct iwl_dbg_mem_access_cmd cmd = {}; + struct iwl_dbg_mem_access_rsp *rsp; + struct iwl_host_cmd hcmd = { + .flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL, + .data = { &cmd, }, + .len = { sizeof(cmd) }, + }; + size_t delta, len; + ssize_t ret; + + hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, + DEBUG_GROUP, 0); + cmd.op = cpu_to_le32(DEBUG_MEM_OP_READ); + + /* Take care of alignment of both the position and the length */ + delta = *ppos & 0x3; + cmd.addr = cpu_to_le32(*ppos - delta); + cmd.len = cpu_to_le32(min(ALIGN(count + delta, 4) / 4, + (size_t)DEBUG_MEM_MAX_SIZE_DWORDS)); + + mutex_lock(&mvm->mutex); + ret = iwl_mvm_send_cmd(mvm, &hcmd); + mutex_unlock(&mvm->mutex); + + if (ret < 0) + return ret; + + rsp = (void *)hcmd.resp_pkt->data; + if (le32_to_cpu(rsp->status) != DEBUG_MEM_STATUS_SUCCESS) { + ret = -ENXIO; + goto out; + } + + len = min((size_t)le32_to_cpu(rsp->len) << 2, + iwl_rx_packet_payload_len(hcmd.resp_pkt) - sizeof(*rsp)); + len = min(len - delta, count); + if (len < 0) { + ret = -EFAULT; + goto out; + } + + ret = len - copy_to_user(user_buf, (void *)rsp->data + delta, len); + *ppos += ret; + +out: + iwl_free_resp(&hcmd); + return ret; +} + +static ssize_t iwl_dbgfs_mem_write(struct file *file, + const char __user *user_buf, size_t count, + loff_t *ppos) +{ + struct iwl_mvm *mvm = file->private_data; + struct iwl_dbg_mem_access_cmd *cmd; + struct iwl_dbg_mem_access_rsp *rsp; + struct iwl_host_cmd hcmd = {}; + size_t cmd_size; + size_t data_size; + u32 op, len; + ssize_t ret; + + hcmd.id = iwl_cmd_id(*ppos >> 24 ? UMAC_RD_WR : LMAC_RD_WR, + DEBUG_GROUP, 0); + + if (*ppos & 0x3 || count < 4) { + op = DEBUG_MEM_OP_WRITE_BYTES; + len = min(count, (size_t)(4 - (*ppos & 0x3))); + data_size = len; + } else { + op = DEBUG_MEM_OP_WRITE; + len = min(count >> 2, (size_t)DEBUG_MEM_MAX_SIZE_DWORDS); + data_size = len << 2; + } + + cmd_size = sizeof(*cmd) + ALIGN(data_size, 4); + cmd = kzalloc(cmd_size, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->op = cpu_to_le32(op); + cmd->len = cpu_to_le32(len); + cmd->addr = cpu_to_le32(*ppos); + if (copy_from_user((void *)cmd->data, user_buf, data_size)) { + kfree(cmd); + return -EFAULT; + } + + hcmd.flags = CMD_WANT_SKB | CMD_SEND_IN_RFKILL, + hcmd.data[0] = (void *)cmd; + hcmd.len[0] = cmd_size; + + mutex_lock(&mvm->mutex); + ret = iwl_mvm_send_cmd(mvm, &hcmd); + mutex_unlock(&mvm->mutex); + + kfree(cmd); + + if (ret < 0) + return ret; + + rsp = (void *)hcmd.resp_pkt->data; + if (rsp->status != DEBUG_MEM_STATUS_SUCCESS) { + ret = -ENXIO; + goto out; + } + + ret = data_size; + *ppos += ret; + +out: + iwl_free_resp(&hcmd); + return ret; +} + +static const struct file_operations iwl_dbgfs_mem_ops = { + .read = iwl_dbgfs_mem_read, + .write = iwl_dbgfs_mem_write, + .open = simple_open, + .llseek = default_llseek, +}; + int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) { struct dentry *bcast_dir __maybe_unused; @@ -1502,6 +1682,7 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) MVM_DEBUGFS_ADD_FILE(send_echo_cmd, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(cont_recording, mvm->debugfs_dir, S_IWUSR); MVM_DEBUGFS_ADD_FILE(indirection_tbl, mvm->debugfs_dir, S_IWUSR); + MVM_DEBUGFS_ADD_FILE(inject_packet, mvm->debugfs_dir, S_IWUSR); if (!debugfs_create_bool("enable_scan_iteration_notif", S_IRUSR | S_IWUSR, mvm->debugfs_dir, @@ -1560,6 +1741,9 @@ int iwl_mvm_dbgfs_register(struct iwl_mvm *mvm, struct dentry *dbgfs_dir) mvm->debugfs_dir, &mvm->nvm_phy_sku_blob)) goto err; + debugfs_create_file("mem", S_IRUSR | S_IWUSR, dbgfs_dir, mvm, + &iwl_dbgfs_mem_ops); + /* * Create a symlink with mac80211. It will be removed when mac80211 * exists (before the opmode exists which removes the target.) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h index 404b0de9e2dc..3fa43d1348a2 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-power.h @@ -313,35 +313,26 @@ enum iwl_dev_tx_power_cmd_mode { IWL_TX_POWER_MODE_SET_ACK = 3, }; /* TX_POWER_REDUCED_FLAGS_TYPE_API_E_VER_4 */; +#define IWL_NUM_CHAIN_LIMITS 2 +#define IWL_NUM_SUB_BANDS 5 + /** - * struct iwl_dev_tx_power_cmd_v2 - TX power reduction command + * struct iwl_dev_tx_power_cmd - TX power reduction command * @set_mode: see &enum iwl_dev_tx_power_cmd_mode * @mac_context_id: id of the mac ctx for which we are reducing TX power. * @pwr_restriction: TX power restriction in 1/8 dBms. * @dev_24: device TX power restriction in 1/8 dBms * @dev_52_low: device TX power restriction upper band - low * @dev_52_high: device TX power restriction upper band - high + * @per_chain_restriction: per chain restrictions */ -struct iwl_dev_tx_power_cmd_v2 { +struct iwl_dev_tx_power_cmd_v3 { __le32 set_mode; __le32 mac_context_id; __le16 pwr_restriction; __le16 dev_24; __le16 dev_52_low; __le16 dev_52_high; -} __packed; /* TX_REDUCED_POWER_API_S_VER_2 */ - -#define IWL_NUM_CHAIN_LIMITS 2 -#define IWL_NUM_SUB_BANDS 5 - -/** - * struct iwl_dev_tx_power_cmd - TX power reduction command - * @v2: version 2 of the command, embedded here for easier software handling - * @per_chain_restriction: per chain restrictions - */ -struct iwl_dev_tx_power_cmd_v3 { - /* v3 is just an extension of v2 - keep this here */ - struct iwl_dev_tx_power_cmd_v2 v2; __le16 per_chain_restriction[IWL_NUM_CHAIN_LIMITS][IWL_NUM_SUB_BANDS]; } __packed; /* TX_REDUCED_POWER_API_S_VER_3 */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h index f01dab0d0dac..0c294c9f98e9 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-scan.h @@ -7,6 +7,7 @@ * * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved. * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH + * Copyright(c) 2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of version 2 of the GNU General Public License as @@ -603,6 +604,8 @@ struct iwl_scan_req_umac_tail { * @uid: scan id, &enum iwl_umac_scan_uid_offsets * @ooc_priority: out of channel priority - &enum iwl_scan_priority * @general_flags: &enum iwl_umac_scan_general_flags + * @reserved2: for future use and alignment + * @scan_start_mac_id: report the scan start TSF time according to this mac TSF * @extended_dwell: dwell time for channels 1, 6 and 11 * @active_dwell: dwell time for active scan * @passive_dwell: dwell time for passive scan @@ -620,8 +623,10 @@ struct iwl_scan_req_umac { __le32 flags; __le32 uid; __le32 ooc_priority; - /* SCAN_GENERAL_PARAMS_API_S_VER_1 */ - __le32 general_flags; + /* SCAN_GENERAL_PARAMS_API_S_VER_4 */ + __le16 general_flags; + u8 reserved2; + u8 scan_start_mac_id; u8 extended_dwell; u8 active_dwell; u8 passive_dwell; @@ -629,7 +634,7 @@ struct iwl_scan_req_umac { __le32 max_out_time; __le32 suspend_time; __le32 scan_priority; - /* SCAN_CHANNEL_PARAMS_API_S_VER_1 */ + /* SCAN_CHANNEL_PARAMS_API_S_VER_4 */ u8 channel_flags; u8 n_channels; __le16 reserved; @@ -718,8 +723,8 @@ struct iwl_scan_offload_profiles_query { * @status: one of SCAN_COMP_STATUS_* * @bt_status: BT on/off status * @last_channel: last channel that was scanned - * @tsf_low: TSF timer (lower half) in usecs - * @tsf_high: TSF timer (higher half) in usecs + * @start_tsf: TSF timer in usecs of the scan start time for the mac specified + * in &struct iwl_scan_req_umac. * @results: array of scan results, only "scanned_channels" of them are valid */ struct iwl_umac_scan_iter_complete_notif { @@ -728,9 +733,8 @@ struct iwl_umac_scan_iter_complete_notif { u8 status; u8 bt_status; u8 last_channel; - __le32 tsf_low; - __le32 tsf_high; + __le64 start_tsf; struct iwl_scan_results_notif results[]; -} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_1 */ +} __packed; /* SCAN_ITER_COMPLETE_NTF_UMAC_API_S_VER_2 */ #endif diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h index d1c4fb849111..6c8e3ca79323 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-sta.h @@ -432,26 +432,43 @@ struct iwl_mvm_rm_sta_cmd { u8 reserved[3]; } __packed; /* REMOVE_STA_CMD_API_S_VER_2 */ +/** + * struct iwl_mvm_mgmt_mcast_key_cmd_v1 + * ( MGMT_MCAST_KEY = 0x1f ) + * @ctrl_flags: %iwl_sta_key_flag + * @igtk: + * @k1: unused + * @k2: unused + * @sta_id: station ID that support IGTK + * @key_id: + * @receive_seq_cnt: initial RSC/PN needed for replay check + */ +struct iwl_mvm_mgmt_mcast_key_cmd_v1 { + __le32 ctrl_flags; + u8 igtk[16]; + u8 k1[16]; + u8 k2[16]; + __le32 key_id; + __le32 sta_id; + __le64 receive_seq_cnt; +} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ + /** * struct iwl_mvm_mgmt_mcast_key_cmd * ( MGMT_MCAST_KEY = 0x1f ) * @ctrl_flags: %iwl_sta_key_flag - * @IGTK: - * @K1: unused - * @K2: unused + * @igtk: IGTK master key * @sta_id: station ID that support IGTK * @key_id: * @receive_seq_cnt: initial RSC/PN needed for replay check */ struct iwl_mvm_mgmt_mcast_key_cmd { __le32 ctrl_flags; - u8 IGTK[16]; - u8 K1[16]; - u8 K2[16]; + u8 igtk[32]; __le32 key_id; __le32 sta_id; __le64 receive_seq_cnt; -} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_1 */ +} __packed; /* SEC_MGMT_MULTICAST_KEY_CMD_API_S_VER_2 */ struct iwl_mvm_wep_key { u8 key_index; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h index 4144623e1616..59ca97a11b2b 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api-tx.h @@ -89,7 +89,6 @@ * @TX_CMD_FLG_MH_PAD: driver inserted 2 byte padding after MAC header. * Should be set for 26/30 length MAC headers * @TX_CMD_FLG_RESP_TO_DRV: zero this if the response should go only to FW - * @TX_CMD_FLG_CCMP_AGG: this frame uses CCMP for aggregation acceleration * @TX_CMD_FLG_TKIP_MIC_DONE: FW already performed TKIP MIC calculation * @TX_CMD_FLG_DUR: disable duration overwriting used in PS-Poll Assoc-id * @TX_CMD_FLG_FW_DROP: FW should mark frame to be dropped @@ -116,7 +115,6 @@ enum iwl_tx_flags { TX_CMD_FLG_KEEP_SEQ_CTL = BIT(18), TX_CMD_FLG_MH_PAD = BIT(20), TX_CMD_FLG_RESP_TO_DRV = BIT(21), - TX_CMD_FLG_CCMP_AGG = BIT(22), TX_CMD_FLG_TKIP_MIC_DONE = BIT(23), TX_CMD_FLG_DUR = BIT(25), TX_CMD_FLG_FW_DROP = BIT(26), @@ -149,7 +147,7 @@ enum iwl_tx_pm_timeouts { * @TX_CMD_SEC_EXT: extended cipher algorithm. * @TX_CMD_SEC_GCMP: GCMP encryption algorithm. * @TX_CMD_SEC_KEY128: set for 104 bits WEP key. - * @TC_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken + * @TX_CMD_SEC_KEY_FROM_TABLE: for a non-WEP key, set if the key should be taken * from the table instead of from the TX command. * If the key is taken from the key table its index should be given by the * first byte of the TX command key field. @@ -161,7 +159,7 @@ enum iwl_tx_cmd_sec_ctrl { TX_CMD_SEC_EXT = 0x04, TX_CMD_SEC_GCMP = 0x05, TX_CMD_SEC_KEY128 = 0x08, - TC_CMD_SEC_KEY_FROM_TABLE = 0x08, + TX_CMD_SEC_KEY_FROM_TABLE = 0x08, }; /* TODO: how does these values are OK with only 16 bit variable??? */ @@ -577,6 +575,85 @@ struct iwl_mvm_ba_notif { u8 reserved1; } __packed; +/** + * struct iwl_mvm_compressed_ba_tfd - progress of a TFD queue + * @q_num: TFD queue number + * @tfd_index: Index of first un-acked frame in the TFD queue + */ +struct iwl_mvm_compressed_ba_tfd { + u8 q_num; + u8 reserved; + __le16 tfd_index; +} __packed; /* COMPRESSED_BA_TFD_API_S_VER_1 */ + +/** + * struct iwl_mvm_compressed_ba_ratid - progress of a RA TID queue + * @q_num: RA TID queue number + * @tid: TID of the queue + * @ssn: BA window current SSN + */ +struct iwl_mvm_compressed_ba_ratid { + u8 q_num; + u8 tid; + __le16 ssn; +} __packed; /* COMPRESSED_BA_RATID_API_S_VER_1 */ + +/* + * enum iwl_mvm_ba_resp_flags - TX aggregation status + * @IWL_MVM_BA_RESP_TX_AGG: generated due to BA + * @IWL_MVM_BA_RESP_TX_BAR: generated due to BA after BAR + * @IWL_MVM_BA_RESP_TX_AGG_FAIL: aggregation didn't receive BA + * @IWL_MVM_BA_RESP_TX_UNDERRUN: aggregation got underrun + * @IWL_MVM_BA_RESP_TX_BT_KILL: aggregation got BT-kill + * @IWL_MVM_BA_RESP_TX_DSP_TIMEOUT: aggregation didn't finish within the + * expected time + */ +enum iwl_mvm_ba_resp_flags { + IWL_MVM_BA_RESP_TX_AGG, + IWL_MVM_BA_RESP_TX_BAR, + IWL_MVM_BA_RESP_TX_AGG_FAIL, + IWL_MVM_BA_RESP_TX_UNDERRUN, + IWL_MVM_BA_RESP_TX_BT_KILL, + IWL_MVM_BA_RESP_TX_DSP_TIMEOUT +}; + +/** + * struct iwl_mvm_compressed_ba_notif - notifies about reception of BA + * ( BA_NOTIF = 0xc5 ) + * @flags: status flag, see the &iwl_mvm_ba_resp_flags + * @sta_id: Index of recipient (BA-sending) station in fw's station table + * @reduced_txp: power reduced according to TPC. This is the actual value and + * not a copy from the LQ command. Thus, if not the first rate was used + * for Tx-ing then this value will be set to 0 by FW. + * @initial_rate: TLC rate info, initial rate index, TLC table color + * @retry_cnt: retry count + * @query_byte_cnt: SCD query byte count + * @query_frame_cnt: SCD query frame count + * @txed: number of frames sent in the aggregation (all-TIDs) + * @done: number of frames that were Acked by the BA (all-TIDs) + * @wireless_time: Wireless-media time + * @tx_rate: the rate the aggregation was sent at + * @tfd_cnt: number of TFD-Q elements + * @ra_tid_cnt: number of RATID-Q elements + */ +struct iwl_mvm_compressed_ba_notif { + __le32 flags; + u8 sta_id; + u8 reduced_txp; + u8 initial_rate; + u8 retry_cnt; + __le32 query_byte_cnt; + __le16 query_frame_cnt; + __le16 txed; + __le16 done; + __le32 wireless_time; + __le32 tx_rate; + __le16 tfd_cnt; + __le16 ra_tid_cnt; + struct iwl_mvm_compressed_ba_tfd tfd[1]; + struct iwl_mvm_compressed_ba_ratid ra_tid[0]; +} __packed; /* COMPRESSED_BA_RES_API_S_VER_4 */ + /** * struct iwl_mac_beacon_cmd_v6 - beacon template command * @tx: the tx commands associated with the beacon frame @@ -675,13 +752,21 @@ static inline u32 iwl_mvm_get_scd_ssn(struct iwl_mvm_tx_resp *tx_resp) tx_resp->frame_count) & 0xfff; } +/* Available options for the SCD_QUEUE_CFG HCMD */ +enum iwl_scd_cfg_actions { + SCD_CFG_DISABLE_QUEUE = 0x0, + SCD_CFG_ENABLE_QUEUE = 0x1, + SCD_CFG_UPDATE_QUEUE_TID = 0x2, +}; + /** * struct iwl_scd_txq_cfg_cmd - New txq hw scheduler config command * @token: * @sta_id: station id * @tid: * @scd_queue: scheduler queue to confiug - * @enable: 1 queue enable, 0 queue disable + * @action: 1 queue enable, 0 queue disable, 2 change txq's tid owner + * Value is one of %iwl_scd_cfg_actions options * @aggregate: 1 aggregated queue, 0 otherwise * @tx_fifo: %enum iwl_mvm_tx_fifo * @window: BA window size @@ -692,7 +777,7 @@ struct iwl_scd_txq_cfg_cmd { u8 sta_id; u8 tid; u8 scd_queue; - u8 enable; + u8 action; u8 aggregate; u8 tx_fifo; u8 window; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h index 71076f02796e..97633690f3d5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-api.h @@ -205,7 +205,7 @@ enum { /* Phy */ PHY_CONFIGURATION_CMD = 0x6a, CALIB_RES_NOTIF_PHY_DB = 0x6b, - /* PHY_DB_CMD = 0x6c, */ + PHY_DB_CMD = 0x6c, /* ToF - 802.11mc FTM */ TOF_CMD = 0x10, @@ -340,6 +340,11 @@ enum iwl_prot_offload_subcmd_ids { STORED_BEACON_NTF = 0xFF, }; +enum iwl_fmac_debug_cmds { + LMAC_RD_WR = 0x0, + UMAC_RD_WR = 0x1, +}; + /* command groups */ enum { LEGACY_GROUP = 0x0, @@ -349,6 +354,7 @@ enum { PHY_OPS_GROUP = 0x4, DATA_PATH_GROUP = 0x5, PROT_OFFLOAD_GROUP = 0xb, + DEBUG_GROUP = 0xf, }; /** @@ -482,13 +488,17 @@ struct iwl_nvm_access_cmd { * @block_size: the block size in powers of 2 * @block_num: number of blocks specified in the command. * @device_phy_addr: virtual addresses from device side + * 32 bit address for API version 1, 64 bit address for API version 2. */ struct iwl_fw_paging_cmd { __le32 flags; __le32 block_size; __le32 block_num; - __le32 device_phy_addr[NUM_OF_FW_PAGING_BLOCKS]; -} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_1 */ + union { + __le32 addr32[NUM_OF_FW_PAGING_BLOCKS]; + __le64 addr64[NUM_OF_FW_PAGING_BLOCKS]; + } device_phy_addr; +} __packed; /* FW_PAGING_BLOCK_CMD_API_S_VER_2 */ /* * Fw items ID's @@ -1973,8 +1983,9 @@ struct iwl_tdls_config_res { struct iwl_tdls_config_sta_info_res sta_info[IWL_MVM_TDLS_STA_COUNT]; } __packed; /* TDLS_CONFIG_RSP_API_S_VER_1 */ -#define TX_FIFO_MAX_NUM 8 -#define RX_FIFO_MAX_NUM 2 +#define TX_FIFO_MAX_NUM_9000 8 +#define TX_FIFO_MAX_NUM 15 +#define RX_FIFO_MAX_NUM 2 #define TX_FIFO_INTERNAL_MAX_NUM 6 /** @@ -2000,6 +2011,21 @@ struct iwl_tdls_config_res { * NOTE: on firmware that don't have IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG * set, the last 3 members don't exist. */ +struct iwl_shared_mem_cfg_v1 { + __le32 shared_mem_addr; + __le32 shared_mem_size; + __le32 sample_buff_addr; + __le32 sample_buff_size; + __le32 txfifo_addr; + __le32 txfifo_size[TX_FIFO_MAX_NUM_9000]; + __le32 rxfifo_size[RX_FIFO_MAX_NUM]; + __le32 page_buff_addr; + __le32 page_buff_size; + __le32 rxfifo_addr; + __le32 internal_txfifo_addr; + __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; +} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */ + struct iwl_shared_mem_cfg { __le32 shared_mem_addr; __le32 shared_mem_size; @@ -2013,7 +2039,7 @@ struct iwl_shared_mem_cfg { __le32 rxfifo_addr; __le32 internal_txfifo_addr; __le32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; -} __packed; /* SHARED_MEM_ALLOC_API_S_VER_2 */ +} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */ /** * VHT MU-MIMO group configuration @@ -2129,4 +2155,48 @@ struct iwl_channel_switch_noa_notif { __le32 id_and_color; } __packed; /* CHANNEL_SWITCH_START_NTFY_API_S_VER_1 */ +/* Operation types for the debug mem access */ +enum { + DEBUG_MEM_OP_READ = 0, + DEBUG_MEM_OP_WRITE = 1, + DEBUG_MEM_OP_WRITE_BYTES = 2, +}; + +#define DEBUG_MEM_MAX_SIZE_DWORDS 32 + +/** + * struct iwl_dbg_mem_access_cmd - Request the device to read/write memory + * @op: DEBUG_MEM_OP_* + * @addr: address to read/write from/to + * @len: in dwords, to read/write + * @data: for write opeations, contains the source buffer + */ +struct iwl_dbg_mem_access_cmd { + __le32 op; + __le32 addr; + __le32 len; + __le32 data[]; +} __packed; /* DEBUG_(U|L)MAC_RD_WR_CMD_API_S_VER_1 */ + +/* Status responses for the debug mem access */ +enum { + DEBUG_MEM_STATUS_SUCCESS = 0x0, + DEBUG_MEM_STATUS_FAILED = 0x1, + DEBUG_MEM_STATUS_LOCKED = 0x2, + DEBUG_MEM_STATUS_HIDDEN = 0x3, + DEBUG_MEM_STATUS_LENGTH = 0x4, +}; + +/** + * struct iwl_dbg_mem_access_rsp - Response to debug mem commands + * @status: DEBUG_MEM_STATUS_* + * @len: read dwords (0 for write operations) + * @data: contains the read DWs + */ +struct iwl_dbg_mem_access_rsp { + __le32 status; + __le32 len; + __le32 data[]; +} __packed; /* DEBUG_(U|L)MAC_RD_WR_RSP_API_S_VER_1 */ + #endif /* __fw_api_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c index 46b52bf705fb..d89d0a1fd34e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw-dbg.c @@ -440,14 +440,12 @@ static const struct iwl_prph_range iwl_prph_dump_addr_comm[] = { { .start = 0x00a04560, .end = 0x00a0457c }, { .start = 0x00a04590, .end = 0x00a04598 }, { .start = 0x00a045c0, .end = 0x00a045f4 }, - { .start = 0x00a44000, .end = 0x00a7bf80 }, }; static const struct iwl_prph_range iwl_prph_dump_addr_9000[] = { { .start = 0x00a05c00, .end = 0x00a05c18 }, { .start = 0x00a05400, .end = 0x00a056e8 }, { .start = 0x00a08000, .end = 0x00a098bc }, - { .start = 0x00adfc00, .end = 0x00adfd1c }, { .start = 0x00a02400, .end = 0x00a02758 }, }; @@ -559,7 +557,7 @@ void iwl_mvm_fw_error_dump(struct iwl_mvm *mvm) sizeof(struct iwl_fw_error_dump_fifo); } - for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) { + for (i = 0; i < mem_cfg->num_txfifo_entries; i++) { if (!mem_cfg->txfifo_size[i]) continue; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c index 7e0cdbf8bf74..872066317fa5 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c @@ -90,15 +90,6 @@ struct iwl_mvm_alive_data { u32 scd_base_addr; }; -static inline const struct fw_img * -iwl_get_ucode_image(struct iwl_mvm *mvm, enum iwl_ucode_type ucode_type) -{ - if (ucode_type >= IWL_UCODE_TYPE_MAX) - return NULL; - - return &mvm->fw->img[ucode_type]; -} - static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant) { struct iwl_tx_ant_cfg_cmd tx_ant_cmd = { @@ -385,9 +376,7 @@ static int iwl_save_fw_paging(struct iwl_mvm *mvm, /* send paging cmd to FW in case CPU2 has paging image */ static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw) { - int blk_idx; - __le32 dev_phy_addr; - struct iwl_fw_paging_cmd fw_paging_cmd = { + struct iwl_fw_paging_cmd paging_cmd = { .flags = cpu_to_le32(PAGING_CMD_IS_SECURED | PAGING_CMD_IS_ENABLED | @@ -396,18 +385,32 @@ static int iwl_send_paging_cmd(struct iwl_mvm *mvm, const struct fw_img *fw) .block_size = cpu_to_le32(BLOCK_2_EXP_SIZE), .block_num = cpu_to_le32(mvm->num_of_paging_blk), }; + int blk_idx, size = sizeof(paging_cmd); + + /* A bit hard coded - but this is the old API and will be deprecated */ + if (!iwl_mvm_has_new_tx_api(mvm)) + size -= NUM_OF_FW_PAGING_BLOCKS * 4; /* loop for for all paging blocks + CSS block */ for (blk_idx = 0; blk_idx < mvm->num_of_paging_blk + 1; blk_idx++) { - dev_phy_addr = - cpu_to_le32(mvm->fw_paging_db[blk_idx].fw_paging_phys >> - PAGE_2_EXP_SIZE); - fw_paging_cmd.device_phy_addr[blk_idx] = dev_phy_addr; + dma_addr_t addr = mvm->fw_paging_db[blk_idx].fw_paging_phys; + + addr = addr >> PAGE_2_EXP_SIZE; + + if (iwl_mvm_has_new_tx_api(mvm)) { + __le64 phy_addr = cpu_to_le64(addr); + + paging_cmd.device_phy_addr.addr64[blk_idx] = phy_addr; + } else { + __le32 phy_addr = cpu_to_le32(addr); + + paging_cmd.device_phy_addr.addr32[blk_idx] = phy_addr; + } } return iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(FW_PAGING_BLOCK_CMD, IWL_ALWAYS_LONG_GROUP, 0), - 0, sizeof(fw_paging_cmd), &fw_paging_cmd); + 0, size, &paging_cmd); } /* @@ -580,9 +583,9 @@ static int iwl_mvm_load_ucode_wait_alive(struct iwl_mvm *mvm, iwl_fw_dbg_conf_usniffer(mvm->fw, FW_DBG_START_FROM_ALIVE) && !(fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_USNIFFER_UNIFIED))) - fw = iwl_get_ucode_image(mvm, IWL_UCODE_REGULAR_USNIFFER); + fw = iwl_get_ucode_image(mvm->fw, IWL_UCODE_REGULAR_USNIFFER); else - fw = iwl_get_ucode_image(mvm, ucode_type); + fw = iwl_get_ucode_image(mvm->fw, ucode_type); if (WARN_ON(!fw)) return -EINVAL; mvm->cur_ucode = ucode_type; @@ -826,6 +829,59 @@ out: return ret; } +static void iwl_mvm_parse_shared_mem_a000(struct iwl_mvm *mvm, + struct iwl_rx_packet *pkt) +{ + struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data; + int i; + + mvm->shared_mem_cfg.num_txfifo_entries = + ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); + for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) + mvm->shared_mem_cfg.txfifo_size[i] = + le32_to_cpu(mem_cfg->txfifo_size[i]); + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) + mvm->shared_mem_cfg.rxfifo_size[i] = + le32_to_cpu(mem_cfg->rxfifo_size[i]); + + BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != + sizeof(mem_cfg->internal_txfifo_size)); + + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); + i++) + mvm->shared_mem_cfg.internal_txfifo_size[i] = + le32_to_cpu(mem_cfg->internal_txfifo_size[i]); +} + +static void iwl_mvm_parse_shared_mem(struct iwl_mvm *mvm, + struct iwl_rx_packet *pkt) +{ + struct iwl_shared_mem_cfg_v1 *mem_cfg = (void *)pkt->data; + int i; + + mvm->shared_mem_cfg.num_txfifo_entries = + ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); + for (i = 0; i < ARRAY_SIZE(mem_cfg->txfifo_size); i++) + mvm->shared_mem_cfg.txfifo_size[i] = + le32_to_cpu(mem_cfg->txfifo_size[i]); + for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) + mvm->shared_mem_cfg.rxfifo_size[i] = + le32_to_cpu(mem_cfg->rxfifo_size[i]); + + /* new API has more data, from rxfifo_addr field and on */ + if (fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) { + BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != + sizeof(mem_cfg->internal_txfifo_size)); + + for (i = 0; + i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); + i++) + mvm->shared_mem_cfg.internal_txfifo_size[i] = + le32_to_cpu(mem_cfg->internal_txfifo_size[i]); + } +} + static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) { struct iwl_host_cmd cmd = { @@ -833,9 +889,7 @@ static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) .data = { NULL, }, .len = { 0, }, }; - struct iwl_shared_mem_cfg *mem_cfg; struct iwl_rx_packet *pkt; - u32 i; lockdep_assert_held(&mvm->mutex); @@ -849,45 +903,10 @@ static void iwl_mvm_get_shared_mem_conf(struct iwl_mvm *mvm) return; pkt = cmd.resp_pkt; - mem_cfg = (void *)pkt->data; - - mvm->shared_mem_cfg.shared_mem_addr = - le32_to_cpu(mem_cfg->shared_mem_addr); - mvm->shared_mem_cfg.shared_mem_size = - le32_to_cpu(mem_cfg->shared_mem_size); - mvm->shared_mem_cfg.sample_buff_addr = - le32_to_cpu(mem_cfg->sample_buff_addr); - mvm->shared_mem_cfg.sample_buff_size = - le32_to_cpu(mem_cfg->sample_buff_size); - mvm->shared_mem_cfg.txfifo_addr = le32_to_cpu(mem_cfg->txfifo_addr); - for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.txfifo_size); i++) - mvm->shared_mem_cfg.txfifo_size[i] = - le32_to_cpu(mem_cfg->txfifo_size[i]); - for (i = 0; i < ARRAY_SIZE(mvm->shared_mem_cfg.rxfifo_size); i++) - mvm->shared_mem_cfg.rxfifo_size[i] = - le32_to_cpu(mem_cfg->rxfifo_size[i]); - mvm->shared_mem_cfg.page_buff_addr = - le32_to_cpu(mem_cfg->page_buff_addr); - mvm->shared_mem_cfg.page_buff_size = - le32_to_cpu(mem_cfg->page_buff_size); - - /* new API has more data */ - if (fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_EXTEND_SHARED_MEM_CFG)) { - mvm->shared_mem_cfg.rxfifo_addr = - le32_to_cpu(mem_cfg->rxfifo_addr); - mvm->shared_mem_cfg.internal_txfifo_addr = - le32_to_cpu(mem_cfg->internal_txfifo_addr); - - BUILD_BUG_ON(sizeof(mvm->shared_mem_cfg.internal_txfifo_size) != - sizeof(mem_cfg->internal_txfifo_size)); - - for (i = 0; - i < ARRAY_SIZE(mvm->shared_mem_cfg.internal_txfifo_size); - i++) - mvm->shared_mem_cfg.internal_txfifo_size[i] = - le32_to_cpu(mem_cfg->internal_txfifo_size[i]); - } + if (iwl_mvm_has_new_tx_api(mvm)) + iwl_mvm_parse_shared_mem_a000(mvm, pkt); + else + iwl_mvm_parse_shared_mem(mvm, pkt); IWL_DEBUG_INFO(mvm, "SHARED MEM CFG: got memory offsets/sizes\n"); @@ -1027,19 +1046,11 @@ static int iwl_mvm_sar_init(struct iwl_mvm *mvm) { struct iwl_mvm_sar_table sar_table; struct iwl_dev_tx_power_cmd cmd = { - .v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), + .v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS), }; int ret, i, j, idx; int len = sizeof(cmd); - /* we can't do anything with the table if the FW doesn't support it */ - if (!fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_TX_POWER_CHAIN)) { - IWL_DEBUG_RADIO(mvm, - "FW doesn't support per-chain TX power settings.\n"); - return 0; - } - if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) len = sizeof(cmd.v3); @@ -1096,27 +1107,27 @@ int iwl_mvm_up(struct iwl_mvm *mvm) * (for example, if we were in RFKILL) */ ret = iwl_run_init_mvm_ucode(mvm, false); - if (ret && !iwlmvm_mod_params.init_dbg) { + + if (iwlmvm_mod_params.init_dbg) + return 0; + + if (ret) { IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", ret); /* this can't happen */ if (WARN_ON(ret > 0)) ret = -ERFKILL; goto error; } - if (!iwlmvm_mod_params.init_dbg) { - /* - * Stop and start the transport without entering low power - * mode. This will save the state of other components on the - * device that are triggered by the INIT firwmare (MFUART). - */ - _iwl_trans_stop_device(mvm->trans, false); - ret = _iwl_trans_start_hw(mvm->trans, false); - if (ret) - goto error; - } - if (iwlmvm_mod_params.init_dbg) - return 0; + /* + * Stop and start the transport without entering low power + * mode. This will save the state of other components on the + * device that are triggered by the INIT firwmare (MFUART). + */ + _iwl_trans_stop_device(mvm->trans, false); + ret = _iwl_trans_start_hw(mvm->trans, false); + if (ret) + goto error; ret = iwl_mvm_load_ucode_wait_alive(mvm, IWL_UCODE_REGULAR); if (ret) { @@ -1214,9 +1225,12 @@ int iwl_mvm_up(struct iwl_mvm *mvm) } /* TODO: read the budget from BIOS / Platform NVM */ - if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) + if (iwl_mvm_is_ctdp_supported(mvm) && mvm->cooling_dev.cur_state > 0) { ret = iwl_mvm_ctdp_command(mvm, CTDP_CMD_OPERATION_START, mvm->cooling_dev.cur_state); + if (ret) + goto error; + } #else /* Initialize tx backoffs to the minimal possible */ iwl_mvm_tt_tx_backoff(mvm, 0); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c index 69c42ce45b8a..6b962d6b067a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac-ctxt.c @@ -539,6 +539,11 @@ void iwl_mvm_mac_ctxt_release(struct iwl_mvm *mvm, struct ieee80211_vif *vif) iwl_mvm_disable_txq(mvm, IWL_MVM_OFFCHANNEL_QUEUE, IWL_MVM_OFFCHANNEL_QUEUE, IWL_MAX_TID_COUNT, 0); + else + iwl_mvm_disable_txq(mvm, + IWL_MVM_DQA_P2P_DEVICE_QUEUE, + vif->hw_queue[0], IWL_MAX_TID_COUNT, + 0); break; case NL80211_IFTYPE_AP: @@ -769,26 +774,6 @@ static void iwl_mvm_mac_ctxt_cmd_common(struct iwl_mvm *mvm, cmd->ac[txf].fifos_mask = BIT(txf); } - if (vif->type == NL80211_IFTYPE_AP) { - /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ - cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= - BIT(IWL_MVM_TX_FIFO_MCAST); - - /* - * in AP mode, pass probe requests and beacons from other APs - * (needed for ht protection); when there're no any associated - * station don't ask FW to pass beacons to prevent unnecessary - * wake-ups. - */ - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); - if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { - cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); - IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); - } else { - IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); - } - } - if (vif->bss_conf.qos) cmd->qos_flags |= cpu_to_le32(MAC_QOS_FLG_UPDATE_EDCA); @@ -1186,6 +1171,7 @@ static void iwl_mvm_mac_ap_iterator(void *_data, u8 *mac, */ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, struct ieee80211_vif *vif, + struct iwl_mac_ctx_cmd *cmd, struct iwl_mac_data_ap *ctxt_ap, bool add) { @@ -1196,6 +1182,23 @@ static void iwl_mvm_mac_ctxt_cmd_fill_ap(struct iwl_mvm *mvm, .beacon_device_ts = 0 }; + /* in AP mode, the MCAST FIFO takes the EDCA params from VO */ + cmd->ac[IWL_MVM_TX_FIFO_VO].fifos_mask |= BIT(IWL_MVM_TX_FIFO_MCAST); + + /* + * in AP mode, pass probe requests and beacons from other APs + * (needed for ht protection); when there're no any associated + * station don't ask FW to pass beacons to prevent unnecessary + * wake-ups. + */ + cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_PROBE_REQUEST); + if (mvmvif->ap_assoc_sta_count || !mvm->drop_bcn_ap_mode) { + cmd->filter_flags |= cpu_to_le32(MAC_FILTER_IN_BEACON); + IWL_DEBUG_HC(mvm, "Asking FW to pass beacons\n"); + } else { + IWL_DEBUG_HC(mvm, "No need to receive beacons\n"); + } + ctxt_ap->bi = cpu_to_le32(vif->bss_conf.beacon_int); ctxt_ap->bi_reciprocal = cpu_to_le32(iwl_mvm_reciprocal(vif->bss_conf.beacon_int)); @@ -1253,7 +1256,7 @@ static int iwl_mvm_mac_ctxt_cmd_ap(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); /* Fill the data specific for ap mode */ - iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.ap, + iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.ap, action == FW_CTXT_ACTION_ADD); return iwl_mvm_mac_ctxt_send_cmd(mvm, &cmd); @@ -1272,7 +1275,7 @@ static int iwl_mvm_mac_ctxt_cmd_go(struct iwl_mvm *mvm, iwl_mvm_mac_ctxt_cmd_common(mvm, vif, &cmd, NULL, action); /* Fill the data specific for GO mode */ - iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd.go.ap, + iwl_mvm_mac_ctxt_cmd_fill_ap(mvm, vif, &cmd, &cmd.go.ap, action == FW_CTXT_ACTION_ADD); cmd.go.ctwin = cpu_to_le32(noa->oppps_ctwindow & diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index 5dd77e336617..318efd814037 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -465,7 +465,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; - BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 4); + BUILD_BUG_ON(ARRAY_SIZE(mvm->ciphers) < ARRAY_SIZE(mvm_ciphers) + 6); memcpy(mvm->ciphers, mvm_ciphers, sizeof(mvm_ciphers)); hw->wiphy->n_cipher_suites = ARRAY_SIZE(mvm_ciphers); hw->wiphy->cipher_suites = mvm->ciphers; @@ -479,17 +479,23 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->n_cipher_suites++; } - /* - * Enable 11w if advertised by firmware and software crypto - * is not enabled (as the firmware will interpret some mgmt - * packets, so enabling it with software crypto isn't safe) + /* Enable 11w if software crypto is not enabled (as the + * firmware will interpret some mgmt packets, so enabling it + * with software crypto isn't safe). */ - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_MFP && - !iwlwifi_mod_params.sw_crypto) { + if (!iwlwifi_mod_params.sw_crypto) { ieee80211_hw_set(hw, MFP_CAPABLE); mvm->ciphers[hw->wiphy->n_cipher_suites] = WLAN_CIPHER_SUITE_AES_CMAC; hw->wiphy->n_cipher_suites++; + if (iwl_mvm_has_new_rx_api(mvm)) { + mvm->ciphers[hw->wiphy->n_cipher_suites] = + WLAN_CIPHER_SUITE_BIP_GMAC_128; + hw->wiphy->n_cipher_suites++; + mvm->ciphers[hw->wiphy->n_cipher_suites] = + WLAN_CIPHER_SUITE_BIP_GMAC_256; + hw->wiphy->n_cipher_suites++; + } } /* currently FW API supports only one optional cipher scheme */ @@ -539,9 +545,7 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG | REGULATORY_DISABLE_BEACON_HINTS; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_GO_UAPSD) - hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; - + hw->wiphy->flags |= WIPHY_FLAG_AP_UAPSD; hw->wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH; hw->wiphy->iface_combinations = iwl_mvm_iface_combinations; @@ -645,6 +649,16 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) IWL_UCODE_TLV_CAPA_WFA_TPC_REP_IE_SUPPORT)) hw->wiphy->features |= NL80211_FEATURE_WFA_TPC_IE_IN_PROBES; + if (fw_has_api(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_API_SCAN_TSF_REPORT)) { + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SCAN_START_TIME); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_BSS_PARENT_TSF); + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_SET_SCAN_DWELL); + } + mvm->rts_threshold = IEEE80211_MAX_RTS_THRESHOLD; #ifdef CONFIG_PM_SLEEP @@ -712,6 +726,10 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) if (ret) iwl_mvm_leds_exit(mvm); + if (mvm->cfg->vht_mu_mimo_supported) + wiphy_ext_feature_set(hw->wiphy, + NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER); + return ret; } @@ -1251,20 +1269,18 @@ static int iwl_mvm_set_tx_power(struct iwl_mvm *mvm, struct ieee80211_vif *vif, s16 tx_power) { struct iwl_dev_tx_power_cmd cmd = { - .v3.v2.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC), - .v3.v2.mac_context_id = + .v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_MAC), + .v3.mac_context_id = cpu_to_le32(iwl_mvm_vif_from_mac80211(vif)->id), - .v3.v2.pwr_restriction = cpu_to_le16(8 * tx_power), + .v3.pwr_restriction = cpu_to_le16(8 * tx_power), }; int len = sizeof(cmd); if (tx_power == IWL_DEFAULT_MAX_TX_POWER) - cmd.v3.v2.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER); + cmd.v3.pwr_restriction = cpu_to_le16(IWL_DEV_MAX_TX_POWER); if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TX_POWER_ACK)) len = sizeof(cmd.v3); - if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_TX_POWER_CHAIN)) - len = sizeof(cmd.v3.v2); return iwl_mvm_send_cmd_pdu(mvm, REDUCE_TX_POWER_CMD, 0, len, &cmd); } @@ -2221,6 +2237,10 @@ static void iwl_mvm_bss_info_changed(struct ieee80211_hw *hw, case NL80211_IFTYPE_ADHOC: iwl_mvm_bss_info_changed_ap_ibss(mvm, vif, bss_conf, changes); break; + case NL80211_IFTYPE_MONITOR: + if (changes & BSS_CHANGED_MU_GROUPS) + iwl_mvm_update_mu_groups(mvm, vif); + break; default: /* shouldn't happen */ WARN_ON_ONCE(1); @@ -2747,6 +2767,8 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: WARN_ON_ONCE(!ieee80211_hw_check(hw, MFP_CAPABLE)); break; case WLAN_CIPHER_SUITE_WEP40: @@ -2780,9 +2802,11 @@ static int iwl_mvm_mac_set_key(struct ieee80211_hw *hw, * GTK on AP interface is a TX-only key, return 0; * on IBSS they're per-station and because we're lazy * we don't support them for RX, so do the same. - * CMAC in AP/IBSS modes must be done in software. + * CMAC/GMAC in AP/IBSS modes must be done in software. */ - if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) + if (key->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + key->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) ret = -EOPNOTSUPP; else ret = 0; diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h index 6a615bb73042..d17cbf603f7c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h @@ -604,16 +604,9 @@ enum iwl_mvm_tdls_cs_state { }; struct iwl_mvm_shared_mem_cfg { - u32 shared_mem_addr; - u32 shared_mem_size; - u32 sample_buff_addr; - u32 sample_buff_size; - u32 txfifo_addr; + int num_txfifo_entries; u32 txfifo_size[TX_FIFO_MAX_NUM]; u32 rxfifo_size[RX_FIFO_MAX_NUM]; - u32 page_buff_addr; - u32 page_buff_size; - u32 rxfifo_addr; u32 internal_txfifo_addr; u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM]; }; @@ -699,6 +692,10 @@ struct iwl_mvm_baid_data { * it. In this state, when a new queue is needed to be allocated but no * such free queue exists, an inactive queue might be freed and given to * the new RA/TID. + * @IWL_MVM_QUEUE_RECONFIGURING: queue is being reconfigured + * This is the state of a queue that has had traffic pass through it, but + * needs to be reconfigured for some reason, e.g. the queue needs to + * become unshared and aggregations re-enabled on. */ enum iwl_mvm_queue_status { IWL_MVM_QUEUE_FREE, @@ -706,10 +703,11 @@ enum iwl_mvm_queue_status { IWL_MVM_QUEUE_READY, IWL_MVM_QUEUE_SHARED, IWL_MVM_QUEUE_INACTIVE, + IWL_MVM_QUEUE_RECONFIGURING, }; #define IWL_MVM_DQA_QUEUE_TIMEOUT (5 * HZ) -#define IWL_MVM_NUM_CIPHERS 8 +#define IWL_MVM_NUM_CIPHERS 10 struct iwl_mvm { /* for logger access */ @@ -769,6 +767,7 @@ struct iwl_mvm { u8 ra_sta_id; /* The RA this queue is mapped to, if exists */ bool reserved; /* Is this the TXQ reserved for a STA */ u8 mac80211_ac; /* The mac80211 AC this queue is mapped to */ + u8 txq_tid; /* The TID "owner" of this queue*/ u16 tid_bitmap; /* Bitmap of the TIDs mapped to this queue */ /* Timestamp for inactivation per TID of this queue */ unsigned long last_frame_time[IWL_MAX_TID_COUNT + 1]; @@ -822,6 +821,12 @@ struct iwl_mvm { /* UMAC scan tracking */ u32 scan_uid_status[IWL_MVM_MAX_UMAC_SCANS]; + /* start time of last scan in TSF of the mac that requested the scan */ + u64 scan_start; + + /* the vif that requested the current scan */ + struct iwl_mvm_vif *scan_vif; + /* rx chain antennas set through debugfs for the scan command */ u8 scan_rx_ant; @@ -1124,6 +1129,18 @@ static inline bool iwl_mvm_enter_d0i3_on_suspend(struct iwl_mvm *mvm) (mvm->trans->runtime_pm_mode != IWL_PLAT_PM_MODE_D0I3); } +static inline bool iwl_mvm_is_dqa_data_queue(struct iwl_mvm *mvm, u8 queue) +{ + return (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE) && + (queue <= IWL_MVM_DQA_MAX_DATA_QUEUE); +} + +static inline bool iwl_mvm_is_dqa_mgmt_queue(struct iwl_mvm *mvm, u8 queue) +{ + return (queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE) && + (queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE); +} + static inline bool iwl_mvm_is_lar_supported(struct iwl_mvm *mvm) { bool nvm_lar = mvm->nvm_data->lar_enabled; @@ -1194,6 +1211,12 @@ static inline bool iwl_mvm_has_new_rx_api(struct iwl_mvm *mvm) IWL_UCODE_TLV_CAPA_MULTI_QUEUE_RX_SUPPORT); } +static inline bool iwl_mvm_has_new_tx_api(struct iwl_mvm *mvm) +{ + /* TODO - replace with TLV once defined */ + return mvm->trans->cfg->use_tfh; +} + static inline bool iwl_mvm_is_tt_in_fw(struct iwl_mvm *mvm) { #ifdef CONFIG_THERMAL @@ -1245,6 +1268,7 @@ u8 iwl_mvm_mac80211_idx_to_hwrate(int rate_idx); void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm); u8 first_antenna(u8 mask); u8 iwl_mvm_next_antenna(struct iwl_mvm *mvm, u8 valid, u8 last_idx); +void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime); /* Tx / Host Commands */ int __must_check iwl_mvm_send_cmd(struct iwl_mvm *mvm, @@ -1281,8 +1305,6 @@ static inline void iwl_mvm_set_tx_cmd_ccmp(struct ieee80211_tx_info *info, tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyconf->key, keyconf->keylen); - if (info->flags & IEEE80211_TX_CTL_AMPDU) - tx_cmd->tx_flags |= cpu_to_le32(TX_CMD_FLG_CCMP_AGG); } static inline void iwl_mvm_wait_for_async_handlers(struct iwl_mvm *mvm) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c index 7a686f67f007..eade099b6dbf 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/nvm.c @@ -66,7 +66,6 @@ *****************************************************************************/ #include #include -#include #include "iwl-trans.h" #include "iwl-csr.h" #include "mvm.h" @@ -751,96 +750,6 @@ exit: return resp_cp; } -#ifdef CONFIG_ACPI -#define WRD_METHOD "WRDD" -#define WRDD_WIFI (0x07) -#define WRDD_WIGIG (0x10) - -static u32 iwl_mvm_wrdd_get_mcc(struct iwl_mvm *mvm, union acpi_object *wrdd) -{ - union acpi_object *mcc_pkg, *domain_type, *mcc_value; - u32 i; - - if (wrdd->type != ACPI_TYPE_PACKAGE || - wrdd->package.count < 2 || - wrdd->package.elements[0].type != ACPI_TYPE_INTEGER || - wrdd->package.elements[0].integer.value != 0) { - IWL_DEBUG_LAR(mvm, "Unsupported wrdd structure\n"); - return 0; - } - - for (i = 1 ; i < wrdd->package.count ; ++i) { - mcc_pkg = &wrdd->package.elements[i]; - - if (mcc_pkg->type != ACPI_TYPE_PACKAGE || - mcc_pkg->package.count < 2 || - mcc_pkg->package.elements[0].type != ACPI_TYPE_INTEGER || - mcc_pkg->package.elements[1].type != ACPI_TYPE_INTEGER) { - mcc_pkg = NULL; - continue; - } - - domain_type = &mcc_pkg->package.elements[0]; - if (domain_type->integer.value == WRDD_WIFI) - break; - - mcc_pkg = NULL; - } - - if (mcc_pkg) { - mcc_value = &mcc_pkg->package.elements[1]; - return mcc_value->integer.value; - } - - return 0; -} - -static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc) -{ - acpi_handle root_handle; - acpi_handle handle; - struct acpi_buffer wrdd = {ACPI_ALLOCATE_BUFFER, NULL}; - acpi_status status; - u32 mcc_val; - - root_handle = ACPI_HANDLE(mvm->dev); - if (!root_handle) { - IWL_DEBUG_LAR(mvm, - "Could not retrieve root port ACPI handle\n"); - return -ENOENT; - } - - /* Get the method's handle */ - status = acpi_get_handle(root_handle, (acpi_string)WRD_METHOD, &handle); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_LAR(mvm, "WRD method not found\n"); - return -ENOENT; - } - - /* Call WRDD with no arguments */ - status = acpi_evaluate_object(handle, NULL, NULL, &wrdd); - if (ACPI_FAILURE(status)) { - IWL_DEBUG_LAR(mvm, "WRDC invocation failed (0x%x)\n", status); - return -ENOENT; - } - - mcc_val = iwl_mvm_wrdd_get_mcc(mvm, wrdd.pointer); - kfree(wrdd.pointer); - if (!mcc_val) - return -ENOENT; - - mcc[0] = (mcc_val >> 8) & 0xff; - mcc[1] = mcc_val & 0xff; - mcc[2] = '\0'; - return 0; -} -#else /* CONFIG_ACPI */ -static int iwl_mvm_get_bios_mcc(struct iwl_mvm *mvm, char *mcc) -{ - return -ENOENT; -} -#endif - int iwl_mvm_init_mcc(struct iwl_mvm *mvm) { bool tlv_lar; @@ -884,7 +793,7 @@ int iwl_mvm_init_mcc(struct iwl_mvm *mvm) return -EIO; if (iwl_mvm_is_wifi_mcc_supported(mvm) && - !iwl_mvm_get_bios_mcc(mvm, mcc)) { + !iwl_get_bios_mcc(mvm->dev, mcc)) { kfree(regd); regd = iwl_mvm_get_regdomain(mvm->hw->wiphy, mcc, MCC_SOURCE_BIOS, NULL); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c index 55d9096da68c..05fe6dd1a2c8 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c @@ -359,6 +359,7 @@ static const struct iwl_hcmd_names iwl_mvm_legacy_names[] = { HCMD_NAME(BT_COEX_CI), HCMD_NAME(PHY_CONFIGURATION_CMD), HCMD_NAME(CALIB_RES_NOTIF_PHY_DB), + HCMD_NAME(PHY_DB_CMD), HCMD_NAME(SCAN_OFFLOAD_COMPLETE), HCMD_NAME(SCAN_OFFLOAD_UPDATE_PROFILES_CMD), HCMD_NAME(SCAN_OFFLOAD_CONFIG_CMD), @@ -652,11 +653,9 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, /* the hardware splits the A-MSDU */ if (mvm->cfg->mq_rx_supported) trans_cfg.rx_buf_size = IWL_AMSDU_4K; - trans_cfg.wide_cmd_header = fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_WIDE_CMD_HDR); - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_DW_BC_TABLE) - trans_cfg.bc_table_dword = true; + trans->wide_cmd_header = true; + trans_cfg.bc_table_dword = true; trans_cfg.command_groups = iwl_mvm_groups; trans_cfg.command_groups_size = ARRAY_SIZE(iwl_mvm_groups); @@ -711,37 +710,21 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, IWL_DEBUG_EEPROM(mvm->trans->dev, "working without external nvm file\n"); - if (WARN(cfg->no_power_up_nic_in_init && !mvm->nvm_file_name, - "not allowing power-up and not having nvm_file\n")) + err = iwl_trans_start_hw(mvm->trans); + if (err) goto out_free; - /* - * Even if nvm exists in the nvm_file driver should read again the nvm - * from the nic because there might be entries that exist in the OTP - * and not in the file. - * for nics with no_power_up_nic_in_init: rely completley on nvm_file - */ - if (cfg->no_power_up_nic_in_init && mvm->nvm_file_name) { - err = iwl_nvm_init(mvm, false); - if (err) - goto out_free; - } else { - err = iwl_trans_start_hw(mvm->trans); - if (err) - goto out_free; - - mutex_lock(&mvm->mutex); - iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); - err = iwl_run_init_mvm_ucode(mvm, true); - if (!err || !iwlmvm_mod_params.init_dbg) - iwl_mvm_stop_device(mvm); - iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE); - mutex_unlock(&mvm->mutex); - /* returns 0 if successful, 1 if success but in rfkill */ - if (err < 0 && !iwlmvm_mod_params.init_dbg) { - IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err); - goto out_free; - } + mutex_lock(&mvm->mutex); + iwl_mvm_ref(mvm, IWL_MVM_REF_INIT_UCODE); + err = iwl_run_init_mvm_ucode(mvm, true); + if (!err || !iwlmvm_mod_params.init_dbg) + iwl_mvm_stop_device(mvm); + iwl_mvm_unref(mvm, IWL_MVM_REF_INIT_UCODE); + mutex_unlock(&mvm->mutex); + /* returns 0 if successful, 1 if success but in rfkill */ + if (err < 0 && !iwlmvm_mod_params.init_dbg) { + IWL_ERR(mvm, "Failed to run INIT ucode: %d\n", err); + goto out_free; } scan_size = iwl_mvm_scan_size(mvm); @@ -783,8 +766,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg, flush_delayed_work(&mvm->fw_dump_wk); iwl_phy_db_free(mvm->phy_db); kfree(mvm->scan_cmd); - if (!cfg->no_power_up_nic_in_init || !mvm->nvm_file_name) - iwl_trans_op_mode_leave(trans); + iwl_trans_op_mode_leave(trans); + ieee80211_free_hw(mvm->hw); return NULL; } @@ -857,9 +840,7 @@ static void iwl_mvm_async_handlers_wk(struct work_struct *wk) struct iwl_mvm *mvm = container_of(wk, struct iwl_mvm, async_handlers_wk); struct iwl_async_handler_entry *entry, *tmp; - struct list_head local_list; - - INIT_LIST_HEAD(&local_list); + LIST_HEAD(local_list); /* Ensure that we are not in stop flow (check iwl_mvm_mac_stop) */ @@ -966,10 +947,11 @@ static void iwl_mvm_rx(struct iwl_op_mode *op_mode, { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_rx_mpdu(mvm, napi, rxb); - else if (pkt->hdr.cmd == REPLY_RX_PHY_CMD) + else if (cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_PHY_CMD)) iwl_mvm_rx_rx_phy_cmd(mvm, rxb); else iwl_mvm_rx_common(mvm, rxb, pkt); @@ -981,13 +963,14 @@ static void iwl_mvm_rx_mq(struct iwl_op_mode *op_mode, { struct iwl_rx_packet *pkt = rxb_addr(rxb); struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (likely(pkt->hdr.cmd == REPLY_RX_MPDU_CMD)) + if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, 0); - else if (unlikely(pkt->hdr.group_id == DATA_PATH_GROUP && - pkt->hdr.cmd == RX_QUEUES_NOTIFICATION)) + else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP, + RX_QUEUES_NOTIFICATION))) iwl_mvm_rx_queue_notif(mvm, rxb, 0); - else if (pkt->hdr.cmd == FRAME_RELEASE) + else if (cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE)) iwl_mvm_rx_frame_release(mvm, napi, rxb, 0); else iwl_mvm_rx_common(mvm, rxb, pkt); @@ -1666,13 +1649,14 @@ static void iwl_mvm_rx_mq_rss(struct iwl_op_mode *op_mode, { struct iwl_mvm *mvm = IWL_OP_MODE_GET_MVM(op_mode); struct iwl_rx_packet *pkt = rxb_addr(rxb); + u16 cmd = WIDE_ID(pkt->hdr.group_id, pkt->hdr.cmd); - if (unlikely(pkt->hdr.cmd == FRAME_RELEASE)) + if (unlikely(cmd == WIDE_ID(LEGACY_GROUP, FRAME_RELEASE))) iwl_mvm_rx_frame_release(mvm, napi, rxb, queue); - else if (unlikely(pkt->hdr.cmd == RX_QUEUES_NOTIFICATION && - pkt->hdr.group_id == DATA_PATH_GROUP)) + else if (unlikely(cmd == WIDE_ID(DATA_PATH_GROUP, + RX_QUEUES_NOTIFICATION))) iwl_mvm_rx_queue_notif(mvm, rxb, queue); - else + else if (likely(cmd == WIDE_ID(LEGACY_GROUP, REPLY_RX_MPDU_CMD))) iwl_mvm_rx_mpdu_mq(mvm, napi, rxb, queue); } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/power.c b/drivers/net/wireless/intel/iwlwifi/mvm/power.c index ff85865b1dda..af6d10c23e5a 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/power.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/power.c @@ -694,8 +694,7 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, /* enable PM on p2p if p2p stand alone */ if (vifs->p2p_active && !vifs->bss_active && !vifs->ap_active) { - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM) - p2p_mvmvif->pm_enabled = true; + p2p_mvmvif->pm_enabled = true; return; } @@ -707,12 +706,10 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, ap_mvmvif->phy_ctxt->id); /* clients are not stand alone: enable PM if DCM */ - if (!(client_same_channel || ap_same_channel) && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_DCM)) { + if (!(client_same_channel || ap_same_channel)) { if (vifs->bss_active) bss_mvmvif->pm_enabled = true; - if (vifs->p2p_active && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM)) + if (vifs->p2p_active) p2p_mvmvif->pm_enabled = true; return; } @@ -721,12 +718,10 @@ static void iwl_mvm_power_set_pm(struct iwl_mvm *mvm, * There is only one channel in the system and there are only * bss and p2p clients that share it */ - if (client_same_channel && !vifs->ap_active && - (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_BSS_P2P_PS_SCM)) { + if (client_same_channel && !vifs->ap_active) { /* share same channel*/ bss_mvmvif->pm_enabled = true; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_P2P_PM) - p2p_mvmvif->pm_enabled = true; + p2p_mvmvif->pm_enabled = true; } } diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index df6c32caa5f0..a57c6ef5bc14 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -132,7 +132,8 @@ static inline int iwl_mvm_check_pn(struct iwl_mvm *mvm, struct sk_buff *skb, IEEE80211_CCMP_PN_LEN) <= 0) return -1; - memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); + if (!(stats->flag & RX_FLAG_AMSDU_MORE)) + memcpy(ptk_pn->q[queue].pn[tid], pn, IEEE80211_CCMP_PN_LEN); stats->flag |= RX_FLAG_PN_VALIDATED; return 0; @@ -417,10 +418,11 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, ssn = ieee80211_sn_inc(ssn); - /* holes are valid since nssn indicates frames were received. */ - if (skb_queue_empty(skb_list) || !skb_peek_tail(skb_list)) - continue; - /* Empty the list. Will have more than one frame for A-MSDU */ + /* + * Empty the list. Will have more than one frame for A-MSDU. + * Empty list is valid as well since nssn indicates frames were + * received. + */ while ((skb = __skb_dequeue(skb_list))) { iwl_mvm_pass_packet_to_mac80211(mvm, napi, skb, reorder_buf->queue, @@ -433,7 +435,7 @@ static void iwl_mvm_release_frames(struct iwl_mvm *mvm, if (reorder_buf->num_stored && !reorder_buf->removed) { u16 index = reorder_buf->head_sn % reorder_buf->buf_size; - while (!skb_peek_tail(&reorder_buf->entries[index])) + while (skb_queue_empty(&reorder_buf->entries[index])) index = (index + 1) % reorder_buf->buf_size; /* modify timer to match next frame's expiration time */ mod_timer(&reorder_buf->reorder_timer, @@ -451,17 +453,17 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) u16 sn = 0, index = 0; bool expired = false; - spin_lock_bh(&buf->lock); + spin_lock(&buf->lock); if (!buf->num_stored || buf->removed) { - spin_unlock_bh(&buf->lock); + spin_unlock(&buf->lock); return; } for (i = 0; i < buf->buf_size ; i++) { index = (buf->head_sn + i) % buf->buf_size; - if (!skb_peek_tail(&buf->entries[index])) + if (skb_queue_empty(&buf->entries[index])) continue; if (!time_after(jiffies, buf->reorder_time[index] + RX_REORDER_BUF_TIMEOUT_MQ)) @@ -491,7 +493,7 @@ void iwl_mvm_reorder_timer_expired(unsigned long data) buf->reorder_time[index] + 1 + RX_REORDER_BUF_TIMEOUT_MQ); } - spin_unlock_bh(&buf->lock); + spin_unlock(&buf->lock); } static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue, @@ -502,7 +504,7 @@ static void iwl_mvm_del_ba(struct iwl_mvm *mvm, int queue, struct iwl_mvm_reorder_buffer *reorder_buf; u8 baid = data->baid; - if (WARN_ON_ONCE(baid >= IWL_RX_REORDER_DATA_INVALID_BAID)) + if (WARN_ONCE(baid >= IWL_MAX_BAID, "invalid BAID: %x\n", baid)) return; rcu_read_lock(); @@ -589,6 +591,11 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, baid = (reorder & IWL_RX_MPDU_REORDER_BAID_MASK) >> IWL_RX_MPDU_REORDER_BAID_SHIFT; + /* + * This also covers the case of receiving a Block Ack Request + * outside a BA session; we'll pass it to mac80211 and that + * then sends a delBA action frame. + */ if (baid == IWL_RX_REORDER_DATA_INVALID_BAID) return false; @@ -598,9 +605,10 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, mvm_sta = iwl_mvm_sta_from_mac80211(sta); - /* not a data packet */ - if (!ieee80211_is_data_qos(hdr->frame_control) || - is_multicast_ether_addr(hdr->addr1)) + /* not a data packet or a bar */ + if (!ieee80211_is_back_req(hdr->frame_control) && + (!ieee80211_is_data_qos(hdr->frame_control) || + is_multicast_ether_addr(hdr->addr1))) return false; if (unlikely(!ieee80211_is_data_present(hdr->frame_control))) @@ -624,6 +632,11 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, spin_lock_bh(&buffer->lock); + if (ieee80211_is_back_req(hdr->frame_control)) { + iwl_mvm_release_frames(mvm, sta, napi, buffer, nssn); + goto drop; + } + /* * If there was a significant jump in the nssn - adjust. * If the SN is smaller than the NSSN it might need to first go into @@ -883,6 +896,9 @@ void iwl_mvm_rx_mpdu_mq(struct iwl_mvm *mvm, struct napi_struct *napi, u8 *qc = ieee80211_get_qos_ctl(hdr); *qc &= ~IEEE80211_QOS_CTL_A_MSDU_PRESENT; + if (!(desc->amsdu_info & + IWL_RX_MPDU_AMSDU_LAST_SUBFRAME)) + rx_status->flag |= RX_FLAG_AMSDU_MORE; } if (baid != IWL_RX_REORDER_DATA_INVALID_BAID) iwl_mvm_agg_rx_received(mvm, baid); diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index dac120f8861b..f279fdd6eb44 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -141,6 +141,7 @@ struct iwl_mvm_scan_params { struct cfg80211_match_set *match_sets; int n_scan_plans; struct cfg80211_sched_scan_plan *scan_plans; + u32 measurement_dwell; }; static u8 iwl_mvm_scan_rx_ant(struct iwl_mvm *mvm) @@ -232,6 +233,27 @@ iwl_mvm_scan_type iwl_mvm_get_scan_type(struct iwl_mvm *mvm, bool p2p_device) return IWL_SCAN_TYPE_WILD; } +static int +iwl_mvm_get_measurement_dwell(struct iwl_mvm *mvm, + struct cfg80211_scan_request *req, + struct iwl_mvm_scan_params *params) +{ + if (!req->duration) + return 0; + + if (req->duration_mandatory && + req->duration > scan_timing[params->type].max_out_time) { + IWL_DEBUG_SCAN(mvm, + "Measurement scan - too long dwell %hu (max out time %u)\n", + req->duration, + scan_timing[params->type].max_out_time); + return -EOPNOTSUPP; + } + + return min_t(u32, (u32)req->duration, + scan_timing[params->type].max_out_time); +} + static inline bool iwl_mvm_rrm_scan_needed(struct iwl_mvm *mvm) { /* require rrm scan whenever the fw supports it */ @@ -717,22 +739,6 @@ iwl_mvm_build_scan_probe(struct iwl_mvm *mvm, struct ieee80211_vif *vif, params->preq.common_data.len = cpu_to_le16(ies->common_ie_len); } -static __le32 iwl_mvm_scan_priority(struct iwl_mvm *mvm, - enum iwl_scan_priority_ext prio) -{ - if (fw_has_api(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_API_EXT_SCAN_PRIORITY)) - return cpu_to_le32(prio); - - if (prio <= IWL_SCAN_PRIORITY_EXT_2) - return cpu_to_le32(IWL_SCAN_PRIORITY_LOW); - - if (prio <= IWL_SCAN_PRIORITY_EXT_4) - return cpu_to_le32(IWL_SCAN_PRIORITY_MEDIUM); - - return cpu_to_le32(IWL_SCAN_PRIORITY_HIGH); -} - static void iwl_mvm_scan_lmac_dwell(struct iwl_mvm *mvm, struct iwl_scan_req_lmac *cmd, struct iwl_mvm_scan_params *params) @@ -743,7 +749,7 @@ static void iwl_mvm_scan_lmac_dwell(struct iwl_mvm *mvm, cmd->extended_dwell = scan_timing[params->type].dwell_extended; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); - cmd->scan_prio = iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->scan_prio = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); } static inline bool iwl_mvm_scan_fits(struct iwl_mvm *mvm, int n_ssids, @@ -1033,21 +1039,24 @@ static void iwl_mvm_scan_umac_dwell(struct iwl_mvm *mvm, struct iwl_scan_req_umac *cmd, struct iwl_mvm_scan_params *params) { - cmd->extended_dwell = scan_timing[params->type].dwell_extended; - cmd->active_dwell = scan_timing[params->type].dwell_active; - cmd->passive_dwell = scan_timing[params->type].dwell_passive; + if (params->measurement_dwell) { + cmd->active_dwell = params->measurement_dwell; + cmd->passive_dwell = params->measurement_dwell; + cmd->extended_dwell = params->measurement_dwell; + } else { + cmd->active_dwell = scan_timing[params->type].dwell_active; + cmd->passive_dwell = scan_timing[params->type].dwell_passive; + cmd->extended_dwell = scan_timing[params->type].dwell_extended; + } cmd->fragmented_dwell = scan_timing[params->type].dwell_fragmented; cmd->max_out_time = cpu_to_le32(scan_timing[params->type].max_out_time); cmd->suspend_time = cpu_to_le32(scan_timing[params->type].suspend_time); - cmd->scan_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->scan_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); if (iwl_mvm_is_regular_scan(params)) - cmd->ooc_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_6); + cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_6); else - cmd->ooc_priority = - iwl_mvm_scan_priority(mvm, IWL_SCAN_PRIORITY_EXT_2); + cmd->ooc_priority = cpu_to_le32(IWL_SCAN_PRIORITY_EXT_2); } static void @@ -1067,11 +1076,11 @@ iwl_mvm_umac_scan_cfg_channels(struct iwl_mvm *mvm, } } -static u32 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, +static u16 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, struct iwl_mvm_scan_params *params, struct ieee80211_vif *vif) { - int flags = 0; + u16 flags = 0; if (params->n_ssids == 0) flags = IWL_UMAC_SCAN_GEN_FLAGS_PASSIVE; @@ -1093,6 +1102,9 @@ static u32 iwl_mvm_scan_umac_flags(struct iwl_mvm *mvm, if (!iwl_mvm_is_regular_scan(params)) flags |= IWL_UMAC_SCAN_GEN_FLAGS_PERIODIC; + if (params->measurement_dwell) + flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE; + #ifdef CONFIG_IWLWIFI_DEBUGFS if (mvm->scan_iter_notif_enabled) flags |= IWL_UMAC_SCAN_GEN_FLAGS_ITER_COMPLETE; @@ -1119,6 +1131,7 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, mvm->fw->ucode_capa.n_scan_channels; int uid, i; u32 ssid_bitmap = 0; + struct iwl_mvm_vif *scan_vif = iwl_mvm_vif_from_mac80211(vif); lockdep_assert_held(&mvm->mutex); @@ -1136,8 +1149,9 @@ static int iwl_mvm_scan_umac(struct iwl_mvm *mvm, struct ieee80211_vif *vif, mvm->scan_uid_status[uid] = type; cmd->uid = cpu_to_le32(uid); - cmd->general_flags = cpu_to_le32(iwl_mvm_scan_umac_flags(mvm, params, + cmd->general_flags = cpu_to_le16(iwl_mvm_scan_umac_flags(mvm, params, vif)); + cmd->scan_start_mac_id = scan_vif->id; if (type == IWL_MVM_SCAN_SCHED || type == IWL_MVM_SCAN_NETDETECT) cmd->flags = cpu_to_le32(IWL_UMAC_SCAN_FLAG_PREEMPTIVE); @@ -1289,6 +1303,12 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, iwl_mvm_get_scan_type(mvm, vif->type == NL80211_IFTYPE_P2P_DEVICE); + ret = iwl_mvm_get_measurement_dwell(mvm, req, ¶ms); + if (ret < 0) + return ret; + + params.measurement_dwell = ret; + iwl_mvm_build_scan_probe(mvm, vif, ies, ¶ms); if (fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_UMAC_SCAN)) { @@ -1315,6 +1335,7 @@ int iwl_mvm_reg_scan_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, IWL_DEBUG_SCAN(mvm, "Scan request was sent successfully\n"); mvm->scan_status |= IWL_MVM_SCAN_REGULAR; + mvm->scan_vif = iwl_mvm_vif_from_mac80211(vif); iwl_mvm_ref(mvm, IWL_MVM_REF_SCAN); queue_delayed_work(system_wq, &mvm->scan_timeout_dwork, @@ -1437,9 +1458,12 @@ void iwl_mvm_rx_umac_scan_complete_notif(struct iwl_mvm *mvm, if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_REGULAR) { struct cfg80211_scan_info info = { .aborted = aborted, + .scan_start_tsf = mvm->scan_start, }; + memcpy(info.tsf_bssid, mvm->scan_vif->bssid, ETH_ALEN); ieee80211_scan_completed(mvm->hw, &info); + mvm->scan_vif = NULL; iwl_mvm_unref(mvm, IWL_MVM_REF_SCAN); cancel_delayed_work(&mvm->scan_timeout_dwork); } else if (mvm->scan_uid_status[uid] == IWL_MVM_SCAN_SCHED) { @@ -1473,6 +1497,8 @@ void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm, struct iwl_umac_scan_iter_complete_notif *notif = (void *)pkt->data; u8 buf[256]; + mvm->scan_start = le64_to_cpu(notif->start_tsf); + IWL_DEBUG_SCAN(mvm, "UMAC Scan iteration complete: status=0x%x scanned_channels=%d channels list: %s\n", notif->status, notif->scanned_channels, @@ -1485,6 +1511,10 @@ void iwl_mvm_rx_umac_scan_iter_complete_notif(struct iwl_mvm *mvm, ieee80211_sched_scan_results(mvm->hw); mvm->sched_scan_pass_all = SCHED_SCAN_PASS_ALL_ENABLED; } + + IWL_DEBUG_SCAN(mvm, + "UMAC Scan iteration complete: scan started at %llu (TSF)\n", + mvm->scan_start); } static int iwl_mvm_umac_scan_abort(struct iwl_mvm *mvm, int type) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 3130b9c68a74..fc771885e383 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -468,6 +468,11 @@ static int iwl_mvm_get_shared_queue(struct iwl_mvm *mvm, i != IWL_MVM_DQA_BSS_CLIENT_QUEUE) continue; + /* Don't try and take queues being reconfigured */ + if (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_RECONFIGURING) + continue; + ac_to_queue[mvm->queue_info[i].mac80211_ac] = i; } @@ -501,31 +506,37 @@ static int iwl_mvm_get_shared_queue(struct iwl_mvm *mvm, queue = ac_to_queue[IEEE80211_AC_VO]; /* Make sure queue found (or not) is legal */ - if (!((queue >= IWL_MVM_DQA_MIN_MGMT_QUEUE && - queue <= IWL_MVM_DQA_MAX_MGMT_QUEUE) || - (queue >= IWL_MVM_DQA_MIN_DATA_QUEUE && - queue <= IWL_MVM_DQA_MAX_DATA_QUEUE) || - (queue == IWL_MVM_DQA_BSS_CLIENT_QUEUE))) { + if (!iwl_mvm_is_dqa_data_queue(mvm, queue) && + !iwl_mvm_is_dqa_mgmt_queue(mvm, queue) && + (queue != IWL_MVM_DQA_BSS_CLIENT_QUEUE)) { IWL_ERR(mvm, "No DATA queues available to share\n"); - queue = -ENOSPC; + return -ENOSPC; + } + + /* Make sure the queue isn't in the middle of being reconfigured */ + if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_RECONFIGURING) { + IWL_ERR(mvm, + "TXQ %d is in the middle of re-config - try again\n", + queue); + return -EBUSY; } return queue; } /* - * If a given queue has a higher AC than the TID stream that is being added to - * it, the queue needs to be redirected to the lower AC. This function does that + * If a given queue has a higher AC than the TID stream that is being compared + * to, the queue needs to be redirected to the lower AC. This function does that * in such a case, otherwise - if no redirection required - it does nothing, * unless the %force param is true. */ -static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, - int ac, int ssn, unsigned int wdg_timeout, - bool force) +int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, + int ac, int ssn, unsigned int wdg_timeout, + bool force) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; bool shared_queue; unsigned long mq; @@ -551,11 +562,12 @@ static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, cmd.sta_id = mvm->queue_info[queue].ra_sta_id; cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[mvm->queue_info[queue].mac80211_ac]; + cmd.tid = mvm->queue_info[queue].txq_tid; mq = mvm->queue_info[queue].hw_queue_to_mac80211; shared_queue = (mvm->queue_info[queue].hw_queue_refcount > 1); spin_unlock_bh(&mvm->queue_info_lock); - IWL_DEBUG_TX_QUEUES(mvm, "Redirecting shared TXQ #%d to FIFO #%d\n", + IWL_DEBUG_TX_QUEUES(mvm, "Redirecting TXQ #%d to FIFO #%d\n", queue, iwl_mvm_ac_to_tx_fifo[ac]); /* Stop MAC queues and wait for this queue to empty */ @@ -576,9 +588,12 @@ static int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, ret); /* Make sure the SCD wrptr is correctly set before reconfiguring */ - iwl_trans_txq_enable(mvm->trans, queue, iwl_mvm_ac_to_tx_fifo[ac], - cmd.sta_id, tid, LINK_QUAL_AGG_FRAME_LIMIT_DEF, - ssn, wdg_timeout); + iwl_trans_txq_enable_cfg(mvm->trans, queue, ssn, NULL, wdg_timeout); + + /* Update the TID "owner" of the queue */ + spin_lock_bh(&mvm->queue_info_lock); + mvm->queue_info[queue].txq_tid = tid; + spin_unlock_bh(&mvm->queue_info_lock); /* TODO: Work-around SCD bug when moving back by multiples of 0x40 */ @@ -709,7 +724,7 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, if (WARN_ON(queue <= 0)) { IWL_ERR(mvm, "No available queues for tid %d on sta_id %d\n", tid, cfg.sta_id); - return -ENOSPC; + return queue; } /* @@ -728,21 +743,23 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, if (using_inactive_queue) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; - u8 ac; + u8 txq_curr_ac; disable_agg_tids = iwl_mvm_remove_sta_queue_marking(mvm, queue); spin_lock_bh(&mvm->queue_info_lock); - ac = mvm->queue_info[queue].mac80211_ac; + txq_curr_ac = mvm->queue_info[queue].mac80211_ac; cmd.sta_id = mvm->queue_info[queue].ra_sta_id; - cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[txq_curr_ac]; + cmd.tid = mvm->queue_info[queue].txq_tid; spin_unlock_bh(&mvm->queue_info_lock); /* Disable the queue */ - iwl_mvm_invalidate_sta_queue(mvm, queue, disable_agg_tids, - true); + if (disable_agg_tids) + iwl_mvm_invalidate_sta_queue(mvm, queue, + disable_agg_tids, false); iwl_trans_txq_disable(mvm->trans, queue, false); ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd); @@ -758,6 +775,10 @@ static int iwl_mvm_sta_alloc_queue(struct iwl_mvm *mvm, return ret; } + + /* If TXQ is allocated to another STA, update removal in FW */ + if (cmd.sta_id != mvmsta->sta_id) + iwl_mvm_invalidate_sta_queue(mvm, queue, 0, true); } IWL_DEBUG_TX_QUEUES(mvm, @@ -827,6 +848,119 @@ out_err: return ret; } +static void iwl_mvm_change_queue_owner(struct iwl_mvm *mvm, int queue) +{ + struct iwl_scd_txq_cfg_cmd cmd = { + .scd_queue = queue, + .action = SCD_CFG_UPDATE_QUEUE_TID, + }; + s8 sta_id; + int tid; + unsigned long tid_bitmap; + int ret; + + lockdep_assert_held(&mvm->mutex); + + spin_lock_bh(&mvm->queue_info_lock); + sta_id = mvm->queue_info[queue].ra_sta_id; + tid_bitmap = mvm->queue_info[queue].tid_bitmap; + spin_unlock_bh(&mvm->queue_info_lock); + + if (WARN(!tid_bitmap, "TXQ %d has no tids assigned to it\n", queue)) + return; + + /* Find any TID for queue */ + tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1); + cmd.tid = tid; + cmd.tx_fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]]; + + ret = iwl_mvm_send_cmd_pdu(mvm, SCD_QUEUE_CFG, 0, sizeof(cmd), &cmd); + if (ret) + IWL_ERR(mvm, "Failed to update owner of TXQ %d (ret=%d)\n", + queue, ret); + else + IWL_DEBUG_TX_QUEUES(mvm, "Changed TXQ %d ownership to tid %d\n", + queue, tid); +} + +static void iwl_mvm_unshare_queue(struct iwl_mvm *mvm, int queue) +{ + struct ieee80211_sta *sta; + struct iwl_mvm_sta *mvmsta; + s8 sta_id; + int tid = -1; + unsigned long tid_bitmap; + unsigned int wdg_timeout; + int ssn; + int ret = true; + + lockdep_assert_held(&mvm->mutex); + + spin_lock_bh(&mvm->queue_info_lock); + sta_id = mvm->queue_info[queue].ra_sta_id; + tid_bitmap = mvm->queue_info[queue].tid_bitmap; + spin_unlock_bh(&mvm->queue_info_lock); + + /* Find TID for queue, and make sure it is the only one on the queue */ + tid = find_first_bit(&tid_bitmap, IWL_MAX_TID_COUNT + 1); + if (tid_bitmap != BIT(tid)) { + IWL_ERR(mvm, "Failed to unshare q %d, active tids=0x%lx\n", + queue, tid_bitmap); + return; + } + + IWL_DEBUG_TX_QUEUES(mvm, "Unsharing TXQ %d, keeping tid %d\n", queue, + tid); + + sta = rcu_dereference_protected(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + + if (WARN_ON_ONCE(IS_ERR_OR_NULL(sta))) + return; + + mvmsta = iwl_mvm_sta_from_mac80211(sta); + wdg_timeout = iwl_mvm_get_wd_timeout(mvm, mvmsta->vif, false, false); + + ssn = IEEE80211_SEQ_TO_SN(mvmsta->tid_data[tid].seq_number); + + ret = iwl_mvm_scd_queue_redirect(mvm, queue, tid, + tid_to_mac80211_ac[tid], ssn, + wdg_timeout, true); + if (ret) { + IWL_ERR(mvm, "Failed to redirect TXQ %d\n", queue); + return; + } + + /* If aggs should be turned back on - do it */ + if (mvmsta->tid_data[tid].state == IWL_AGG_ON) { + struct iwl_mvm_add_sta_cmd cmd = {0}; + + mvmsta->tid_disable_agg &= ~BIT(tid); + + cmd.mac_id_n_color = cpu_to_le32(mvmsta->mac_id_n_color); + cmd.sta_id = mvmsta->sta_id; + cmd.add_modify = STA_MODE_MODIFY; + cmd.modify_mask = STA_MODIFY_TID_DISABLE_TX; + cmd.tfd_queue_msk = cpu_to_le32(mvmsta->tfd_queue_msk); + cmd.tid_disable_tx = cpu_to_le16(mvmsta->tid_disable_agg); + + ret = iwl_mvm_send_cmd_pdu(mvm, ADD_STA, CMD_ASYNC, + iwl_mvm_add_sta_cmd_size(mvm), &cmd); + if (!ret) { + IWL_DEBUG_TX_QUEUES(mvm, + "TXQ #%d is now aggregated again\n", + queue); + + /* Mark queue intenally as aggregating again */ + iwl_trans_txq_set_shared_mode(mvm->trans, queue, false); + } + } + + spin_lock_bh(&mvm->queue_info_lock); + mvm->queue_info[queue].status = IWL_MVM_QUEUE_READY; + spin_unlock_bh(&mvm->queue_info_lock); +} + static inline u8 iwl_mvm_tid_to_ac_queue(int tid) { if (tid == IWL_MAX_TID_COUNT) @@ -894,13 +1028,42 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk) struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; unsigned long deferred_tid_traffic; - int sta_id, tid; + int queue, sta_id, tid; /* Check inactivity of queues */ iwl_mvm_inactivity_check(mvm); mutex_lock(&mvm->mutex); + /* Reconfigure queues requiring reconfiguation */ + for (queue = 0; queue < IWL_MAX_HW_QUEUES; queue++) { + bool reconfig; + bool change_owner; + + spin_lock_bh(&mvm->queue_info_lock); + reconfig = (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_RECONFIGURING); + + /* + * We need to take into account a situation in which a TXQ was + * allocated to TID x, and then turned shared by adding TIDs y + * and z. If TID x becomes inactive and is removed from the TXQ, + * ownership must be given to one of the remaining TIDs. + * This is mainly because if TID x continues - a new queue can't + * be allocated for it as long as it is an owner of another TXQ. + */ + change_owner = !(mvm->queue_info[queue].tid_bitmap & + BIT(mvm->queue_info[queue].txq_tid)) && + (mvm->queue_info[queue].status == + IWL_MVM_QUEUE_SHARED); + spin_unlock_bh(&mvm->queue_info_lock); + + if (reconfig) + iwl_mvm_unshare_queue(mvm, queue); + else if (change_owner) + iwl_mvm_change_queue_owner(mvm, queue); + } + /* Go over all stations with deferred traffic */ for_each_set_bit(sta_id, mvm->sta_deferred_frames, IWL_MVM_STATION_COUNT) { @@ -963,6 +1126,61 @@ static int iwl_mvm_reserve_sta_stream(struct iwl_mvm *mvm, return 0; } +/* + * In DQA mode, after a HW restart the queues should be allocated as before, in + * order to avoid race conditions when there are shared queues. This function + * does the re-mapping and queue allocation. + * + * Note that re-enabling aggregations isn't done in this function. + */ +static void iwl_mvm_realloc_queues_after_restart(struct iwl_mvm *mvm, + struct iwl_mvm_sta *mvm_sta) +{ + unsigned int wdg_timeout = + iwl_mvm_get_wd_timeout(mvm, mvm_sta->vif, false, false); + int i; + struct iwl_trans_txq_scd_cfg cfg = { + .sta_id = mvm_sta->sta_id, + .frame_limit = IWL_FRAME_LIMIT, + }; + + /* Make sure reserved queue is still marked as such (or allocated) */ + mvm->queue_info[mvm_sta->reserved_queue].status = + IWL_MVM_QUEUE_RESERVED; + + for (i = 0; i <= IWL_MAX_TID_COUNT; i++) { + struct iwl_mvm_tid_data *tid_data = &mvm_sta->tid_data[i]; + int txq_id = tid_data->txq_id; + int ac; + u8 mac_queue; + + if (txq_id == IEEE80211_INVAL_HW_QUEUE) + continue; + + skb_queue_head_init(&tid_data->deferred_tx_frames); + + ac = tid_to_mac80211_ac[i]; + mac_queue = mvm_sta->vif->hw_queue[ac]; + + cfg.tid = i; + cfg.fifo = iwl_mvm_ac_to_tx_fifo[ac]; + cfg.aggregate = (txq_id >= IWL_MVM_DQA_MIN_DATA_QUEUE || + txq_id == IWL_MVM_DQA_BSS_CLIENT_QUEUE); + + IWL_DEBUG_TX_QUEUES(mvm, + "Re-mapping sta %d tid %d to queue %d\n", + mvm_sta->sta_id, i, txq_id); + + iwl_mvm_enable_txq(mvm, txq_id, mac_queue, + IEEE80211_SEQ_TO_SN(tid_data->seq_number), + &cfg, wdg_timeout); + + mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_READY; + } + + atomic_set(&mvm->pending_frames[mvm_sta->sta_id], 0); +} + int iwl_mvm_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta) @@ -985,6 +1203,13 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, spin_lock_init(&mvm_sta->lock); + /* In DQA mode, if this is a HW restart, re-alloc existing queues */ + if (iwl_mvm_is_dqa_supported(mvm) && + test_bit(IWL_MVM_STATUS_IN_HW_RESTART, &mvm->status)) { + iwl_mvm_realloc_queues_after_restart(mvm, mvm_sta); + goto update_fw; + } + mvm_sta->sta_id = sta_id; mvm_sta->mac_id_n_color = FW_CMD_ID_AND_COLOR(mvmvif->id, mvmvif->color); @@ -1048,6 +1273,7 @@ int iwl_mvm_add_sta(struct iwl_mvm *mvm, goto err; } +update_fw: ret = iwl_mvm_sta_send_to_fw(mvm, sta, false, 0); if (ret) goto err; @@ -1071,13 +1297,6 @@ err: return ret; } -int iwl_mvm_update_sta(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta) -{ - return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0); -} - int iwl_mvm_drain_sta(struct iwl_mvm *mvm, struct iwl_mvm_sta *mvmsta, bool drain) { @@ -1270,9 +1489,31 @@ int iwl_mvm_rm_sta(struct iwl_mvm *mvm, ret = iwl_mvm_drain_sta(mvm, mvm_sta, false); /* If DQA is supported - the queues can be disabled now */ - if (iwl_mvm_is_dqa_supported(mvm)) + if (iwl_mvm_is_dqa_supported(mvm)) { + u8 reserved_txq = mvm_sta->reserved_queue; + enum iwl_mvm_queue_status *status; + iwl_mvm_disable_sta_queues(mvm, vif, mvm_sta); + /* + * If no traffic has gone through the reserved TXQ - it + * is still marked as IWL_MVM_QUEUE_RESERVED, and + * should be manually marked as free again + */ + spin_lock_bh(&mvm->queue_info_lock); + status = &mvm->queue_info[reserved_txq].status; + if (WARN((*status != IWL_MVM_QUEUE_RESERVED) && + (*status != IWL_MVM_QUEUE_FREE), + "sta_id %d reserved txq %d status %d", + mvm_sta->sta_id, reserved_txq, *status)) { + spin_unlock_bh(&mvm->queue_info_lock); + return -EINVAL; + } + + *status = IWL_MVM_QUEUE_FREE; + spin_unlock_bh(&mvm->queue_info_lock); + } + if (vif->type == NL80211_IFTYPE_STATION && mvmvif->ap_sta_id == mvm_sta->sta_id) { /* if associated - we can't remove the AP STA now */ @@ -1802,11 +2043,9 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta, baid_data->baid = baid; baid_data->timeout = timeout; baid_data->last_rx = jiffies; - init_timer(&baid_data->session_timer); - baid_data->session_timer.function = - iwl_mvm_rx_agg_session_expired; - baid_data->session_timer.data = - (unsigned long)&mvm->baid_map[baid]; + setup_timer(&baid_data->session_timer, + iwl_mvm_rx_agg_session_expired, + (unsigned long)&mvm->baid_map[baid]); baid_data->mvm = mvm; baid_data->tid = tid; baid_data->sta_id = mvm_sta->sta_id; @@ -1956,7 +2195,7 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, return -EIO; } - spin_lock_bh(&mvm->queue_info_lock); + spin_lock(&mvm->queue_info_lock); /* * Note the possible cases: @@ -1967,14 +2206,20 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, * non-DQA mode, since the TXQ hasn't yet been allocated */ txq_id = mvmsta->tid_data[tid].txq_id; - if (!iwl_mvm_is_dqa_supported(mvm) || + if (iwl_mvm_is_dqa_supported(mvm) && + unlikely(mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_SHARED)) { + ret = -ENXIO; + IWL_DEBUG_TX_QUEUES(mvm, + "Can't start tid %d agg on shared queue!\n", + tid); + goto release_locks; + } else if (!iwl_mvm_is_dqa_supported(mvm) || mvm->queue_info[txq_id].status != IWL_MVM_QUEUE_READY) { txq_id = iwl_mvm_find_free_queue(mvm, mvmsta->sta_id, mvm->first_agg_queue, mvm->last_agg_queue); if (txq_id < 0) { ret = txq_id; - spin_unlock_bh(&mvm->queue_info_lock); IWL_ERR(mvm, "Failed to allocate agg queue\n"); goto release_locks; } @@ -1982,7 +2227,8 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, /* TXQ hasn't yet been enabled, so mark it only as reserved */ mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_RESERVED; } - spin_unlock_bh(&mvm->queue_info_lock); + + spin_unlock(&mvm->queue_info_lock); IWL_DEBUG_TX_QUEUES(mvm, "AGG for tid %d will be on queue #%d\n", @@ -2006,8 +2252,11 @@ int iwl_mvm_sta_tx_agg_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif, } ret = 0; + goto out; release_locks: + spin_unlock(&mvm->queue_info_lock); +out: spin_unlock_bh(&mvmsta->lock); return ret; @@ -2023,6 +2272,7 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, iwl_mvm_get_wd_timeout(mvm, vif, sta->tdls, false); int queue, ret; bool alloc_queue = true; + enum iwl_mvm_queue_status queue_status; u16 ssn; struct iwl_trans_txq_scd_cfg cfg = { @@ -2048,13 +2298,15 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, cfg.fifo = iwl_mvm_ac_to_tx_fifo[tid_to_mac80211_ac[tid]]; + spin_lock_bh(&mvm->queue_info_lock); + queue_status = mvm->queue_info[queue].status; + spin_unlock_bh(&mvm->queue_info_lock); + /* In DQA mode, the existing queue might need to be reconfigured */ if (iwl_mvm_is_dqa_supported(mvm)) { - spin_lock_bh(&mvm->queue_info_lock); /* Maybe there is no need to even alloc a queue... */ if (mvm->queue_info[queue].status == IWL_MVM_QUEUE_READY) alloc_queue = false; - spin_unlock_bh(&mvm->queue_info_lock); /* * Only reconfig the SCD for the queue if the window size has @@ -2089,9 +2341,12 @@ int iwl_mvm_sta_tx_agg_oper(struct iwl_mvm *mvm, struct ieee80211_vif *vif, vif->hw_queue[tid_to_mac80211_ac[tid]], ssn, &cfg, wdg_timeout); - ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true); - if (ret) - return -EIO; + /* Send ADD_STA command to enable aggs only if the queue isn't shared */ + if (queue_status != IWL_MVM_QUEUE_SHARED) { + ret = iwl_mvm_sta_tx_agg(mvm, sta, tid, queue, true); + if (ret) + return -EIO; + } /* No need to mark as reserved */ spin_lock_bh(&mvm->queue_info_lock); @@ -2123,7 +2378,6 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, u16 txq_id; int err; - /* * If mac80211 is cleaning its state, then say that we finished since * our state has been cleared anyway. @@ -2152,6 +2406,7 @@ int iwl_mvm_sta_tx_agg_stop(struct iwl_mvm *mvm, struct ieee80211_vif *vif, */ if (mvm->queue_info[txq_id].status == IWL_MVM_QUEUE_RESERVED) mvm->queue_info[txq_id].status = IWL_MVM_QUEUE_FREE; + spin_unlock_bh(&mvm->queue_info_lock); switch (tid_data->state) { @@ -2412,9 +2667,15 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, struct iwl_mvm_mgmt_mcast_key_cmd igtk_cmd = {}; /* verify the key details match the required command's expectations */ - if (WARN_ON((keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC) || - (keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) || - (keyconf->keyidx != 4 && keyconf->keyidx != 5))) + if (WARN_ON((keyconf->flags & IEEE80211_KEY_FLAG_PAIRWISE) || + (keyconf->keyidx != 4 && keyconf->keyidx != 5) || + (keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC && + keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_128 && + keyconf->cipher != WLAN_CIPHER_SUITE_BIP_GMAC_256))) + return -EINVAL; + + if (WARN_ON(!iwl_mvm_has_new_rx_api(mvm) && + keyconf->cipher != WLAN_CIPHER_SUITE_AES_CMAC)) return -EINVAL; igtk_cmd.key_id = cpu_to_le32(keyconf->keyidx); @@ -2430,11 +2691,18 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, case WLAN_CIPHER_SUITE_AES_CMAC: igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_GCMP); + break; default: return -EINVAL; } - memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen); + memcpy(igtk_cmd.igtk, keyconf->key, keyconf->keylen); + if (keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) + igtk_cmd.ctrl_flags |= + cpu_to_le32(STA_KEY_FLG_KEY_32BYTES); ieee80211_get_key_rx_seq(keyconf, 0, &seq); pn = seq.aes_cmac.pn; igtk_cmd.receive_seq_cnt = cpu_to_le64(((u64) pn[5] << 0) | @@ -2449,6 +2717,19 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, remove_key ? "removing" : "installing", igtk_cmd.sta_id); + if (!iwl_mvm_has_new_rx_api(mvm)) { + struct iwl_mvm_mgmt_mcast_key_cmd_v1 igtk_cmd_v1 = { + .ctrl_flags = igtk_cmd.ctrl_flags, + .key_id = igtk_cmd.key_id, + .sta_id = igtk_cmd.sta_id, + .receive_seq_cnt = igtk_cmd.receive_seq_cnt + }; + + memcpy(igtk_cmd_v1.igtk, igtk_cmd.igtk, + ARRAY_SIZE(igtk_cmd_v1.igtk)); + return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0, + sizeof(igtk_cmd_v1), &igtk_cmd_v1); + } return iwl_mvm_send_cmd_pdu(mvm, MGMT_MCAST_KEY, 0, sizeof(igtk_cmd), &igtk_cmd); } @@ -2573,7 +2854,9 @@ int iwl_mvm_set_sta_key(struct iwl_mvm *mvm, } sta_id = mvm_sta->sta_id; - if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC) { + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) { ret = iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, false); goto end; } @@ -2659,7 +2942,9 @@ int iwl_mvm_remove_sta_key(struct iwl_mvm *mvm, IWL_DEBUG_WEP(mvm, "mvm remove dynamic key: idx=%d sta=%d\n", keyconf->keyidx, sta_id); - if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC) + if (keyconf->cipher == WLAN_CIPHER_SUITE_AES_CMAC || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_128 || + keyconf->cipher == WLAN_CIPHER_SUITE_BIP_GMAC_256) return iwl_mvm_send_sta_igtk(mvm, keyconf, sta_id, true); if (!__test_and_clear_bit(keyconf->hw_key_idx, mvm->fw_key_table)) { diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h index bbc1cab2c3bf..e068d5355865 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h @@ -473,9 +473,14 @@ int iwl_mvm_sta_send_to_fw(struct iwl_mvm *mvm, struct ieee80211_sta *sta, int iwl_mvm_add_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta); -int iwl_mvm_update_sta(struct iwl_mvm *mvm, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta); + +static inline int iwl_mvm_update_sta(struct iwl_mvm *mvm, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta) +{ + return iwl_mvm_sta_send_to_fw(mvm, sta, true, 0); +} + int iwl_mvm_rm_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif, struct ieee80211_sta *sta); @@ -554,4 +559,8 @@ void iwl_mvm_modify_all_sta_disable_tx(struct iwl_mvm *mvm, void iwl_mvm_csa_client_absent(struct iwl_mvm *mvm, struct ieee80211_vif *vif); void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk); +int iwl_mvm_scd_queue_redirect(struct iwl_mvm *mvm, int queue, int tid, + int ac, int ssn, unsigned int wdg_timeout, + bool force); + #endif /* __sta_h__ */ diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c index 58fc7b3c711c..63a051be832e 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tt.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tt.c @@ -241,11 +241,8 @@ static int iwl_mvm_get_temp_cmd(struct iwl_mvm *mvm) }; u32 cmdid; - if (fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR)) - cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE, - PHY_OPS_GROUP, 0); - else - cmdid = CMD_DTS_MEASUREMENT_TRIGGER; + cmdid = iwl_cmd_id(CMD_DTS_MEASUREMENT_TRIGGER_WIDE, + PHY_OPS_GROUP, 0); if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_EXTENDED_DTS_MEASURE)) @@ -261,9 +258,6 @@ int iwl_mvm_get_temp(struct iwl_mvm *mvm, s32 *temp) DTS_MEASUREMENT_NOTIF_WIDE) }; int ret; - if (!fw_has_api(&mvm->fw->ucode_capa, IWL_UCODE_TLV_API_WIDE_CMD_HDR)) - temp_notif[0] = DTS_MEASUREMENT_NOTIFICATION; - lockdep_assert_held(&mvm->mutex); iwl_init_notification_wait(&mvm->notif_wait, &wait_temp_notif, diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c index b3a87a31de30..66957ac12ca4 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c @@ -346,7 +346,7 @@ void iwl_mvm_set_tx_cmd_rate(struct iwl_mvm *mvm, struct iwl_tx_cmd *tx_cmd, rate_idx = info->control.rates[0].idx; /* if the rate isn't a well known legacy rate, take the lowest one */ - if (rate_idx < 0 || rate_idx > IWL_RATE_COUNT_LEGACY) + if (rate_idx < 0 || rate_idx >= IWL_RATE_COUNT_LEGACY) rate_idx = rate_lowest_index( &mvm->nvm_data->bands[info->band], sta); @@ -441,7 +441,7 @@ static void iwl_mvm_set_tx_cmd_crypto(struct iwl_mvm *mvm, * one. * Need to handle this. */ - tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TC_CMD_SEC_KEY_FROM_TABLE; + tx_cmd->sec_ctl |= TX_CMD_SEC_GCMP | TX_CMD_SEC_KEY_FROM_TABLE; tx_cmd->key[0] = keyconf->hw_key_idx; iwl_mvm_set_tx_cmd_pn(info, crypto_hdr); break; @@ -490,16 +490,34 @@ iwl_mvm_set_tx_params(struct iwl_mvm *mvm, struct sk_buff *skb, static int iwl_mvm_get_ctrl_vif_queue(struct iwl_mvm *mvm, struct ieee80211_tx_info *info, __le16 fc) { - if (iwl_mvm_is_dqa_supported(mvm)) { - if (info->control.vif->type == NL80211_IFTYPE_AP && - ieee80211_is_probe_resp(fc)) - return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; - else if (ieee80211_is_mgmt(fc) && - info->control.vif->type == NL80211_IFTYPE_P2P_DEVICE) - return IWL_MVM_DQA_P2P_DEVICE_QUEUE; - } + if (!iwl_mvm_is_dqa_supported(mvm)) + return info->hw_queue; - return info->hw_queue; + switch (info->control.vif->type) { + case NL80211_IFTYPE_AP: + /* + * handle legacy hostapd as well, where station may be added + * only after assoc. + */ + if (ieee80211_is_probe_resp(fc) || ieee80211_is_auth(fc)) + return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; + if (info->hw_queue == info->control.vif->cab_queue) + return info->hw_queue; + + WARN_ON_ONCE(1); + return IWL_MVM_DQA_AP_PROBE_RESP_QUEUE; + case NL80211_IFTYPE_P2P_DEVICE: + if (ieee80211_is_mgmt(fc)) + return IWL_MVM_DQA_P2P_DEVICE_QUEUE; + if (info->hw_queue == info->control.vif->cab_queue) + return info->hw_queue; + + WARN_ON_ONCE(1); + return IWL_MVM_DQA_P2P_DEVICE_QUEUE; + default: + WARN_ONCE(1, "Not a ctrl vif, no available queue\n"); + return -1; + } } int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) @@ -559,6 +577,9 @@ int iwl_mvm_tx_skb_non_sta(struct iwl_mvm *mvm, struct sk_buff *skb) sta_id = mvmvif->bcast_sta.sta_id; queue = iwl_mvm_get_ctrl_vif_queue(mvm, &info, hdr->frame_control); + if (queue < 0) + return -1; + } else if (info.control.vif->type == NL80211_IFTYPE_STATION && is_multicast_ether_addr(hdr->addr1)) { u8 ap_sta_id = ACCESS_ONCE(mvmvif->ap_sta_id); @@ -837,6 +858,22 @@ static void iwl_mvm_tx_add_stream(struct iwl_mvm *mvm, } } +/* Check if there are any timed-out TIDs on a given shared TXQ */ +static bool iwl_mvm_txq_should_update(struct iwl_mvm *mvm, int txq_id) +{ + unsigned long queue_tid_bitmap = mvm->queue_info[txq_id].tid_bitmap; + unsigned long now = jiffies; + int tid; + + for_each_set_bit(tid, &queue_tid_bitmap, IWL_MAX_TID_COUNT + 1) { + if (time_before(mvm->queue_info[txq_id].last_frame_time[tid] + + IWL_MVM_DQA_QUEUE_TIMEOUT, now)) + return true; + } + + return false; +} + /* * Sets the fields in the Tx cmd that are crypto related */ @@ -903,9 +940,13 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, tid = IWL_MAX_TID_COUNT; } - if (iwl_mvm_is_dqa_supported(mvm)) + if (iwl_mvm_is_dqa_supported(mvm)) { txq_id = mvmsta->tid_data[tid].txq_id; + if (ieee80211_is_mgmt(fc)) + tx_cmd->tid_tspec = IWL_TID_NON_QOS; + } + /* Copy MAC header from skb into command buffer */ memcpy(tx_cmd->hdr, hdr, hdrlen); @@ -939,7 +980,6 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, iwl_trans_free_tx_cmd(mvm->trans, dev_cmd); spin_unlock(&mvmsta->lock); return 0; - } /* If we are here - TXQ exists and needs to be re-activated */ @@ -952,8 +992,25 @@ static int iwl_mvm_tx_mpdu(struct iwl_mvm *mvm, struct sk_buff *skb, txq_id); } - /* Keep track of the time of the last frame for this RA/TID */ - mvm->queue_info[txq_id].last_frame_time[tid] = jiffies; + if (iwl_mvm_is_dqa_supported(mvm)) { + /* Keep track of the time of the last frame for this RA/TID */ + mvm->queue_info[txq_id].last_frame_time[tid] = jiffies; + + /* + * If we have timed-out TIDs - schedule the worker that will + * reconfig the queues and update them + * + * Note that the mvm->queue_info_lock isn't being taken here in + * order to not serialize the TX flow. This isn't dangerous + * because scheduling mvm->add_stream_wk can't ruin the state, + * and if we DON'T schedule it due to some race condition then + * next TX we get here we will. + */ + if (unlikely(mvm->queue_info[txq_id].status == + IWL_MVM_QUEUE_SHARED && + iwl_mvm_txq_should_update(mvm, txq_id))) + schedule_work(&mvm->add_stream_wk); + } IWL_DEBUG_TX(mvm, "TX to [%d|%d] Q:%d - seq: 0x%x\n", mvmsta->sta_id, tid, txq_id, IEEE80211_SEQ_TO_SN(seq_number)); @@ -1067,9 +1124,13 @@ static void iwl_mvm_check_ratid_empty(struct iwl_mvm *mvm, IWL_DEBUG_TX_QUEUES(mvm, "Can continue DELBA flow ssn = next_recl = %d\n", tid_data->next_reclaimed); - iwl_mvm_disable_txq(mvm, tid_data->txq_id, - vif->hw_queue[tid_to_mac80211_ac[tid]], tid, - CMD_ASYNC); + if (!iwl_mvm_is_dqa_supported(mvm)) { + u8 mac80211_ac = tid_to_mac80211_ac[tid]; + + iwl_mvm_disable_txq(mvm, tid_data->txq_id, + vif->hw_queue[mac80211_ac], tid, + CMD_ASYNC); + } tid_data->state = IWL_AGG_OFF; ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid); break; @@ -1547,41 +1608,16 @@ void iwl_mvm_rx_tx_cmd(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) iwl_mvm_rx_tx_cmd_agg(mvm, pkt); } -static void iwl_mvm_tx_info_from_ba_notif(struct ieee80211_tx_info *info, - struct iwl_mvm_ba_notif *ba_notif, - struct iwl_mvm_tid_data *tid_data) +static void iwl_mvm_tx_reclaim(struct iwl_mvm *mvm, int sta_id, int tid, + int txq, int index, + struct ieee80211_tx_info *ba_info, u32 rate) { - info->flags |= IEEE80211_TX_STAT_AMPDU; - info->status.ampdu_ack_len = ba_notif->txed_2_done; - info->status.ampdu_len = ba_notif->txed; - iwl_mvm_hwrate_to_tx_status(tid_data->rate_n_flags, - info); - /* TODO: not accounted if the whole A-MPDU failed */ - info->status.tx_time = tid_data->tx_time; - info->status.status_driver_data[0] = - (void *)(uintptr_t)ba_notif->reduced_txp; - info->status.status_driver_data[1] = - (void *)(uintptr_t)tid_data->rate_n_flags; -} - -void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) -{ - struct iwl_rx_packet *pkt = rxb_addr(rxb); - struct iwl_mvm_ba_notif *ba_notif = (void *)pkt->data; struct sk_buff_head reclaimed_skbs; struct iwl_mvm_tid_data *tid_data; struct ieee80211_sta *sta; struct iwl_mvm_sta *mvmsta; struct sk_buff *skb; - int sta_id, tid, freed; - /* "flow" corresponds to Tx queue */ - u16 scd_flow = le16_to_cpu(ba_notif->scd_flow); - /* "ssn" is start of block-ack Tx window, corresponds to index - * (in Tx queue's circular buffer) of first TFD/frame in window */ - u16 ba_resp_scd_ssn = le16_to_cpu(ba_notif->scd_ssn); - - sta_id = ba_notif->sta_id; - tid = ba_notif->tid; + int freed; if (WARN_ONCE(sta_id >= IWL_MVM_STATION_COUNT || tid >= IWL_MAX_TID_COUNT, @@ -1601,10 +1637,10 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) mvmsta = iwl_mvm_sta_from_mac80211(sta); tid_data = &mvmsta->tid_data[tid]; - if (tid_data->txq_id != scd_flow) { + if (tid_data->txq_id != txq) { IWL_ERR(mvm, - "invalid BA notification: Q %d, tid %d, flow %d\n", - tid_data->txq_id, tid, scd_flow); + "invalid BA notification: Q %d, tid %d\n", + tid_data->txq_id, tid); rcu_read_unlock(); return; } @@ -1618,27 +1654,14 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) * block-ack window (we assume that they've been successfully * transmitted ... if not, it's too late anyway). */ - iwl_trans_reclaim(mvm->trans, scd_flow, ba_resp_scd_ssn, - &reclaimed_skbs); + iwl_trans_reclaim(mvm->trans, txq, index, &reclaimed_skbs); - IWL_DEBUG_TX_REPLY(mvm, - "BA_NOTIFICATION Received from %pM, sta_id = %d\n", - (u8 *)&ba_notif->sta_addr_lo32, - ba_notif->sta_id); - IWL_DEBUG_TX_REPLY(mvm, - "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n", - ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl), - (unsigned long long)le64_to_cpu(ba_notif->bitmap), - scd_flow, ba_resp_scd_ssn, ba_notif->txed, - ba_notif->txed_2_done); - - IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n", - ba_notif->reduced_txp); - tid_data->next_reclaimed = ba_resp_scd_ssn; + tid_data->next_reclaimed = index; iwl_mvm_check_ratid_empty(mvm, sta, tid); freed = 0; + ba_info->status.status_driver_data[1] = (void *)(uintptr_t)rate; skb_queue_walk(&reclaimed_skbs, skb) { struct ieee80211_hdr *hdr = (void *)skb->data; @@ -1660,8 +1683,12 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) /* this is the first skb we deliver in this batch */ /* put the rate scaling data there */ - if (freed == 1) - iwl_mvm_tx_info_from_ba_notif(info, ba_notif, tid_data); + if (freed == 1) { + info->flags |= IEEE80211_TX_STAT_AMPDU; + memcpy(&info->status, &ba_info->status, + sizeof(ba_info->status)); + iwl_mvm_hwrate_to_tx_status(rate, info); + } } spin_unlock_bh(&mvmsta->lock); @@ -1671,7 +1698,6 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) * Still it's important to update RS about sent vs. acked. */ if (skb_queue_empty(&reclaimed_skbs)) { - struct ieee80211_tx_info ba_info = {}; struct ieee80211_chanctx_conf *chanctx_conf = NULL; if (mvmsta->vif) @@ -1681,11 +1707,11 @@ void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) if (WARN_ON_ONCE(!chanctx_conf)) goto out; - ba_info.band = chanctx_conf->def.chan->band; - iwl_mvm_tx_info_from_ba_notif(&ba_info, ba_notif, tid_data); + ba_info->band = chanctx_conf->def.chan->band; + iwl_mvm_hwrate_to_tx_status(rate, ba_info); IWL_DEBUG_TX_REPLY(mvm, "No reclaim. Update rs directly\n"); - iwl_mvm_rs_tx_status(mvm, sta, tid, &ba_info, false); + iwl_mvm_rs_tx_status(mvm, sta, tid, ba_info, false); } out: @@ -1697,6 +1723,92 @@ out: } } +void iwl_mvm_rx_ba_notif(struct iwl_mvm *mvm, struct iwl_rx_cmd_buffer *rxb) +{ + struct iwl_rx_packet *pkt = rxb_addr(rxb); + int sta_id, tid, txq, index; + struct ieee80211_tx_info ba_info = {}; + struct iwl_mvm_ba_notif *ba_notif; + struct iwl_mvm_tid_data *tid_data; + struct iwl_mvm_sta *mvmsta; + + if (iwl_mvm_has_new_tx_api(mvm)) { + struct iwl_mvm_compressed_ba_notif *ba_res = + (void *)pkt->data; + + sta_id = ba_res->sta_id; + ba_info.status.ampdu_ack_len = (u8)le16_to_cpu(ba_res->done); + ba_info.status.ampdu_len = (u8)le16_to_cpu(ba_res->txed); + ba_info.status.tx_time = + (u16)le32_to_cpu(ba_res->wireless_time); + ba_info.status.status_driver_data[0] = + (void *)(uintptr_t)ba_res->reduced_txp; + + /* + * TODO: + * When supporting multi TID aggregations - we need to move + * next_reclaimed to be per TXQ and not per TID or handle it + * in a different way. + * This will go together with SN and AddBA offload and cannot + * be handled properly for now. + */ + WARN_ON(le16_to_cpu(ba_res->tfd_cnt) != 1); + iwl_mvm_tx_reclaim(mvm, sta_id, ba_res->ra_tid[0].tid, + (int)ba_res->tfd[0].q_num, + le16_to_cpu(ba_res->tfd[0].tfd_index), + &ba_info, le32_to_cpu(ba_res->tx_rate)); + + IWL_DEBUG_TX_REPLY(mvm, + "BA_NOTIFICATION Received from sta_id = %d, flags %x, sent:%d, acked:%d\n", + sta_id, le32_to_cpu(ba_res->flags), + le16_to_cpu(ba_res->txed), + le16_to_cpu(ba_res->done)); + return; + } + + ba_notif = (void *)pkt->data; + sta_id = ba_notif->sta_id; + tid = ba_notif->tid; + /* "flow" corresponds to Tx queue */ + txq = le16_to_cpu(ba_notif->scd_flow); + /* "ssn" is start of block-ack Tx window, corresponds to index + * (in Tx queue's circular buffer) of first TFD/frame in window */ + index = le16_to_cpu(ba_notif->scd_ssn); + + rcu_read_lock(); + mvmsta = iwl_mvm_sta_from_staid_rcu(mvm, sta_id); + if (WARN_ON_ONCE(!mvmsta)) { + rcu_read_unlock(); + return; + } + + tid_data = &mvmsta->tid_data[tid]; + + ba_info.status.ampdu_ack_len = ba_notif->txed_2_done; + ba_info.status.ampdu_len = ba_notif->txed; + ba_info.status.tx_time = tid_data->tx_time; + ba_info.status.status_driver_data[0] = + (void *)(uintptr_t)ba_notif->reduced_txp; + + rcu_read_unlock(); + + iwl_mvm_tx_reclaim(mvm, sta_id, tid, txq, index, &ba_info, + tid_data->rate_n_flags); + + IWL_DEBUG_TX_REPLY(mvm, + "BA_NOTIFICATION Received from %pM, sta_id = %d\n", + (u8 *)&ba_notif->sta_addr_lo32, ba_notif->sta_id); + + IWL_DEBUG_TX_REPLY(mvm, + "TID = %d, SeqCtl = %d, bitmap = 0x%llx, scd_flow = %d, scd_ssn = %d sent:%d, acked:%d\n", + ba_notif->tid, le16_to_cpu(ba_notif->seq_ctl), + le64_to_cpu(ba_notif->bitmap), txq, index, + ba_notif->txed, ba_notif->txed_2_done); + + IWL_DEBUG_TX_REPLY(mvm, "reduced txp from ba notif %d\n", + ba_notif->reduced_txp); +} + /* * Note that there are transports that buffer frames before they reach * the firmware. This means that after flush_tx_path is called, the diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c index 68f4e7fdfc11..d04babd99b53 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c @@ -512,7 +512,7 @@ void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm) base = mvm->fw->inst_errlog_ptr; } - if (base < 0x800000) { + if (base < 0x400000) { IWL_ERR(mvm, "Not valid error log pointer 0x%08X for %s uCode\n", base, @@ -610,7 +610,7 @@ int iwl_mvm_reconfig_scd(struct iwl_mvm *mvm, int queue, int fifo, int sta_id, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 1, + .action = SCD_CFG_ENABLE_QUEUE, .window = frame_limit, .sta_id = sta_id, .ssn = cpu_to_le16(ssn), @@ -669,6 +669,8 @@ void iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, tid_to_mac80211_ac[cfg->tid]; else mvm->queue_info[queue].mac80211_ac = IEEE80211_AC_VO; + + mvm->queue_info[queue].txq_tid = cfg->tid; } IWL_DEBUG_TX_QUEUES(mvm, @@ -682,7 +684,7 @@ void iwl_mvm_enable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, if (enable_queue) { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 1, + .action = SCD_CFG_ENABLE_QUEUE, .window = cfg->frame_limit, .sta_id = cfg->sta_id, .ssn = cpu_to_le16(ssn), @@ -709,7 +711,7 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, { struct iwl_scd_txq_cfg_cmd cmd = { .scd_queue = queue, - .enable = 0, + .action = SCD_CFG_DISABLE_QUEUE, }; bool remove_mac_queue = true; int ret; @@ -744,8 +746,9 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, ~BIT(mac80211_queue); mvm->queue_info[queue].hw_queue_refcount--; - cmd.enable = mvm->queue_info[queue].hw_queue_refcount ? 1 : 0; - if (!cmd.enable) + cmd.action = mvm->queue_info[queue].hw_queue_refcount ? + SCD_CFG_ENABLE_QUEUE : SCD_CFG_DISABLE_QUEUE; + if (cmd.action == SCD_CFG_DISABLE_QUEUE) mvm->queue_info[queue].status = IWL_MVM_QUEUE_FREE; IWL_DEBUG_TX_QUEUES(mvm, @@ -755,12 +758,13 @@ void iwl_mvm_disable_txq(struct iwl_mvm *mvm, int queue, int mac80211_queue, mvm->queue_info[queue].hw_queue_to_mac80211); /* If the queue is still enabled - nothing left to do in this func */ - if (cmd.enable) { + if (cmd.action == SCD_CFG_ENABLE_QUEUE) { spin_unlock_bh(&mvm->queue_info_lock); return; } cmd.sta_id = mvm->queue_info[queue].ra_sta_id; + cmd.tid = mvm->queue_info[queue].txq_tid; /* Make sure queue info is correct even though we overwrite it */ WARN(mvm->queue_info[queue].hw_queue_refcount || @@ -1131,7 +1135,13 @@ static void iwl_mvm_remove_inactive_tids(struct iwl_mvm *mvm, BIT(mvmsta->vif->hw_queue[tid_to_mac80211_ac[tid]]); } - /* TODO: if queue was shared - need to re-enable AGGs */ + /* If the queue is marked as shared - "unshare" it */ + if (mvm->queue_info[queue].hw_queue_refcount == 1 && + mvm->queue_info[queue].status == IWL_MVM_QUEUE_SHARED) { + mvm->queue_info[queue].status = IWL_MVM_QUEUE_RECONFIGURING; + IWL_DEBUG_TX_QUEUES(mvm, "Marking Q:%d for reconfig\n", + queue); + } } void iwl_mvm_inactivity_check(struct iwl_mvm *mvm) @@ -1215,6 +1225,28 @@ void iwl_mvm_inactivity_check(struct iwl_mvm *mvm) rcu_read_unlock(); } +void iwl_mvm_get_sync_time(struct iwl_mvm *mvm, u32 *gp2, u64 *boottime) +{ + bool ps_disabled; + + lockdep_assert_held(&mvm->mutex); + + /* Disable power save when reading GP2 */ + ps_disabled = mvm->ps_disabled; + if (!ps_disabled) { + mvm->ps_disabled = true; + iwl_mvm_power_update_device(mvm); + } + + *gp2 = iwl_read_prph(mvm->trans, DEVICE_SYSTEM_TIME_REG); + *boottime = ktime_get_boot_ns(); + + if (!ps_disabled) { + mvm->ps_disabled = ps_disabled; + iwl_mvm_power_update_device(mvm); + } +} + int iwl_mvm_send_lqm_cmd(struct ieee80211_vif *vif, enum iwl_lqm_cmd_operatrions operation, u32 duration, u32 timeout) diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c index 78cf9a7f3eac..001be406a3d3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c @@ -487,6 +487,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x1130, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0130, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x1010, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x10D0, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0050, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0150, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x9010, iwl8265_2ac_cfg)}, @@ -500,22 +501,36 @@ static const struct pci_device_id iwl_hw_card_ids[] = { {IWL_PCI_DEVICE(0x24FD, 0x0930, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0950, iwl8265_2ac_cfg)}, {IWL_PCI_DEVICE(0x24FD, 0x0850, iwl8265_2ac_cfg)}, + {IWL_PCI_DEVICE(0x24FD, 0x0012, iwl8275_2ac_cfg)}, /* 9000 Series */ + {IWL_PCI_DEVICE(0x271B, 0x0010, iwl9160_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0000, iwl9260_2ac_cfg)}, {IWL_PCI_DEVICE(0x2526, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9260_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x2526, 0x1420, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl5165_2ac_cfg)}, - {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl5165_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1410, iwl9270_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0210, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0410, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0610, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0310, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0000, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0510, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2010, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1420, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0710, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x2A10, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x30DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0060, iwl9460_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x9DF0, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x31DC, 0x0030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0x2526, 0x1030, iwl9560_2ac_cfg)}, + {IWL_PCI_DEVICE(0xA370, 0x1030, iwl9560_2ac_cfg)}, /* a000 Series */ {IWL_PCI_DEVICE(0x2720, 0x0A10, iwla000_2ac_cfg)}, @@ -608,7 +623,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { const struct iwl_cfg *cfg = (struct iwl_cfg *)(ent->driver_data); const struct iwl_cfg *cfg_7265d __maybe_unused = NULL; - const struct iwl_cfg *cfg_9260lc __maybe_unused = NULL; struct iwl_trans *iwl_trans; int ret; @@ -637,11 +651,10 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } if (iwl_trans->cfg->rf_id) { - if (cfg == &iwl9260_2ac_cfg) - cfg_9260lc = &iwl9260lc_2ac_cfg; - if (cfg_9260lc && iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) { - cfg = cfg_9260lc; - iwl_trans->cfg = cfg_9260lc; + if (cfg == &iwl9460_2ac_cfg && + iwl_trans->hw_rf_id == CSR_HW_RF_ID_TYPE_LC) { + cfg = &iwl9000lc_2ac_cfg; + iwl_trans->cfg = cfg; } } #endif diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h index 11e347dd44c7..cac6d99012b3 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h @@ -37,6 +37,7 @@ #include #include #include +#include #include "iwl-fh.h" #include "iwl-csr.h" @@ -49,7 +50,7 @@ * be needed for potential data in the SKB's head. The remaining ones can * be used for frags. */ -#define IWL_PCIE_MAX_FRAGS (IWL_NUM_OF_TBS - 3) +#define IWL_PCIE_MAX_FRAGS(x) (x->max_tbs - 3) /* * RX related structures and functions @@ -192,41 +193,9 @@ struct iwl_cmd_meta { /* only for SYNC commands, iff the reply skb is wanted */ struct iwl_host_cmd *source; u32 flags; + u32 tbs; }; -/* - * Generic queue structure - * - * Contains common data for Rx and Tx queues. - * - * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware - * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless - * there might be HW changes in the future). For the normal TX - * queues, n_window, which is the size of the software queue data - * is also 256; however, for the command queue, n_window is only - * 32 since we don't need so many commands pending. Since the HW - * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. As a result, - * the software buffers (in the variables @meta, @txb in struct - * iwl_txq) only have 32 entries, while the HW buffers (@tfds in - * the same struct) have 256. - * This means that we end up with the following: - * HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 | - * SW entries: | 0 | ... | 31 | - * where N is a number between 0 and 7. This means that the SW - * data is a window overlayed over the HW queue. - */ -struct iwl_queue { - int write_ptr; /* 1-st empty entry (index) host_w*/ - int read_ptr; /* last used entry (index) host_r*/ - /* use for monitoring and recovering the stuck queue */ - dma_addr_t dma_addr; /* physical addr for BD's */ - int n_window; /* safe queue window */ - u32 id; - int low_mark; /* low watermark, resume queue if free - * space more than this */ - int high_mark; /* high watermark, stop queue if free - * space less than this */ -}; #define TFD_TX_CMD_SLOTS 256 #define TFD_CMD_SLOTS 32 @@ -273,13 +242,32 @@ struct iwl_pcie_first_tb_buf { * @wd_timeout: queue watchdog timeout (jiffies) - per queue * @frozen: tx stuck queue timer is frozen * @frozen_expiry_remainder: remember how long until the timer fires + * @write_ptr: 1-st empty entry (index) host_w + * @read_ptr: last used entry (index) host_r + * @dma_addr: physical addr for BD's + * @n_window: safe queue window + * @id: queue id + * @low_mark: low watermark, resume queue if free space more than this + * @high_mark: high watermark, stop queue if free space less than this * * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame * descriptors) and required locking structures. + * + * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware + * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless + * there might be HW changes in the future). For the normal TX + * queues, n_window, which is the size of the software queue data + * is also 256; however, for the command queue, n_window is only + * 32 since we don't need so many commands pending. Since the HW + * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256. + * This means that we end up with the following: + * HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 | + * SW entries: | 0 | ... | 31 | + * where N is a number between 0 and 7. This means that the SW + * data is a window overlayed over the HW queue. */ struct iwl_txq { - struct iwl_queue q; - struct iwl_tfd *tfds; + void *tfds; struct iwl_pcie_first_tb_buf *first_tb_bufs; dma_addr_t first_tb_dma; struct iwl_pcie_txq_entry *entries; @@ -294,6 +282,14 @@ struct iwl_txq { bool block; unsigned long wd_timeout; struct sk_buff_head overflow_q; + + int write_ptr; + int read_ptr; + dma_addr_t dma_addr; + int n_window; + u32 id; + int low_mark; + int high_mark; }; static inline dma_addr_t @@ -308,6 +304,16 @@ struct iwl_tso_hdr_page { u8 *pos; }; +/** + * enum iwl_shared_irq_flags - level of sharing for irq + * @IWL_SHARED_IRQ_NON_RX: interrupt vector serves non rx causes. + * @IWL_SHARED_IRQ_FIRST_RSS: interrupt vector serves first RSS queue. + */ +enum iwl_shared_irq_flags { + IWL_SHARED_IRQ_NON_RX = BIT(0), + IWL_SHARED_IRQ_FIRST_RSS = BIT(1), +}; + /** * struct iwl_trans_pcie - PCIe transport specific data * @rxq: all the RX queue data @@ -326,7 +332,6 @@ struct iwl_tso_hdr_page { * @rx_buf_size: Rx buffer size * @bc_table_dword: true if the BC table expects DWORD (as opposed to bytes) * @scd_set_active: should the transport configure the SCD for HCMD queue - * @wide_cmd_header: true when ucode supports wide command header format * @sw_csum_tx: if true, then the transport will compute the csum of the TXed * frame. * @rx_page_order: page order for receive buffer size @@ -338,8 +343,10 @@ struct iwl_tso_hdr_page { * @fw_mon_size: size of the buffer for the firmware monitor * @msix_entries: array of MSI-X entries * @msix_enabled: true if managed to enable MSI-X - * @allocated_vector: the number of interrupt vector allocated by the OS - * @default_irq_num: default irq for non rx interrupt + * @shared_vec_mask: the type of causes the shared vector handles + * (see iwl_shared_irq_flags). + * @alloc_vecs: the number of interrupt vectors allocated by the OS + * @def_irq: default irq for non rx causes * @fh_init_mask: initial unmasked fh causes * @hw_init_mask: initial unmasked hw causes * @fh_mask: current unmasked fh causes @@ -391,11 +398,12 @@ struct iwl_trans_pcie { unsigned int cmd_q_wdg_timeout; u8 n_no_reclaim_cmds; u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS]; + u8 max_tbs; + u16 tfd_size; enum iwl_amsdu_size rx_buf_size; bool bc_table_dword; bool scd_set_active; - bool wide_cmd_header; bool sw_csum_tx; u32 rx_page_order; @@ -410,12 +418,14 @@ struct iwl_trans_pcie { struct msix_entry msix_entries[IWL_MAX_RX_HW_QUEUES]; bool msix_enabled; - u32 allocated_vector; - u32 default_irq_num; + u8 shared_vec_mask; + u32 alloc_vecs; + u32 def_irq; u32 fh_init_mask; u32 hw_init_mask; u32 fh_mask; u32 hw_mask; + cpumask_t affinity_mask[IWL_MAX_RX_HW_QUEUES]; }; static inline struct iwl_trans_pcie * @@ -474,6 +484,7 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int queue, bool configure_scd); void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id, bool shared_mode); +dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq); void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq); int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, @@ -486,11 +497,20 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct sk_buff_head *skbs); void iwl_trans_pcie_tx_reset(struct iwl_trans *trans); -static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_tfd *tfd, u8 idx) +static inline u16 iwl_pcie_tfd_tb_get_len(struct iwl_trans *trans, void *_tfd, + u8 idx) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd = _tfd; + struct iwl_tfh_tb *tb = &tfd->tbs[idx]; - return le16_to_cpu(tb->hi_n_len) >> 4; + return le16_to_cpu(tb->tb_len); + } else { + struct iwl_tfd *tfd = _tfd; + struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + + return le16_to_cpu(tb->hi_n_len) >> 4; + } } /***************************************************** @@ -617,9 +637,9 @@ static inline void iwl_wake_queue(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (test_and_clear_bit(txq->q.id, trans_pcie->queue_stopped)) { - IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->q.id); - iwl_op_mode_queue_not_full(trans->op_mode, txq->q.id); + if (test_and_clear_bit(txq->id, trans_pcie->queue_stopped)) { + IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->id); + iwl_op_mode_queue_not_full(trans->op_mode, txq->id); } } @@ -628,22 +648,22 @@ static inline void iwl_stop_queue(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - if (!test_and_set_bit(txq->q.id, trans_pcie->queue_stopped)) { - iwl_op_mode_queue_full(trans->op_mode, txq->q.id); - IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->q.id); + if (!test_and_set_bit(txq->id, trans_pcie->queue_stopped)) { + iwl_op_mode_queue_full(trans->op_mode, txq->id); + IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->id); } else IWL_DEBUG_TX_QUEUES(trans, "hwq %d already stopped\n", - txq->q.id); + txq->id); } -static inline bool iwl_queue_used(const struct iwl_queue *q, int i) +static inline bool iwl_queue_used(const struct iwl_txq *q, int i) { return q->write_ptr >= q->read_ptr ? (i >= q->read_ptr && i < q->write_ptr) : !(i < q->read_ptr && i >= q->write_ptr); } -static inline u8 get_cmd_index(struct iwl_queue *q, u32 index) +static inline u8 get_cmd_index(struct iwl_txq *q, u32 index) { return index & (q->n_window - 1); } diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c index 5c36e6d00622..6fe5546dc773 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c @@ -487,15 +487,13 @@ static void iwl_pcie_rx_allocator(struct iwl_trans *trans) while (pending) { int i; - struct list_head local_allocated; + LIST_HEAD(local_allocated); gfp_t gfp_mask = GFP_KERNEL; /* Do not post a warning if there are only a few requests */ if (pending < RX_PENDING_WATERMARK) gfp_mask |= __GFP_NOWARN; - INIT_LIST_HEAD(&local_allocated); - for (i = 0; i < RX_CLAIM_REQ_ALLOC;) { struct iwl_rx_mem_buffer *rxb; struct page *page; @@ -1108,13 +1106,14 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, FH_RSCSR_RXQ_POS != rxq->id); IWL_DEBUG_RX(trans, - "cmd at offset %d: %s (0x%.2x, seq 0x%x)\n", + "cmd at offset %d: %s (%.2x.%2x, seq 0x%x)\n", rxcb._offset, iwl_get_cmd_string(trans, iwl_cmd_id(pkt->hdr.cmd, pkt->hdr.group_id, 0)), - pkt->hdr.cmd, le16_to_cpu(pkt->hdr.sequence)); + pkt->hdr.group_id, pkt->hdr.cmd, + le16_to_cpu(pkt->hdr.sequence)); len = iwl_rx_packet_len(pkt); len += sizeof(u32); /* account for status word */ @@ -1142,7 +1141,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans, sequence = le16_to_cpu(pkt->hdr.sequence); index = SEQ_TO_INDEX(sequence); - cmd_index = get_cmd_index(&txq->q, index); + cmd_index = get_cmd_index(txq, index); if (rxq->id == 0) iwl_op_mode_rx(trans->op_mode, &rxq->napi, @@ -1885,6 +1884,20 @@ irqreturn_t iwl_pcie_irq_msix_handler(int irq, void *dev_id) inta_fh, iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD)); + if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) && + inta_fh & MSIX_FH_INT_CAUSES_Q0) { + local_bh_disable(); + iwl_pcie_rx_handle(trans, 0); + local_bh_enable(); + } + + if ((trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) && + inta_fh & MSIX_FH_INT_CAUSES_Q1) { + local_bh_disable(); + iwl_pcie_rx_handle(trans, 1); + local_bh_enable(); + } + /* This "Tx" DMA channel is used only for loading uCode */ if (inta_fh & MSIX_FH_INT_CAUSES_D2S_CH0_NUM) { IWL_DEBUG_ISR(trans, "uCode load interrupt\n"); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c index 74f2f035bd28..ae95533e587d 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c @@ -827,10 +827,16 @@ static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans, if (ret) return ret; - /* Notify the ucode of the loaded section number and status */ - val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); - val = val | (sec_num << shift_param); - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + /* Notify ucode of loaded section number and status */ + if (trans->cfg->use_tfh) { + val = iwl_read_prph(trans, UREG_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, val); + } else { + val = iwl_read_direct32(trans, FH_UCODE_LOAD_STATUS); + val = val | (sec_num << shift_param); + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, val); + } sec_num = (sec_num << 1) | 0x1; } @@ -838,10 +844,21 @@ static int iwl_pcie_load_cpu_sections_8000(struct iwl_trans *trans, iwl_enable_interrupts(trans); - if (cpu == 1) - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFF); - else - iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, 0xFFFFFFFF); + if (trans->cfg->use_tfh) { + if (cpu == 1) + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, + 0xFFFF); + else + iwl_write_prph(trans, UREG_UCODE_LOAD_STATUS, + 0xFFFFFFFF); + } else { + if (cpu == 1) + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, + 0xFFFF); + else + iwl_write_direct32(trans, FH_UCODE_LOAD_STATUS, + 0xFFFFFFFF); + } return 0; } @@ -886,14 +903,6 @@ static int iwl_pcie_load_cpu_sections(struct iwl_trans *trans, return ret; } - if (trans->cfg->device_family == IWL_DEVICE_FAMILY_8000) - iwl_set_bits_prph(trans, - CSR_UCODE_LOAD_STATUS_ADDR, - (LMPM_CPU_UCODE_LOADING_COMPLETED | - LMPM_CPU_HDRS_LOADING_COMPLETED | - LMPM_CPU_UCODE_LOADING_STARTED) << - shift_param); - *first_ucode_section = last_read_idx; return 0; @@ -1161,7 +1170,7 @@ static void iwl_pcie_synchronize_irqs(struct iwl_trans *trans) if (trans_pcie->msix_enabled) { int i; - for (i = 0; i < trans_pcie->allocated_vector; i++) + for (i = 0; i < trans_pcie->alloc_vecs; i++) synchronize_irq(trans_pcie->msix_entries[i].vector); } else { synchronize_irq(trans_pcie->pci_dev->irq); @@ -1420,13 +1429,58 @@ static struct iwl_causes_list causes_list[] = { {MSIX_HW_INT_CAUSES_REG_HAP, CSR_MSIX_HW_INT_MASK_AD, 0x2E}, }; +static void iwl_pcie_map_non_rx_causes(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int val = trans_pcie->def_irq | MSIX_NON_AUTO_CLEAR_CAUSE; + int i; + + /* + * Access all non RX causes and map them to the default irq. + * In case we are missing at least one interrupt vector, + * the first interrupt vector will serve non-RX and FBQ causes. + */ + for (i = 0; i < ARRAY_SIZE(causes_list); i++) { + iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val); + iwl_clear_bit(trans, causes_list[i].mask_reg, + causes_list[i].cause_num); + } +} + +static void iwl_pcie_map_rx_causes(struct iwl_trans *trans) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + u32 offset = + trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 1 : 0; + u32 val, idx; + + /* + * The first RX queue - fallback queue, which is designated for + * management frame, command responses etc, is always mapped to the + * first interrupt vector. The other RX queues are mapped to + * the other (N - 2) interrupt vectors. + */ + val = BIT(MSIX_FH_INT_CAUSES_Q(0)); + for (idx = 1; idx < trans->num_rx_queues; idx++) { + iwl_write8(trans, CSR_MSIX_RX_IVAR(idx), + MSIX_FH_INT_CAUSES_Q(idx - offset)); + val |= BIT(MSIX_FH_INT_CAUSES_Q(idx)); + } + iwl_write32(trans, CSR_MSIX_FH_INT_MASK_AD, ~val); + + val = MSIX_FH_INT_CAUSES_Q(0); + if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_NON_RX) + val |= MSIX_NON_AUTO_CLEAR_CAUSE; + iwl_write8(trans, CSR_MSIX_RX_IVAR(0), val); + + if (trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS) + iwl_write8(trans, CSR_MSIX_RX_IVAR(1), val); +} + static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie) { - u32 val, max_rx_vector, i; struct iwl_trans *trans = trans_pcie->trans; - max_rx_vector = trans_pcie->allocated_vector - 1; - if (!trans_pcie->msix_enabled) { if (trans->cfg->mq_rx_supported) iwl_write_prph(trans, UREG_CHICK, @@ -1437,25 +1491,16 @@ static void iwl_pcie_init_msix(struct iwl_trans_pcie *trans_pcie) iwl_write_prph(trans, UREG_CHICK, UREG_CHICK_MSIX_ENABLE); /* - * Each cause from the list above and the RX causes is represented as - * a byte in the IVAR table. We access the first (N - 1) bytes and map - * them to the (N - 1) vectors so these vectors will be used as rx - * vectors. Then access all non rx causes and map them to the - * default queue (N'th queue). + * Each cause from the causes list above and the RX causes is + * represented as a byte in the IVAR table. The first nibble + * represents the bound interrupt vector of the cause, the second + * represents no auto clear for this cause. This will be set if its + * interrupt vector is bound to serve other causes. */ - for (i = 0; i < max_rx_vector; i++) { - iwl_write8(trans, CSR_MSIX_RX_IVAR(i), MSIX_FH_INT_CAUSES_Q(i)); - iwl_clear_bit(trans, CSR_MSIX_FH_INT_MASK_AD, - BIT(MSIX_FH_INT_CAUSES_Q(i))); - } + iwl_pcie_map_rx_causes(trans); + + iwl_pcie_map_non_rx_causes(trans); - for (i = 0; i < ARRAY_SIZE(causes_list); i++) { - val = trans_pcie->default_irq_num | - MSIX_NON_AUTO_CLEAR_CAUSE; - iwl_write8(trans, CSR_MSIX_IVAR(causes_list[i].addr), val); - iwl_clear_bit(trans, causes_list[i].mask_reg, - causes_list[i].cause_num); - } trans_pcie->fh_init_mask = ~iwl_read32(trans, CSR_MSIX_FH_INT_MASK_AD); trans_pcie->fh_mask = trans_pcie->fh_init_mask; @@ -1468,40 +1513,55 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, struct iwl_trans *trans) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int max_irqs, num_irqs, i, ret, nr_online_cpus; u16 pci_cmd; - int max_vector; - int ret, i; - if (trans->cfg->mq_rx_supported) { - max_vector = min_t(u32, (num_possible_cpus() + 2), - IWL_MAX_RX_HW_QUEUES); - for (i = 0; i < max_vector; i++) - trans_pcie->msix_entries[i].entry = i; + if (!trans->cfg->mq_rx_supported) + goto enable_msi; - ret = pci_enable_msix_range(pdev, trans_pcie->msix_entries, - MSIX_MIN_INTERRUPT_VECTORS, - max_vector); - if (ret > 1) { - IWL_DEBUG_INFO(trans, - "Enable MSI-X allocate %d interrupt vector\n", - ret); - trans_pcie->allocated_vector = ret; - trans_pcie->default_irq_num = - trans_pcie->allocated_vector - 1; - trans_pcie->trans->num_rx_queues = - trans_pcie->allocated_vector - 1; - trans_pcie->msix_enabled = true; + nr_online_cpus = num_online_cpus(); + max_irqs = min_t(u32, nr_online_cpus + 2, IWL_MAX_RX_HW_QUEUES); + for (i = 0; i < max_irqs; i++) + trans_pcie->msix_entries[i].entry = i; - return; - } + num_irqs = pci_enable_msix_range(pdev, trans_pcie->msix_entries, + MSIX_MIN_INTERRUPT_VECTORS, + max_irqs); + if (num_irqs < 0) { IWL_DEBUG_INFO(trans, - "ret = %d %s move to msi mode\n", ret, - (ret == 1) ? - "can't allocate more than 1 interrupt vector" : - "failed to enable msi-x mode"); - pci_disable_msix(pdev); + "Failed to enable msi-x mode (ret %d). Moving to msi mode.\n", + num_irqs); + goto enable_msi; + } + trans_pcie->def_irq = (num_irqs == max_irqs) ? num_irqs - 1 : 0; + + IWL_DEBUG_INFO(trans, + "MSI-X enabled. %d interrupt vectors were allocated\n", + num_irqs); + + /* + * In case the OS provides fewer interrupts than requested, different + * causes will share the same interrupt vector as follows: + * One interrupt less: non rx causes shared with FBQ. + * Two interrupts less: non rx causes shared with FBQ and RSS. + * More than two interrupts: we will use fewer RSS queues. + */ + if (num_irqs <= nr_online_cpus) { + trans_pcie->trans->num_rx_queues = num_irqs + 1; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX | + IWL_SHARED_IRQ_FIRST_RSS; + } else if (num_irqs == nr_online_cpus + 1) { + trans_pcie->trans->num_rx_queues = num_irqs; + trans_pcie->shared_vec_mask = IWL_SHARED_IRQ_NON_RX; + } else { + trans_pcie->trans->num_rx_queues = num_irqs - 1; } + trans_pcie->alloc_vecs = num_irqs; + trans_pcie->msix_enabled = true; + return; + +enable_msi: ret = pci_enable_msi(pdev); if (ret) { dev_err(&pdev->dev, "pci_enable_msi failed - %d\n", ret); @@ -1514,36 +1574,57 @@ static void iwl_pcie_set_interrupt_capa(struct pci_dev *pdev, } } +static void iwl_pcie_irq_set_affinity(struct iwl_trans *trans) +{ + int iter_rx_q, i, ret, cpu, offset; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + i = trans_pcie->shared_vec_mask & IWL_SHARED_IRQ_FIRST_RSS ? 0 : 1; + iter_rx_q = trans_pcie->trans->num_rx_queues - 1 + i; + offset = 1 + i; + for (; i < iter_rx_q ; i++) { + /* + * Get the cpu prior to the place to search + * (i.e. return will be > i - 1). + */ + cpu = cpumask_next(i - offset, cpu_online_mask); + cpumask_set_cpu(cpu, &trans_pcie->affinity_mask[i]); + ret = irq_set_affinity_hint(trans_pcie->msix_entries[i].vector, + &trans_pcie->affinity_mask[i]); + if (ret) + IWL_ERR(trans_pcie->trans, + "Failed to set affinity mask for IRQ %d\n", + i); + } +} + static int iwl_pcie_init_msix_handler(struct pci_dev *pdev, struct iwl_trans_pcie *trans_pcie) { - int i, last_vector; + int i; - last_vector = trans_pcie->trans->num_rx_queues; - - for (i = 0; i < trans_pcie->allocated_vector; i++) { + for (i = 0; i < trans_pcie->alloc_vecs; i++) { int ret; + struct msix_entry *msix_entry; - ret = request_threaded_irq(trans_pcie->msix_entries[i].vector, - iwl_pcie_msix_isr, - (i == last_vector) ? - iwl_pcie_irq_msix_handler : - iwl_pcie_irq_rx_msix_handler, - IRQF_SHARED, - DRV_NAME, - &trans_pcie->msix_entries[i]); + msix_entry = &trans_pcie->msix_entries[i]; + ret = devm_request_threaded_irq(&pdev->dev, + msix_entry->vector, + iwl_pcie_msix_isr, + (i == trans_pcie->def_irq) ? + iwl_pcie_irq_msix_handler : + iwl_pcie_irq_rx_msix_handler, + IRQF_SHARED, + DRV_NAME, + msix_entry); if (ret) { - int j; - IWL_ERR(trans_pcie->trans, "Error allocating IRQ %d\n", i); - for (j = 0; j < i; j++) - free_irq(trans_pcie->msix_entries[j].vector, - &trans_pcie->msix_entries[j]); - pci_disable_msix(pdev); + return ret; } } + iwl_pcie_irq_set_affinity(trans_pcie->trans); return 0; } @@ -1672,7 +1753,6 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans, trans_pcie->rx_page_order = iwl_trans_get_rb_size_order(trans_pcie->rx_buf_size); - trans_pcie->wide_cmd_header = trans_cfg->wide_cmd_header; trans_pcie->bc_table_dword = trans_cfg->bc_table_dword; trans_pcie->scd_set_active = trans_cfg->scd_set_active; trans_pcie->sw_csum_tx = trans_cfg->sw_csum_tx; @@ -1703,22 +1783,16 @@ void iwl_trans_pcie_free(struct iwl_trans *trans) iwl_pcie_rx_free(trans); if (trans_pcie->msix_enabled) { - for (i = 0; i < trans_pcie->allocated_vector; i++) - free_irq(trans_pcie->msix_entries[i].vector, - &trans_pcie->msix_entries[i]); + for (i = 0; i < trans_pcie->alloc_vecs; i++) { + irq_set_affinity_hint( + trans_pcie->msix_entries[i].vector, + NULL); + } - pci_disable_msix(trans_pcie->pci_dev); trans_pcie->msix_enabled = false; } else { - free_irq(trans_pcie->pci_dev->irq, trans); - iwl_pcie_free_ict(trans); - - pci_disable_msi(trans_pcie->pci_dev); } - iounmap(trans_pcie->hw_base); - pci_release_regions(trans_pcie->pci_dev); - pci_disable_device(trans_pcie->pci_dev); iwl_pcie_free_fw_monitor(trans); @@ -1890,7 +1964,7 @@ static void iwl_trans_pcie_freeze_txq_timer(struct iwl_trans *trans, txq->frozen = freeze; - if (txq->q.read_ptr == txq->q.write_ptr) + if (txq->read_ptr == txq->write_ptr) goto next_queue; if (freeze) { @@ -1938,7 +2012,7 @@ static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block) txq->block--; if (!txq->block) { iwl_write32(trans, HBUS_TARG_WRPTR, - txq->q.write_ptr | (i << 8)); + txq->write_ptr | (i << 8)); } } else if (block) { txq->block++; @@ -1958,10 +2032,14 @@ void iwl_trans_pcie_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq) int cnt; IWL_ERR(trans, "Current SW read_ptr %d write_ptr %d\n", - txq->q.read_ptr, txq->q.write_ptr); + txq->read_ptr, txq->write_ptr); + + if (trans->cfg->use_tfh) + /* TODO: access new SCD registers and dump them */ + return; scd_sram_addr = trans_pcie->scd_base_addr + - SCD_TX_STTS_QUEUE_OFFSET(txq->q.id); + SCD_TX_STTS_QUEUE_OFFSET(txq->id); iwl_trans_read_mem_bytes(trans, scd_sram_addr, buf, sizeof(buf)); iwl_print_hex_error(trans, buf, sizeof(buf)); @@ -1996,7 +2074,6 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq; - struct iwl_queue *q; int cnt; unsigned long now = jiffies; int ret = 0; @@ -2014,13 +2091,12 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", cnt); txq = &trans_pcie->txq[cnt]; - q = &txq->q; - wr_ptr = ACCESS_ONCE(q->write_ptr); + wr_ptr = ACCESS_ONCE(txq->write_ptr); - while (q->read_ptr != ACCESS_ONCE(q->write_ptr) && + while (txq->read_ptr != ACCESS_ONCE(txq->write_ptr) && !time_after(jiffies, now + msecs_to_jiffies(IWL_FLUSH_WAIT_MS))) { - u8 write_ptr = ACCESS_ONCE(q->write_ptr); + u8 write_ptr = ACCESS_ONCE(txq->write_ptr); if (WARN_ONCE(wr_ptr != write_ptr, "WR pointer moved while flushing %d -> %d\n", @@ -2029,7 +2105,7 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, u32 txq_bm) usleep_range(1000, 2000); } - if (q->read_ptr != q->write_ptr) { + if (txq->read_ptr != txq->write_ptr) { IWL_ERR(trans, "fail to flush all tx fifo queues Q %d\n", cnt); ret = -ETIMEDOUT; @@ -2197,7 +2273,6 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, struct iwl_trans *trans = file->private_data; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq; - struct iwl_queue *q; char *buf; int pos = 0; int cnt; @@ -2215,10 +2290,9 @@ static ssize_t iwl_dbgfs_tx_queue_read(struct file *file, for (cnt = 0; cnt < trans->cfg->base_params->num_of_queues; cnt++) { txq = &trans_pcie->txq[cnt]; - q = &txq->q; pos += scnprintf(buf + pos, bufsz - pos, "hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n", - cnt, q->read_ptr, q->write_ptr, + cnt, txq->read_ptr, txq->write_ptr, !!test_bit(cnt, trans_pcie->queue_used), !!test_bit(cnt, trans_pcie->queue_stopped), txq->need_update, txq->frozen, @@ -2424,13 +2498,14 @@ err: } #endif /*CONFIG_IWLWIFI_DEBUGFS */ -static u32 iwl_trans_pcie_get_cmdlen(struct iwl_tfd *tfd) +static u32 iwl_trans_pcie_get_cmdlen(struct iwl_trans *trans, void *tfd) { + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 cmdlen = 0; int i; - for (i = 0; i < IWL_NUM_OF_TBS; i++) - cmdlen += iwl_pcie_tfd_tb_get_len(tfd, i); + for (i = 0; i < trans_pcie->max_tbs; i++) + cmdlen += iwl_pcie_tfd_tb_get_len(trans, tfd, i); return cmdlen; } @@ -2645,7 +2720,7 @@ static struct iwl_trans_dump_data /* host commands */ len += sizeof(*data) + - cmdq->q.n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); + cmdq->n_window * (sizeof(*txcmd) + TFD_MAX_PAYLOAD_SIZE); /* FW monitor */ if (trans_pcie->fw_mon_page) { @@ -2713,12 +2788,13 @@ static struct iwl_trans_dump_data data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_TXCMD); txcmd = (void *)data->data; spin_lock_bh(&cmdq->lock); - ptr = cmdq->q.write_ptr; - for (i = 0; i < cmdq->q.n_window; i++) { - u8 idx = get_cmd_index(&cmdq->q, ptr); + ptr = cmdq->write_ptr; + for (i = 0; i < cmdq->n_window; i++) { + u8 idx = get_cmd_index(cmdq, ptr); u32 caplen, cmdlen; - cmdlen = iwl_trans_pcie_get_cmdlen(&cmdq->tfds[ptr]); + cmdlen = iwl_trans_pcie_get_cmdlen(trans, cmdq->tfds + + trans_pcie->tfd_size * ptr); caplen = min_t(u32, TFD_MAX_PAYLOAD_SIZE, cmdlen); if (cmdlen) { @@ -2788,6 +2864,8 @@ static const struct iwl_trans_ops trans_ops_pcie = { .txq_disable = iwl_trans_pcie_txq_disable, .txq_enable = iwl_trans_pcie_txq_enable, + .get_txq_byte_table = iwl_trans_pcie_get_txq_byte_table, + .txq_set_shared_mode = iwl_trans_pcie_txq_set_shared_mode, .wait_tx_queue_empty = iwl_trans_pcie_wait_txq_empty, @@ -2821,13 +2899,15 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, struct iwl_trans *trans; int ret, addr_size; + ret = pcim_enable_device(pdev); + if (ret) + return ERR_PTR(ret); + trans = iwl_trans_alloc(sizeof(struct iwl_trans_pcie), &pdev->dev, cfg, &trans_ops_pcie, 0); if (!trans) return ERR_PTR(-ENOMEM); - trans->max_skb_frags = IWL_PCIE_MAX_FRAGS; - trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); trans_pcie->trans = trans; @@ -2841,9 +2921,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, goto out_no_pci; } - ret = pci_enable_device(pdev); - if (ret) - goto out_no_pci; if (!cfg->base_params->pcie_l1_allowed) { /* @@ -2861,6 +2938,16 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, else addr_size = 36; + if (cfg->use_tfh) { + trans_pcie->max_tbs = IWL_TFH_NUM_TBS; + trans_pcie->tfd_size = sizeof(struct iwl_tfh_tfd); + + } else { + trans_pcie->max_tbs = IWL_NUM_OF_TBS; + trans_pcie->tfd_size = sizeof(struct iwl_tfd); + } + trans->max_skb_frags = IWL_PCIE_MAX_FRAGS(trans_pcie); + pci_set_master(pdev); ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(addr_size)); @@ -2875,21 +2962,21 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, /* both attempts failed: */ if (ret) { dev_err(&pdev->dev, "No suitable DMA available\n"); - goto out_pci_disable_device; + goto out_no_pci; } } - ret = pci_request_regions(pdev, DRV_NAME); + ret = pcim_iomap_regions_request_all(pdev, BIT(0), DRV_NAME); if (ret) { - dev_err(&pdev->dev, "pci_request_regions failed\n"); - goto out_pci_disable_device; + dev_err(&pdev->dev, "pcim_iomap_regions_request_all failed\n"); + goto out_no_pci; } - trans_pcie->hw_base = pci_ioremap_bar(pdev, 0); + trans_pcie->hw_base = pcim_iomap_table(pdev)[0]; if (!trans_pcie->hw_base) { - dev_err(&pdev->dev, "pci_ioremap_bar failed\n"); + dev_err(&pdev->dev, "pcim_iomap_table failed\n"); ret = -ENODEV; - goto out_pci_release_regions; + goto out_no_pci; } /* We disable the RETRY_TIMEOUT register (0x41) to keep @@ -2916,7 +3003,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, ret = iwl_pcie_prepare_card_hw(trans); if (ret) { IWL_WARN(trans, "Exit HW not ready\n"); - goto out_pci_disable_msi; + goto out_no_pci; } /* @@ -2933,7 +3020,7 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, 25000); if (ret < 0) { IWL_DEBUG_INFO(trans, "Failed to wake up the nic\n"); - goto out_pci_disable_msi; + goto out_no_pci; } if (iwl_trans_grab_nic_access(trans, &flags)) { @@ -2965,15 +3052,16 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, if (trans_pcie->msix_enabled) { if (iwl_pcie_init_msix_handler(pdev, trans_pcie)) - goto out_pci_release_regions; + goto out_no_pci; } else { ret = iwl_pcie_alloc_ict(trans); if (ret) - goto out_pci_disable_msi; + goto out_no_pci; - ret = request_threaded_irq(pdev->irq, iwl_pcie_isr, - iwl_pcie_irq_handler, - IRQF_SHARED, DRV_NAME, trans); + ret = devm_request_threaded_irq(&pdev->dev, pdev->irq, + iwl_pcie_isr, + iwl_pcie_irq_handler, + IRQF_SHARED, DRV_NAME, trans); if (ret) { IWL_ERR(trans, "Error allocating IRQ %d\n", pdev->irq); goto out_free_ict; @@ -2991,12 +3079,6 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev, out_free_ict: iwl_pcie_free_ict(trans); -out_pci_disable_msi: - pci_disable_msi(pdev); -out_pci_release_regions: - pci_release_regions(pdev); -out_pci_disable_device: - pci_disable_device(pdev); out_no_pci: free_percpu(trans_pcie->tso_hdr_page); iwl_trans_free(trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c index 18650dccdb58..e9a278b60dfd 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c @@ -71,7 +71,7 @@ * ***************************************************/ -static int iwl_queue_space(const struct iwl_queue *q) +static int iwl_queue_space(const struct iwl_txq *q) { unsigned int max; unsigned int used; @@ -102,7 +102,7 @@ static int iwl_queue_space(const struct iwl_queue *q) /* * iwl_queue_init - Initialize queue's high/low-water and read/write indexes */ -static int iwl_queue_init(struct iwl_queue *q, int slots_num, u32 id) +static int iwl_queue_init(struct iwl_txq *q, int slots_num, u32 id) { q->n_window = slots_num; q->id = id; @@ -158,13 +158,13 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data) spin_lock(&txq->lock); /* check if triggered erroneously */ - if (txq->q.read_ptr == txq->q.write_ptr) { + if (txq->read_ptr == txq->write_ptr) { spin_unlock(&txq->lock); return; } spin_unlock(&txq->lock); - IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->q.id, + IWL_ERR(trans, "Queue %d stuck for %u ms.\n", txq->id, jiffies_to_msecs(txq->wd_timeout)); iwl_trans_pcie_log_scd_error(trans, txq); @@ -176,22 +176,21 @@ static void iwl_pcie_txq_stuck_timer(unsigned long data) * iwl_pcie_txq_update_byte_cnt_tbl - Set up entry in Tx byte-count array */ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, - struct iwl_txq *txq, u16 byte_cnt) + struct iwl_txq *txq, u16 byte_cnt, + int num_tbs) { struct iwlagn_scd_bc_tbl *scd_bc_tbl; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - int write_ptr = txq->q.write_ptr; - int txq_id = txq->q.id; + int write_ptr = txq->write_ptr; + int txq_id = txq->id; u8 sec_ctl = 0; - u8 sta_id = 0; u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = - (void *) txq->entries[txq->q.write_ptr].cmd->payload; + (void *)txq->entries[txq->write_ptr].cmd->payload; scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; - sta_id = tx_cmd->sta_id; sec_ctl = tx_cmd->sec_ctl; switch (sec_ctl & TX_CMD_SEC_MSK) { @@ -205,14 +204,32 @@ static void iwl_pcie_txq_update_byte_cnt_tbl(struct iwl_trans *trans, len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN; break; } - if (trans_pcie->bc_table_dword) len = DIV_ROUND_UP(len, 4); if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX)) return; - bc_ent = cpu_to_le16(len | (sta_id << 12)); + if (trans->cfg->use_tfh) { + u8 filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) + + num_tbs * sizeof(struct iwl_tfh_tb); + /* + * filled_tfd_size contains the number of filled bytes in the + * TFD. + * Dividing it by 64 will give the number of chunks to fetch + * to SRAM- 0 for one chunk, 1 for 2 and so on. + * If, for example, TFD contains only 3 TBs then 32 bytes + * of the TFD are used, and only one chunk of 64 bytes should + * be fetched + */ + u8 num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1; + + bc_ent = cpu_to_le16(len | (num_fetch_chunks << 12)); + } else { + u8 sta_id = tx_cmd->sta_id; + + bc_ent = cpu_to_le16(len | (sta_id << 12)); + } scd_bc_tbl[txq_id].tfd_offset[write_ptr] = bc_ent; @@ -227,12 +244,12 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwlagn_scd_bc_tbl *scd_bc_tbl = trans_pcie->scd_bc_tbls.addr; - int txq_id = txq->q.id; - int read_ptr = txq->q.read_ptr; + int txq_id = txq->id; + int read_ptr = txq->read_ptr; u8 sta_id = 0; __le16 bc_ent; struct iwl_tx_cmd *tx_cmd = - (void *)txq->entries[txq->q.read_ptr].cmd->payload; + (void *)txq->entries[read_ptr].cmd->payload; WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX); @@ -240,6 +257,7 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans, sta_id = tx_cmd->sta_id; bc_ent = cpu_to_le16(1 | (sta_id << 12)); + scd_bc_tbl[txq_id].tfd_offset[read_ptr] = bc_ent; if (read_ptr < TFD_QUEUE_SIZE_BC_DUP) @@ -255,7 +273,7 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u32 reg = 0; - int txq_id = txq->q.id; + int txq_id = txq->id; lockdep_assert_held(&txq->lock); @@ -289,10 +307,10 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans, * if not in power-save mode, uCode will never sleep when we're * trying to tx (during RFKILL, we're not trying to tx). */ - IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->q.write_ptr); + IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr); if (!txq->block) iwl_write32(trans, HBUS_TARG_WRPTR, - txq->q.write_ptr | (txq_id << 8)); + txq->write_ptr | (txq_id << 8)); } void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans) @@ -312,49 +330,93 @@ void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans) } } -static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx) +static inline void *iwl_pcie_get_tfd(struct iwl_trans_pcie *trans_pcie, + struct iwl_txq *txq, int idx) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; - - dma_addr_t addr = get_unaligned_le32(&tb->lo); - if (sizeof(dma_addr_t) > sizeof(u32)) - addr |= - ((dma_addr_t)(le16_to_cpu(tb->hi_n_len) & 0xF) << 16) << 16; - - return addr; + return txq->tfds + trans_pcie->tfd_size * idx; } -static inline void iwl_pcie_tfd_set_tb(struct iwl_tfd *tfd, u8 idx, - dma_addr_t addr, u16 len) +static inline dma_addr_t iwl_pcie_tfd_tb_get_addr(struct iwl_trans *trans, + void *_tfd, u8 idx) { - struct iwl_tfd_tb *tb = &tfd->tbs[idx]; - u16 hi_n_len = len << 4; - put_unaligned_le32(addr, &tb->lo); - if (sizeof(dma_addr_t) > sizeof(u32)) - hi_n_len |= ((addr >> 16) >> 16) & 0xF; + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd = _tfd; + struct iwl_tfh_tb *tb = &tfd->tbs[idx]; - tb->hi_n_len = cpu_to_le16(hi_n_len); + return (dma_addr_t)(le64_to_cpu(tb->addr)); + } else { + struct iwl_tfd *tfd = _tfd; + struct iwl_tfd_tb *tb = &tfd->tbs[idx]; + dma_addr_t addr = get_unaligned_le32(&tb->lo); + dma_addr_t hi_len; - tfd->num_tbs = idx + 1; + if (sizeof(dma_addr_t) <= sizeof(u32)) + return addr; + + hi_len = le16_to_cpu(tb->hi_n_len) & 0xF; + + /* + * shift by 16 twice to avoid warnings on 32-bit + * (where this code never runs anyway due to the + * if statement above) + */ + return addr | ((hi_len << 16) << 16); + } } -static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_tfd *tfd) +static inline void iwl_pcie_tfd_set_tb(struct iwl_trans *trans, void *tfd, + u8 idx, dma_addr_t addr, u16 len) { - return tfd->num_tbs & 0x1f; + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + struct iwl_tfh_tb *tb = &tfd_fh->tbs[idx]; + + put_unaligned_le64(addr, &tb->addr); + tb->tb_len = cpu_to_le16(len); + + tfd_fh->num_tbs = cpu_to_le16(idx + 1); + } else { + struct iwl_tfd *tfd_fh = (void *)tfd; + struct iwl_tfd_tb *tb = &tfd_fh->tbs[idx]; + + u16 hi_n_len = len << 4; + + put_unaligned_le32(addr, &tb->lo); + if (sizeof(dma_addr_t) > sizeof(u32)) + hi_n_len |= ((addr >> 16) >> 16) & 0xF; + + tb->hi_n_len = cpu_to_le16(hi_n_len); + + tfd_fh->num_tbs = idx + 1; + } +} + +static inline u8 iwl_pcie_tfd_get_num_tbs(struct iwl_trans *trans, void *_tfd) +{ + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd = _tfd; + + return le16_to_cpu(tfd->num_tbs) & 0x1f; + } else { + struct iwl_tfd *tfd = _tfd; + + return tfd->num_tbs & 0x1f; + } } static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, struct iwl_cmd_meta *meta, - struct iwl_tfd *tfd) + struct iwl_txq *txq, int index) { - int i; - int num_tbs; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + int i, num_tbs; + void *tfd = iwl_pcie_get_tfd(trans_pcie, txq, index); /* Sanity check on number of chunks */ - num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); + num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); - if (num_tbs >= IWL_NUM_OF_TBS) { + if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Too many chunks: %i\n", num_tbs); /* @todo issue fatal error, it is quite serious situation */ return; @@ -363,18 +425,30 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, /* first TB is never freed - it's the bidirectional DMA data */ for (i = 1; i < num_tbs; i++) { - if (meta->flags & BIT(i + CMD_TB_BITMAP_POS)) + if (meta->tbs & BIT(i)) dma_unmap_page(trans->dev, - iwl_pcie_tfd_tb_get_addr(tfd, i), - iwl_pcie_tfd_tb_get_len(tfd, i), + iwl_pcie_tfd_tb_get_addr(trans, tfd, i), + iwl_pcie_tfd_tb_get_len(trans, tfd, i), DMA_TO_DEVICE); else dma_unmap_single(trans->dev, - iwl_pcie_tfd_tb_get_addr(tfd, i), - iwl_pcie_tfd_tb_get_len(tfd, i), + iwl_pcie_tfd_tb_get_addr(trans, tfd, + i), + iwl_pcie_tfd_tb_get_len(trans, tfd, + i), DMA_TO_DEVICE); } - tfd->num_tbs = 0; + + if (trans->cfg->use_tfh) { + struct iwl_tfh_tfd *tfd_fh = (void *)tfd; + + tfd_fh->num_tbs = 0; + } else { + struct iwl_tfd *tfd_fh = (void *)tfd; + + tfd_fh->num_tbs = 0; + } + } /* @@ -388,20 +462,18 @@ static void iwl_pcie_tfd_unmap(struct iwl_trans *trans, */ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) { - struct iwl_tfd *tfd_tmp = txq->tfds; - /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and * idx is bounded by n_window */ - int rd_ptr = txq->q.read_ptr; - int idx = get_cmd_index(&txq->q, rd_ptr); + int rd_ptr = txq->read_ptr; + int idx = get_cmd_index(txq, rd_ptr); lockdep_assert_held(&txq->lock); /* We have only q->n_window txq->entries, but we use * TFD_QUEUE_SIZE_MAX tfds */ - iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, &tfd_tmp[rd_ptr]); + iwl_pcie_tfd_unmap(trans, &txq->entries[idx].meta, txq, rd_ptr); /* free SKB */ if (txq->entries) { @@ -423,23 +495,21 @@ static void iwl_pcie_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq) static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, dma_addr_t addr, u16 len, bool reset) { - struct iwl_queue *q; - struct iwl_tfd *tfd, *tfd_tmp; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + void *tfd; u32 num_tbs; - q = &txq->q; - tfd_tmp = txq->tfds; - tfd = &tfd_tmp[q->write_ptr]; + tfd = txq->tfds + trans_pcie->tfd_size * txq->write_ptr; if (reset) - memset(tfd, 0, sizeof(*tfd)); + memset(tfd, 0, trans_pcie->tfd_size); - num_tbs = iwl_pcie_tfd_get_num_tbs(tfd); + num_tbs = iwl_pcie_tfd_get_num_tbs(trans, tfd); - /* Each TFD can point to a maximum 20 Tx buffers */ - if (num_tbs >= IWL_NUM_OF_TBS) { + /* Each TFD can point to a maximum max_tbs Tx buffers */ + if (num_tbs >= trans_pcie->max_tbs) { IWL_ERR(trans, "Error can not send more than %d chunks\n", - IWL_NUM_OF_TBS); + trans_pcie->max_tbs); return -EINVAL; } @@ -447,7 +517,7 @@ static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq, "Unaligned address = %llx\n", (unsigned long long)addr)) return -EINVAL; - iwl_pcie_tfd_set_tb(tfd, num_tbs, addr, len); + iwl_pcie_tfd_set_tb(trans, tfd, num_tbs, addr, len); return num_tbs; } @@ -457,7 +527,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, u32 txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); - size_t tfd_sz = sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX; + size_t tfd_sz = trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX; size_t tb0_buf_sz; int i; @@ -468,7 +538,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, (unsigned long)txq); txq->trans_pcie = trans_pcie; - txq->q.n_window = slots_num; + txq->n_window = slots_num; txq->entries = kcalloc(slots_num, sizeof(struct iwl_pcie_txq_entry), @@ -489,7 +559,7 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, /* Circular buffer of transmit frame descriptors (TFDs), * shared with device */ txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz, - &txq->q.dma_addr, GFP_KERNEL); + &txq->dma_addr, GFP_KERNEL); if (!txq->tfds) goto error; @@ -503,11 +573,11 @@ static int iwl_pcie_txq_alloc(struct iwl_trans *trans, if (!txq->first_tb_bufs) goto err_free_tfds; - txq->q.id = txq_id; + txq->id = txq_id; return 0; err_free_tfds: - dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->q.dma_addr); + dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr); error: if (txq->entries && txq_id == trans_pcie->cmd_queue) for (i = 0; i < slots_num; i++) @@ -531,7 +601,7 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, BUILD_BUG_ON(TFD_QUEUE_SIZE_MAX & (TFD_QUEUE_SIZE_MAX - 1)); /* Initialize queue's high/low-water marks, and head/tail indexes */ - ret = iwl_queue_init(&txq->q, slots_num, txq_id); + ret = iwl_queue_init(txq, slots_num, txq_id); if (ret) return ret; @@ -545,10 +615,10 @@ static int iwl_pcie_txq_init(struct iwl_trans *trans, struct iwl_txq *txq, if (trans->cfg->use_tfh) iwl_write_direct64(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr); + txq->dma_addr); else iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr >> 8); + txq->dma_addr >> 8); return 0; } @@ -595,15 +665,14 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; - struct iwl_queue *q = &txq->q; spin_lock_bh(&txq->lock); - while (q->write_ptr != q->read_ptr) { + while (txq->write_ptr != txq->read_ptr) { IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n", - txq_id, q->read_ptr); + txq_id, txq->read_ptr); if (txq_id != trans_pcie->cmd_queue) { - struct sk_buff *skb = txq->entries[q->read_ptr].skb; + struct sk_buff *skb = txq->entries[txq->read_ptr].skb; if (WARN_ON_ONCE(!skb)) continue; @@ -611,15 +680,15 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id) iwl_pcie_free_tso_page(trans_pcie, skb); } iwl_pcie_txq_free_tfd(trans, txq); - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr); + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr); - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { unsigned long flags; spin_lock_irqsave(&trans_pcie->reg_lock, flags); if (txq_id != trans_pcie->cmd_queue) { IWL_DEBUG_RPM(trans, "Q %d - last tx freed\n", - q->id); + txq->id); iwl_trans_unref(trans); } else { iwl_pcie_clear_cmd_in_flight(trans); @@ -663,7 +732,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) /* De-alloc array of command/tx buffers */ if (txq_id == trans_pcie->cmd_queue) - for (i = 0; i < txq->q.n_window; i++) { + for (i = 0; i < txq->n_window; i++) { kzfree(txq->entries[i].cmd); kzfree(txq->entries[i].free_buf); } @@ -671,13 +740,13 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id) /* De-alloc circular buffer of TFDs */ if (txq->tfds) { dma_free_coherent(dev, - sizeof(struct iwl_tfd) * TFD_QUEUE_SIZE_MAX, - txq->tfds, txq->q.dma_addr); - txq->q.dma_addr = 0; + trans_pcie->tfd_size * TFD_QUEUE_SIZE_MAX, + txq->tfds, txq->dma_addr); + txq->dma_addr = 0; txq->tfds = NULL; dma_free_coherent(dev, - sizeof(*txq->first_tb_bufs) * txq->q.n_window, + sizeof(*txq->first_tb_bufs) * txq->n_window, txq->first_tb_bufs, txq->first_tb_dma); } @@ -703,6 +772,9 @@ void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr) memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped)); memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used)); + if (trans->cfg->use_tfh) + return; + trans_pcie->scd_base_addr = iwl_read_prph(trans, SCD_SRAM_BASE_ADDR); @@ -758,14 +830,14 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans) if (trans->cfg->use_tfh) iwl_write_direct64(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr); + txq->dma_addr); else iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id), - txq->q.dma_addr >> 8); + txq->dma_addr >> 8); iwl_pcie_txq_unmap(trans, txq_id); - txq->q.read_ptr = 0; - txq->q.write_ptr = 0; + txq->read_ptr = 0; + txq->write_ptr = 0; } /* Tell NIC where to find the "keep warm" buffer */ @@ -970,11 +1042,13 @@ int iwl_pcie_tx_init(struct iwl_trans *trans) } } - if (trans->cfg->use_tfh) + if (trans->cfg->use_tfh) { iwl_write_direct32(trans, TFH_TRANSFER_MODE, TFH_TRANSFER_MAX_PENDING_REQ | TFH_CHUNK_SIZE_128 | TFH_CHUNK_SPLIT_MODE); + return 0; + } iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE); if (trans->cfg->base_params->num_of_queues > 20) @@ -1007,7 +1081,7 @@ static inline void iwl_pcie_txq_progress(struct iwl_txq *txq) * if empty delete timer, otherwise move timer forward * since we're making progress on this queue */ - if (txq->q.read_ptr == txq->q.write_ptr) + if (txq->read_ptr == txq->write_ptr) del_timer(&txq->stuck_timer); else mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); @@ -1020,7 +1094,6 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; int tfd_num = ssn & (TFD_QUEUE_SIZE_MAX - 1); - struct iwl_queue *q = &txq->q; int last_to_free; /* This function is not meant to release cmd queue*/ @@ -1035,21 +1108,21 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, goto out; } - if (txq->q.read_ptr == tfd_num) + if (txq->read_ptr == tfd_num) goto out; IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d -> %d (%d)\n", - txq_id, txq->q.read_ptr, tfd_num, ssn); + txq_id, txq->read_ptr, tfd_num, ssn); /*Since we free until index _not_ inclusive, the one before index is * the last we will free. This one must be used */ last_to_free = iwl_queue_dec_wrap(tfd_num); - if (!iwl_queue_used(q, last_to_free)) { + if (!iwl_queue_used(txq, last_to_free)) { IWL_ERR(trans, "%s: Read index for DMA queue txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n", __func__, txq_id, last_to_free, TFD_QUEUE_SIZE_MAX, - q->write_ptr, q->read_ptr); + txq->write_ptr, txq->read_ptr); goto out; } @@ -1057,9 +1130,9 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, goto out; for (; - q->read_ptr != tfd_num; - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) { - struct sk_buff *skb = txq->entries[txq->q.read_ptr].skb; + txq->read_ptr != tfd_num; + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) { + struct sk_buff *skb = txq->entries[txq->read_ptr].skb; if (WARN_ON_ONCE(!skb)) continue; @@ -1068,16 +1141,17 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, __skb_queue_tail(skbs, skb); - txq->entries[txq->q.read_ptr].skb = NULL; + txq->entries[txq->read_ptr].skb = NULL; - iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); + if (!trans->cfg->use_tfh) + iwl_pcie_txq_inval_byte_cnt_tbl(trans, txq); iwl_pcie_txq_free_tfd(trans, txq); } iwl_pcie_txq_progress(txq); - if (iwl_queue_space(&txq->q) > txq->q.low_mark && + if (iwl_queue_space(txq) > txq->low_mark && test_bit(txq_id, trans_pcie->queue_stopped)) { struct sk_buff_head overflow_skbs; @@ -1109,12 +1183,12 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn, } spin_lock_bh(&txq->lock); - if (iwl_queue_space(&txq->q) > txq->q.low_mark) + if (iwl_queue_space(txq) > txq->low_mark) iwl_wake_queue(trans, txq); } - if (q->read_ptr == q->write_ptr) { - IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", q->id); + if (txq->read_ptr == txq->write_ptr) { + IWL_DEBUG_RPM(trans, "Q %d - last tx reclaimed\n", txq->id); iwl_trans_unref(trans); } @@ -1176,31 +1250,30 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx) { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[txq_id]; - struct iwl_queue *q = &txq->q; unsigned long flags; int nfreed = 0; lockdep_assert_held(&txq->lock); - if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(q, idx))) { + if ((idx >= TFD_QUEUE_SIZE_MAX) || (!iwl_queue_used(txq, idx))) { IWL_ERR(trans, "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n", __func__, txq_id, idx, TFD_QUEUE_SIZE_MAX, - q->write_ptr, q->read_ptr); + txq->write_ptr, txq->read_ptr); return; } - for (idx = iwl_queue_inc_wrap(idx); q->read_ptr != idx; - q->read_ptr = iwl_queue_inc_wrap(q->read_ptr)) { + for (idx = iwl_queue_inc_wrap(idx); txq->read_ptr != idx; + txq->read_ptr = iwl_queue_inc_wrap(txq->read_ptr)) { if (nfreed++ > 0) { IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n", - idx, q->write_ptr, q->read_ptr); + idx, txq->write_ptr, txq->read_ptr); iwl_force_nmi(trans); } } - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { spin_lock_irqsave(&trans_pcie->reg_lock, flags); iwl_pcie_clear_cmd_in_flight(trans); spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); @@ -1249,6 +1322,9 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn, if (test_and_set_bit(txq_id, trans_pcie->queue_used)) WARN_ONCE(1, "queue %d already used - expect issues", txq_id); + if (cfg && trans->cfg->use_tfh) + WARN_ONCE(1, "Expected no calls to SCD configuration"); + txq->wd_timeout = msecs_to_jiffies(wdg_timeout); if (cfg) { @@ -1283,14 +1359,14 @@ void iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn, */ iwl_scd_txq_disable_agg(trans, txq_id); - ssn = txq->q.read_ptr; + ssn = txq->read_ptr; } } /* Place first TFD at index corresponding to start sequence number. * Assumes that ssn_idx is valid (!= 0xFFF) */ - txq->q.read_ptr = (ssn & 0xff); - txq->q.write_ptr = (ssn & 0xff); + txq->read_ptr = (ssn & 0xff); + txq->write_ptr = (ssn & 0xff); iwl_write_direct32(trans, HBUS_TARG_WRPTR, (ssn & 0xff) | (txq_id << 8)); @@ -1343,6 +1419,14 @@ void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id, txq->ampdu = !shared_mode; } +dma_addr_t iwl_trans_pcie_get_txq_byte_table(struct iwl_trans *trans, int txq) +{ + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); + + return trans_pcie->scd_bc_tbls.dma + + txq * sizeof(struct iwlagn_scd_bc_tbl); +} + void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id, bool configure_scd) { @@ -1366,6 +1450,9 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id, return; } + if (configure_scd && trans->cfg->use_tfh) + WARN_ONCE(1, "Expected no calls to SCD configuration"); + if (configure_scd) { iwl_scd_txq_set_inactive(trans, txq_id); @@ -1395,7 +1482,6 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, { struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; - struct iwl_queue *q = &txq->q; struct iwl_device_cmd *out_cmd; struct iwl_cmd_meta *out_meta; unsigned long flags; @@ -1410,7 +1496,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD]; u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD]; - if (WARN(!trans_pcie->wide_cmd_header && + if (WARN(!trans->wide_cmd_header && group_id > IWL_ALWAYS_LONG_GROUP, "unsupported wide command %#x\n", cmd->id)) return -EINVAL; @@ -1494,7 +1580,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, spin_lock_bh(&txq->lock); - if (iwl_queue_space(q) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) { + if (iwl_queue_space(txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) { spin_unlock_bh(&txq->lock); IWL_ERR(trans, "No space in command queue\n"); @@ -1503,7 +1589,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, goto free_dup_buf; } - idx = get_cmd_index(q, q->write_ptr); + idx = get_cmd_index(txq, txq->write_ptr); out_cmd = txq->entries[idx].cmd; out_meta = &txq->entries[idx].meta; @@ -1522,7 +1608,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, out_cmd->hdr_wide.reserved = 0; out_cmd->hdr_wide.sequence = cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) | - INDEX_TO_SEQ(q->write_ptr)); + INDEX_TO_SEQ(txq->write_ptr)); cmd_pos = sizeof(struct iwl_cmd_header_wide); copy_size = sizeof(struct iwl_cmd_header_wide); @@ -1530,7 +1616,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id); out_cmd->hdr.sequence = cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) | - INDEX_TO_SEQ(q->write_ptr)); + INDEX_TO_SEQ(txq->write_ptr)); out_cmd->hdr.group_id = 0; cmd_pos = sizeof(struct iwl_cmd_header); @@ -1580,7 +1666,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, iwl_get_cmd_string(trans, cmd->id), group_id, out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence), - cmd_size, q->write_ptr, idx, trans_pcie->cmd_queue); + cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue); /* start the TFD with the minimum copy bytes */ tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE); @@ -1596,8 +1682,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, copy_size - tb0_size, DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); idx = -ENOMEM; goto out; } @@ -1620,8 +1706,8 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, phys_addr = dma_map_single(trans->dev, (void *)data, cmdlen[i], DMA_TO_DEVICE); if (dma_mapping_error(trans->dev, phys_addr)) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); idx = -ENOMEM; goto out; } @@ -1629,8 +1715,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false); } - BUILD_BUG_ON(IWL_NUM_OF_TBS + CMD_TB_BITMAP_POS > - sizeof(out_meta->flags) * BITS_PER_BYTE); + BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE); out_meta->flags = cmd->flags; if (WARN_ON_ONCE(txq->entries[idx].free_buf)) kzfree(txq->entries[idx].free_buf); @@ -1639,7 +1724,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide); /* start timer if queue currently empty */ - if (q->read_ptr == q->write_ptr && txq->wd_timeout) + if (txq->read_ptr == txq->write_ptr && txq->wd_timeout) mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout); spin_lock_irqsave(&trans_pcie->reg_lock, flags); @@ -1651,7 +1736,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans, } /* Increment and update queue's write index */ - q->write_ptr = iwl_queue_inc_wrap(q->write_ptr); + txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr); iwl_pcie_txq_inc_wr_ptr(trans, txq); spin_unlock_irqrestore(&trans_pcie->reg_lock, flags); @@ -1689,20 +1774,20 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans, if (WARN(txq_id != trans_pcie->cmd_queue, "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n", txq_id, trans_pcie->cmd_queue, sequence, - trans_pcie->txq[trans_pcie->cmd_queue].q.read_ptr, - trans_pcie->txq[trans_pcie->cmd_queue].q.write_ptr)) { + trans_pcie->txq[trans_pcie->cmd_queue].read_ptr, + trans_pcie->txq[trans_pcie->cmd_queue].write_ptr)) { iwl_print_hex_error(trans, pkt, 32); return; } spin_lock_bh(&txq->lock); - cmd_index = get_cmd_index(&txq->q, index); + cmd_index = get_cmd_index(txq, index); cmd = txq->entries[cmd_index].cmd; meta = &txq->entries[cmd_index].meta; cmd_id = iwl_cmd_id(cmd->hdr.cmd, group_id, 0); - iwl_pcie_tfd_unmap(trans, meta, &txq->tfds[index]); + iwl_pcie_tfd_unmap(trans, meta, txq, index); /* Input error checking is done when commands are added to queue. */ if (meta->flags & CMD_WANT_SKB) { @@ -1815,14 +1900,13 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans, HOST_COMPLETE_TIMEOUT); if (!ret) { struct iwl_txq *txq = &trans_pcie->txq[trans_pcie->cmd_queue]; - struct iwl_queue *q = &txq->q; IWL_ERR(trans, "Error sending %s: time out after %dms.\n", iwl_get_cmd_string(trans, cmd->id), jiffies_to_msecs(HOST_COMPLETE_TIMEOUT)); IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n", - q->read_ptr, q->write_ptr); + txq->read_ptr, txq->write_ptr); clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status); IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n", @@ -1900,7 +1984,7 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_cmd_meta *out_meta, struct iwl_device_cmd *dev_cmd, u16 tb1_len) { - struct iwl_queue *q = &txq->q; + struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); u16 tb2_len; int i; @@ -1915,8 +1999,8 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb->data + hdr_len, tb2_len, DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb2_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); return -EINVAL; } iwl_pcie_txq_build_tfd(trans, txq, tb2_phys, tb2_len, false); @@ -1935,19 +2019,19 @@ static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb, skb_frag_size(frag), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(trans->dev, tb_phys))) { - iwl_pcie_tfd_unmap(trans, out_meta, - &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, + txq->write_ptr); return -EINVAL; } tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys, skb_frag_size(frag), false); - out_meta->flags |= BIT(tb_idx + CMD_TB_BITMAP_POS); + out_meta->tbs |= BIT(tb_idx); } trace_iwlwifi_dev_tx(trans->dev, skb, - &txq->tfds[txq->q.write_ptr], - sizeof(struct iwl_tfd), + iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), + trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, skb->data + hdr_len, tb2_len); trace_iwlwifi_dev_tx_data(trans->dev, skb, @@ -2008,7 +2092,6 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, struct ieee80211_hdr *hdr = (void *)skb->data; unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room; unsigned int mss = skb_shinfo(skb)->gso_size; - struct iwl_queue *q = &txq->q; u16 length, iv_len, amsdu_pad; u8 *start_hdr; struct iwl_tso_hdr_page *hdr_page; @@ -2022,8 +2105,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, IEEE80211_CCMP_HDR_LEN : 0; trace_iwlwifi_dev_tx(trans->dev, skb, - &txq->tfds[txq->q.write_ptr], - sizeof(struct iwl_tfd), + iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr), + trans_pcie->tfd_size, &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, NULL, 0); @@ -2179,7 +2262,7 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb, return 0; out_unmap: - iwl_pcie_tfd_unmap(trans, out_meta, &txq->tfds[q->write_ptr]); + iwl_pcie_tfd_unmap(trans, out_meta, txq, txq->write_ptr); return ret; } #else /* CONFIG_INET */ @@ -2203,9 +2286,9 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, struct iwl_tx_cmd *tx_cmd = (struct iwl_tx_cmd *)dev_cmd->payload; struct iwl_cmd_meta *out_meta; struct iwl_txq *txq; - struct iwl_queue *q; dma_addr_t tb0_phys, tb1_phys, scratch_phys; void *tb1_addr; + void *tfd; u16 len, tb1_len; bool wait_write_ptr; __le16 fc; @@ -2214,7 +2297,6 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, bool amsdu; txq = &trans_pcie->txq[txq_id]; - q = &txq->q; if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used), "TX on unused queue %d\n", txq_id)) @@ -2236,7 +2318,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, } if (skb_is_nonlinear(skb) && - skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS && + skb_shinfo(skb)->nr_frags > IWL_PCIE_MAX_FRAGS(trans_pcie) && __skb_linearize(skb)) return -ENOMEM; @@ -2249,11 +2331,11 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, spin_lock(&txq->lock); - if (iwl_queue_space(q) < q->high_mark) { + if (iwl_queue_space(txq) < txq->high_mark) { iwl_stop_queue(trans, txq); /* don't put the packet on the ring, if there is no room */ - if (unlikely(iwl_queue_space(q) < 3)) { + if (unlikely(iwl_queue_space(txq) < 3)) { struct iwl_device_cmd **dev_cmd_ptr; dev_cmd_ptr = (void *)((u8 *)skb->cb + @@ -2274,19 +2356,19 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, */ wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); WARN_ONCE(txq->ampdu && - (wifi_seq & 0xff) != q->write_ptr, + (wifi_seq & 0xff) != txq->write_ptr, "Q: %d WiFi Seq %d tfdNum %d", - txq_id, wifi_seq, q->write_ptr); + txq_id, wifi_seq, txq->write_ptr); /* Set up driver data for this TFD */ - txq->entries[q->write_ptr].skb = skb; - txq->entries[q->write_ptr].cmd = dev_cmd; + txq->entries[txq->write_ptr].skb = skb; + txq->entries[txq->write_ptr].cmd = dev_cmd; dev_cmd->hdr.sequence = cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) | - INDEX_TO_SEQ(q->write_ptr))); + INDEX_TO_SEQ(txq->write_ptr))); - tb0_phys = iwl_pcie_get_first_tb_dma(txq, q->write_ptr); + tb0_phys = iwl_pcie_get_first_tb_dma(txq, txq->write_ptr); scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) + offsetof(struct iwl_tx_cmd, scratch); @@ -2294,7 +2376,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, tx_cmd->dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys); /* Set up first empty entry in queue's array of Tx/cmd buffers */ - out_meta = &txq->entries[q->write_ptr].meta; + out_meta = &txq->entries[txq->write_ptr].meta; out_meta->flags = 0; /* @@ -2319,7 +2401,7 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, } /* The first TB points to bi-directional DMA data */ - memcpy(&txq->first_tb_bufs[q->write_ptr], &dev_cmd->hdr, + memcpy(&txq->first_tb_bufs[txq->write_ptr], &dev_cmd->hdr, IWL_FIRST_TB_SIZE); iwl_pcie_txq_build_tfd(trans, txq, tb0_phys, IWL_FIRST_TB_SIZE, true); @@ -2344,13 +2426,15 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, goto out_err; } + tfd = iwl_pcie_get_tfd(trans_pcie, txq, txq->write_ptr); /* Set up entry for this TFD in Tx byte-count array */ - iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len)); + iwl_pcie_txq_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->len), + iwl_pcie_tfd_get_num_tbs(trans, tfd)); wait_write_ptr = ieee80211_has_morefrags(fc); /* start timer if queue currently empty */ - if (q->read_ptr == q->write_ptr) { + if (txq->read_ptr == txq->write_ptr) { if (txq->wd_timeout) { /* * If the TXQ is active, then set the timer, if not, @@ -2364,12 +2448,12 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb, else txq->frozen_expiry_remainder = txq->wd_timeout; } - IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", q->id); + IWL_DEBUG_RPM(trans, "Q: %d first tx - take ref\n", txq->id); iwl_trans_ref(trans); } /* Tell device the write index *just past* this latest filled TFD */ - q->write_ptr = iwl_queue_inc_wrap(q->write_ptr); + txq->write_ptr = iwl_queue_inc_wrap(txq->write_ptr); if (!wait_write_ptr) iwl_pcie_txq_inc_wr_ptr(trans, txq); diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index 3e5fa7872b64..a5656bc0e6aa 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -3041,13 +3041,9 @@ static int prism2_ioctl_priv_download(local_info_t *local, struct iw_point *p) p->length > 1024 || !p->pointer) return -EINVAL; - param = kmalloc(p->length, GFP_KERNEL); - if (param == NULL) - return -ENOMEM; - - if (copy_from_user(param, p->pointer, p->length)) { - ret = -EFAULT; - goto out; + param = memdup_user(p->pointer, p->length); + if (IS_ERR(param)) { + return PTR_ERR(param); } if (p->length < sizeof(struct prism2_download_param) + @@ -3803,13 +3799,9 @@ static int prism2_ioctl_priv_hostapd(local_info_t *local, struct iw_point *p) p->length > PRISM2_HOSTAPD_MAX_BUF_SIZE || !p->pointer) return -EINVAL; - param = kmalloc(p->length, GFP_KERNEL); - if (param == NULL) - return -ENOMEM; - - if (copy_from_user(param, p->pointer, p->length)) { - ret = -EFAULT; - goto out; + param = memdup_user(p->pointer, p->length); + if (IS_ERR(param)) { + return PTR_ERR(param); } switch (param->cmd) { diff --git a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c index 56f109bc8394..bca6935a94db 100644 --- a/drivers/net/wireless/intersil/orinoco/orinoco_usb.c +++ b/drivers/net/wireless/intersil/orinoco/orinoco_usb.c @@ -1613,10 +1613,8 @@ static int ezusb_probe(struct usb_interface *interface, } upriv->read_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!upriv->read_urb) { - err("No free urbs available"); + if (!upriv->read_urb) goto error; - } if (le16_to_cpu(ep->wMaxPacketSize) != 64) pr_warn("bulk in: wMaxPacketSize!= 64\n"); if (ep->bEndpointAddress != (2 | USB_DIR_IN)) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 8c35ac838fce..431f13b4faf6 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -487,7 +487,7 @@ static const struct ieee80211_iface_combination hwsim_if_comb_p2p_dev[] = { }; static spinlock_t hwsim_radio_lock; -static struct list_head hwsim_radios; +static LIST_HEAD(hwsim_radios); static int hwsim_radio_idx; static struct platform_driver mac80211_hwsim_driver = { @@ -3376,7 +3376,6 @@ static int __init init_mac80211_hwsim(void) mac80211_hwsim_unassign_vif_chanctx; spin_lock_init(&hwsim_radio_lock); - INIT_LIST_HEAD(&hwsim_radios); err = register_pernet_device(&hwsim_net_ops); if (err) diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c index 799a2efe5793..e0ade40d9497 100644 --- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c +++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c @@ -198,22 +198,16 @@ static int if_usb_probe(struct usb_interface *intf, } cardp->rx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!cardp->rx_urb) { - lbtf_deb_usbd(&udev->dev, "Rx URB allocation failed\n"); + if (!cardp->rx_urb) goto dealloc; - } cardp->tx_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!cardp->tx_urb) { - lbtf_deb_usbd(&udev->dev, "Tx URB allocation failed\n"); + if (!cardp->tx_urb) goto dealloc; - } cardp->cmd_urb = usb_alloc_urb(0, GFP_KERNEL); - if (!cardp->cmd_urb) { - lbtf_deb_usbd(&udev->dev, "Cmd URB allocation failed\n"); + if (!cardp->cmd_urb) goto dealloc; - } cardp->ep_out_buf = kmalloc(MRVDRV_ETH_TX_PACKET_BUFFER_SIZE, GFP_KERNEL); diff --git a/drivers/net/wireless/marvell/mwifiex/11h.c b/drivers/net/wireless/marvell/mwifiex/11h.c index 81c60d0a1bda..43dccd5b0291 100644 --- a/drivers/net/wireless/marvell/mwifiex/11h.c +++ b/drivers/net/wireless/marvell/mwifiex/11h.c @@ -260,22 +260,17 @@ int mwifiex_11h_handle_radar_detected(struct mwifiex_private *priv, rdr_event = (void *)(skb->data + sizeof(u32)); - if (le32_to_cpu(rdr_event->passed)) { - mwifiex_dbg(priv->adapter, MSG, - "radar detected; indicating kernel\n"); - if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef)) - mwifiex_dbg(priv->adapter, ERROR, - "Failed to stop CAC in FW\n"); - cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef, - GFP_KERNEL); - mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n", - rdr_event->reg_domain); - mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n", - rdr_event->det_type); - } else { - mwifiex_dbg(priv->adapter, MSG, - "false radar detection event!\n"); - } + mwifiex_dbg(priv->adapter, MSG, + "radar detected; indicating kernel\n"); + if (mwifiex_stop_radar_detection(priv, &priv->dfs_chandef)) + mwifiex_dbg(priv->adapter, ERROR, + "Failed to stop CAC in FW\n"); + cfg80211_radar_event(priv->adapter->wiphy, &priv->dfs_chandef, + GFP_KERNEL); + mwifiex_dbg(priv->adapter, MSG, "regdomain: %d\n", + rdr_event->reg_domain); + mwifiex_dbg(priv->adapter, MSG, "radar detection type: %d\n", + rdr_event->det_type); return 0; } diff --git a/drivers/net/wireless/marvell/mwifiex/11n.h b/drivers/net/wireless/marvell/mwifiex/11n.h index afdd58aa90de..ea0fa68b9913 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n.h +++ b/drivers/net/wireless/marvell/mwifiex/11n.h @@ -171,9 +171,10 @@ mwifiex_find_stream_to_delete(struct mwifiex_private *priv, int ptr_tid, static inline int mwifiex_is_sta_11n_enabled(struct mwifiex_private *priv, struct mwifiex_sta_node *node) { - - if (!node || (priv->bss_role != MWIFIEX_BSS_ROLE_UAP) || - !priv->ap_11n_enabled) + if (!node || ((priv->bss_role == MWIFIEX_BSS_ROLE_UAP) && + !priv->ap_11n_enabled) || + ((priv->bss_mode == NL80211_IFTYPE_ADHOC) && + !priv->adapter->adhoc_11n_enabled)) return 0; return node->is_11n_enabled; diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c index a74cc43b1953..94480123efa3 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.c @@ -78,8 +78,15 @@ static int mwifiex_11n_dispatch_amsdu_pkt(struct mwifiex_private *priv, */ static int mwifiex_11n_dispatch_pkt(struct mwifiex_private *priv, void *payload) { - int ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload); + int ret; + + if (!payload) { + mwifiex_dbg(priv->adapter, INFO, "info: fw drop data\n"); + return 0; + } + + ret = mwifiex_11n_dispatch_amsdu_pkt(priv, payload); if (!ret) return 0; @@ -921,3 +928,72 @@ void mwifiex_coex_ampdu_rxwinsize(struct mwifiex_adapter *adapter) else mwifiex_update_ampdu_rxwinsize(adapter, false); } + +/* This function handles rxba_sync event + */ +void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + u8 *event_buf, u16 len) +{ + struct mwifiex_ie_types_rxba_sync *tlv_rxba = (void *)event_buf; + u16 tlv_type, tlv_len; + struct mwifiex_rx_reorder_tbl *rx_reor_tbl_ptr; + u8 i, j; + u16 seq_num, tlv_seq_num, tlv_bitmap_len; + int tlv_buf_left = len; + int ret; + u8 *tmp; + + mwifiex_dbg_dump(priv->adapter, EVT_D, "RXBA_SYNC event:", + event_buf, len); + while (tlv_buf_left >= sizeof(*tlv_rxba)) { + tlv_type = le16_to_cpu(tlv_rxba->header.type); + tlv_len = le16_to_cpu(tlv_rxba->header.len); + if (tlv_type != TLV_TYPE_RXBA_SYNC) { + mwifiex_dbg(priv->adapter, ERROR, + "Wrong TLV id=0x%x\n", tlv_type); + return; + } + + tlv_seq_num = le16_to_cpu(tlv_rxba->seq_num); + tlv_bitmap_len = le16_to_cpu(tlv_rxba->bitmap_len); + mwifiex_dbg(priv->adapter, INFO, + "%pM tid=%d seq_num=%d bitmap_len=%d\n", + tlv_rxba->mac, tlv_rxba->tid, tlv_seq_num, + tlv_bitmap_len); + + rx_reor_tbl_ptr = + mwifiex_11n_get_rx_reorder_tbl(priv, tlv_rxba->tid, + tlv_rxba->mac); + if (!rx_reor_tbl_ptr) { + mwifiex_dbg(priv->adapter, ERROR, + "Can not find rx_reorder_tbl!"); + return; + } + + for (i = 0; i < tlv_bitmap_len; i++) { + for (j = 0 ; j < 8; j++) { + if (tlv_rxba->bitmap[i] & (1 << j)) { + seq_num = (MAX_TID_VALUE - 1) & + (tlv_seq_num + i * 8 + j); + + mwifiex_dbg(priv->adapter, ERROR, + "drop packet,seq=%d\n", + seq_num); + + ret = mwifiex_11n_rx_reorder_pkt + (priv, seq_num, tlv_rxba->tid, + tlv_rxba->mac, 0, NULL); + + if (ret) + mwifiex_dbg(priv->adapter, + ERROR, + "Fail to drop packet"); + } + } + } + + tlv_buf_left -= (sizeof(*tlv_rxba) + tlv_len); + tmp = (u8 *)tlv_rxba + tlv_len + sizeof(*tlv_rxba); + tlv_rxba = (struct mwifiex_ie_types_rxba_sync *)tmp; + } +} diff --git a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h index 63ecea89b4ab..22d991f514c8 100644 --- a/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h +++ b/drivers/net/wireless/marvell/mwifiex/11n_rxreorder.h @@ -81,5 +81,6 @@ struct mwifiex_rx_reorder_tbl * mwifiex_11n_get_rx_reorder_tbl(struct mwifiex_private *priv, int tid, u8 *ta); void mwifiex_11n_del_rx_reorder_tbl_by_ta(struct mwifiex_private *priv, u8 *ta); void mwifiex_update_rxreor_flags(struct mwifiex_adapter *adapter, u8 flags); - +void mwifiex_11n_rxba_sync_event(struct mwifiex_private *priv, + u8 *event_buf, u16 len); #endif /* _MWIFIEX_11N_RXREORDER_H_ */ diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c index a8ff969c95c2..39ce76ad00bc 100644 --- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c +++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c @@ -483,6 +483,29 @@ mwifiex_cfg80211_add_key(struct wiphy *wiphy, struct net_device *netdev, return 0; } +/* + * CFG802.11 operation handler to set default mgmt key. + */ +static int +mwifiex_cfg80211_set_default_mgmt_key(struct wiphy *wiphy, + struct net_device *netdev, + u8 key_index) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(netdev); + struct mwifiex_ds_encrypt_key encrypt_key; + + wiphy_dbg(wiphy, "set default mgmt key, key index=%d\n", key_index); + + memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key)); + encrypt_key.key_len = WLAN_KEY_LEN_CCMP; + encrypt_key.key_index = key_index; + encrypt_key.is_igtk_def_key = true; + eth_broadcast_addr(encrypt_key.mac_addr); + + return mwifiex_send_cmd(priv, HostCmd_CMD_802_11_KEY_MATERIAL, + HostCmd_ACT_GEN_SET, true, &encrypt_key, true); +} + /* * This function sends domain information to the firmware. * @@ -2012,10 +2035,6 @@ mwifiex_cfg80211_disconnect(struct wiphy *wiphy, struct net_device *dev, if (mwifiex_deauthenticate(priv, NULL)) return -EFAULT; - mwifiex_dbg(priv->adapter, MSG, - "info: successfully disconnected from %pM:\t" - "reason code %d\n", priv->cfg_bssid, reason_code); - eth_zero_addr(priv->cfg_bssid); priv->hs2_enabled = false; @@ -2485,6 +2504,16 @@ mwifiex_cfg80211_scan(struct wiphy *wiphy, priv->scan_request = request; + if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + ether_addr_copy(priv->random_mac, request->mac_addr); + for (i = 0; i < ETH_ALEN; i++) { + priv->random_mac[i] &= request->mac_addr_mask[i]; + priv->random_mac[i] |= get_random_int() & + ~(request->mac_addr_mask[i]); + } + } + + ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); user_scan_cfg->num_ssids = request->n_ssids; user_scan_cfg->ssid_list = request->ssids; @@ -2726,7 +2755,7 @@ mwifiex_setup_ht_caps(struct ieee80211_sta_ht_cap *ht_info, ht_info->cap &= ~IEEE80211_HT_CAP_SGI_40; if (adapter->user_dev_mcs_support == HT_STREAM_2X2) - ht_info->cap |= 3 << IEEE80211_HT_CAP_RX_STBC_SHIFT; + ht_info->cap |= 2 << IEEE80211_HT_CAP_RX_STBC_SHIFT; else ht_info->cap |= 1 << IEEE80211_HT_CAP_RX_STBC_SHIFT; @@ -3913,6 +3942,88 @@ static int mwifiex_cfg80211_get_channel(struct wiphy *wiphy, return ret; } +#ifdef CONFIG_NL80211_TESTMODE + +enum mwifiex_tm_attr { + __MWIFIEX_TM_ATTR_INVALID = 0, + MWIFIEX_TM_ATTR_CMD = 1, + MWIFIEX_TM_ATTR_DATA = 2, + + /* keep last */ + __MWIFIEX_TM_ATTR_AFTER_LAST, + MWIFIEX_TM_ATTR_MAX = __MWIFIEX_TM_ATTR_AFTER_LAST - 1, +}; + +static const struct nla_policy mwifiex_tm_policy[MWIFIEX_TM_ATTR_MAX + 1] = { + [MWIFIEX_TM_ATTR_CMD] = { .type = NLA_U32 }, + [MWIFIEX_TM_ATTR_DATA] = { .type = NLA_BINARY, + .len = MWIFIEX_SIZE_OF_CMD_BUFFER }, +}; + +enum mwifiex_tm_command { + MWIFIEX_TM_CMD_HOSTCMD = 0, +}; + +static int mwifiex_tm_cmd(struct wiphy *wiphy, struct wireless_dev *wdev, + void *data, int len) +{ + struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev); + struct mwifiex_ds_misc_cmd *hostcmd; + struct nlattr *tb[MWIFIEX_TM_ATTR_MAX + 1]; + struct mwifiex_adapter *adapter; + struct sk_buff *skb; + int err; + + if (!priv) + return -EINVAL; + adapter = priv->adapter; + + err = nla_parse(tb, MWIFIEX_TM_ATTR_MAX, data, len, + mwifiex_tm_policy); + if (err) + return err; + + if (!tb[MWIFIEX_TM_ATTR_CMD]) + return -EINVAL; + + switch (nla_get_u32(tb[MWIFIEX_TM_ATTR_CMD])) { + case MWIFIEX_TM_CMD_HOSTCMD: + if (!tb[MWIFIEX_TM_ATTR_DATA]) + return -EINVAL; + + hostcmd = kzalloc(sizeof(*hostcmd), GFP_KERNEL); + if (!hostcmd) + return -ENOMEM; + + hostcmd->len = nla_len(tb[MWIFIEX_TM_ATTR_DATA]); + memcpy(hostcmd->cmd, nla_data(tb[MWIFIEX_TM_ATTR_DATA]), + hostcmd->len); + + if (mwifiex_send_cmd(priv, 0, 0, 0, hostcmd, true)) { + dev_err(priv->adapter->dev, "Failed to process hostcmd\n"); + return -EFAULT; + } + + /* process hostcmd response*/ + skb = cfg80211_testmode_alloc_reply_skb(wiphy, hostcmd->len); + if (!skb) + return -ENOMEM; + err = nla_put(skb, MWIFIEX_TM_ATTR_DATA, + hostcmd->len, hostcmd->cmd); + if (err) { + kfree_skb(skb); + return -EMSGSIZE; + } + + err = cfg80211_testmode_reply(skb); + kfree(hostcmd); + return err; + default: + return -EOPNOTSUPP; + } +} +#endif + static int mwifiex_cfg80211_start_radar_detection(struct wiphy *wiphy, struct net_device *dev, @@ -3994,6 +4105,7 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { .leave_ibss = mwifiex_cfg80211_leave_ibss, .add_key = mwifiex_cfg80211_add_key, .del_key = mwifiex_cfg80211_del_key, + .set_default_mgmt_key = mwifiex_cfg80211_set_default_mgmt_key, .mgmt_tx = mwifiex_cfg80211_mgmt_tx, .mgmt_frame_register = mwifiex_cfg80211_mgmt_frame_register, .remain_on_channel = mwifiex_cfg80211_remain_on_channel, @@ -4025,6 +4137,7 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = { .tdls_cancel_channel_switch = mwifiex_cfg80211_tdls_cancel_chan_switch, .add_station = mwifiex_cfg80211_add_station, .change_station = mwifiex_cfg80211_change_station, + CFG80211_TESTMODE_CMD(mwifiex_tm_cmd) .get_channel = mwifiex_cfg80211_get_channel, .start_radar_detection = mwifiex_cfg80211_start_radar_detection, .channel_switch = mwifiex_cfg80211_channel_switch, @@ -4135,9 +4248,12 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->cipher_suites = mwifiex_cipher_suites; wiphy->n_cipher_suites = ARRAY_SIZE(mwifiex_cipher_suites); - if (adapter->region_code) - wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS | + if (adapter->regd) { + wiphy->regulatory_flags |= REGULATORY_CUSTOM_REG | + REGULATORY_DISABLE_BEACON_HINTS | REGULATORY_COUNTRY_IE_IGNORE; + wiphy_apply_custom_regulatory(wiphy, adapter->regd); + } ether_addr_copy(wiphy->perm_addr, adapter->perm_addr); wiphy->signal_type = CFG80211_SIGNAL_TYPE_MBM; @@ -4173,7 +4289,10 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) wiphy->features |= NL80211_FEATURE_HT_IBSS | NL80211_FEATURE_INACTIVITY_TIMER | NL80211_FEATURE_LOW_PRIORITY_SCAN | - NL80211_FEATURE_NEED_OBSS_SCAN; + NL80211_FEATURE_NEED_OBSS_SCAN | + NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR | + NL80211_FEATURE_ND_RANDOM_MAC_ADDR; if (ISSUPP_TDLS_ENABLED(adapter->fw_cap_info)) wiphy->features |= NL80211_FEATURE_TDLS_CHANNEL_SWITCH; @@ -4200,19 +4319,27 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter) return ret; } - if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) { - mwifiex_dbg(adapter, INFO, - "driver hint alpha2: %2.2s\n", reg_alpha2); - regulatory_hint(wiphy, reg_alpha2); - } else { - if (adapter->region_code == 0x00) { - mwifiex_dbg(adapter, WARN, "Ignore world regulatory domain\n"); + if (!adapter->regd) { + if (reg_alpha2 && mwifiex_is_valid_alpha2(reg_alpha2)) { + mwifiex_dbg(adapter, INFO, + "driver hint alpha2: %2.2s\n", reg_alpha2); + regulatory_hint(wiphy, reg_alpha2); } else { - country_code = - mwifiex_11d_code_2_region(adapter->region_code); - if (country_code && - regulatory_hint(wiphy, country_code)) - mwifiex_dbg(priv->adapter, ERROR, "regulatory_hint() failed\n"); + if (adapter->region_code == 0x00) { + mwifiex_dbg(adapter, WARN, + "Ignore world regulatory domain\n"); + } else { + wiphy->regulatory_flags |= + REGULATORY_DISABLE_BEACON_HINTS | + REGULATORY_COUNTRY_IE_IGNORE; + country_code = + mwifiex_11d_code_2_region( + adapter->region_code); + if (country_code && + regulatory_hint(wiphy, country_code)) + mwifiex_dbg(priv->adapter, ERROR, + "regulatory_hint() failed\n"); + } } } diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c index c29f26d8baf2..53477280f39c 100644 --- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c +++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c @@ -480,13 +480,27 @@ int mwifiex_free_cmd_buffer(struct mwifiex_adapter *adapter) */ int mwifiex_process_event(struct mwifiex_adapter *adapter) { - int ret; + int ret, i; struct mwifiex_private *priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); struct sk_buff *skb = adapter->event_skb; - u32 eventcause = adapter->event_cause; + u32 eventcause; struct mwifiex_rxinfo *rx_info; + if ((adapter->event_cause & EVENT_ID_MASK) == EVENT_RADAR_DETECTED) { + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (priv && mwifiex_is_11h_active(priv)) { + adapter->event_cause |= + ((priv->bss_num & 0xff) << 16) | + ((priv->bss_type & 0xff) << 24); + break; + } + } + } + + eventcause = adapter->event_cause; + /* Save the last event to debug log */ adapter->dbg.last_event_index = (adapter->dbg.last_event_index + 1) % DBG_CMD_NUM; @@ -581,6 +595,14 @@ int mwifiex_send_cmd(struct mwifiex_private *priv, u16 cmd_no, return -1; } } + /* We don't expect commands in manufacturing mode. They are cooked + * in application and ready to download buffer is passed to the driver + */ + if (adapter->mfg_mode && cmd_no) { + dev_dbg(adapter->dev, "Ignoring commands in manufacturing mode\n"); + return -1; + } + /* Get a new command node */ cmd_node = mwifiex_get_cmd_node(adapter); diff --git a/drivers/net/wireless/marvell/mwifiex/debugfs.c b/drivers/net/wireless/marvell/mwifiex/debugfs.c index bccf17ad588e..b9284b533294 100644 --- a/drivers/net/wireless/marvell/mwifiex/debugfs.c +++ b/drivers/net/wireless/marvell/mwifiex/debugfs.c @@ -118,6 +118,8 @@ mwifiex_info_read(struct file *file, char __user *ubuf, p += sprintf(p, "bssid=\"%pM\"\n", info.bssid); p += sprintf(p, "channel=\"%d\"\n", (int) info.bss_chan); p += sprintf(p, "country_code = \"%s\"\n", info.country_code); + p += sprintf(p, "region_code=\"0x%x\"\n", + priv->adapter->region_code); netdev_for_each_mc_addr(ha, netdev) p += sprintf(p, "multicast_address[%d]=\"%pM\"\n", diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h index 5596b6be1898..4b1894b4757f 100644 --- a/drivers/net/wireless/marvell/mwifiex/fw.h +++ b/drivers/net/wireless/marvell/mwifiex/fw.h @@ -78,6 +78,7 @@ enum KEY_TYPE_ID { KEY_TYPE_ID_AES, KEY_TYPE_ID_WAPI, KEY_TYPE_ID_AES_CMAC, + KEY_TYPE_ID_AES_CMAC_DEF, }; #define WPA_PN_SIZE 8 @@ -176,6 +177,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_TYPE_PWK_CIPHER (PROPRIETARY_TLV_BASE_ID + 145) #define TLV_TYPE_GWK_CIPHER (PROPRIETARY_TLV_BASE_ID + 146) #define TLV_TYPE_TX_PAUSE (PROPRIETARY_TLV_BASE_ID + 148) +#define TLV_TYPE_RXBA_SYNC (PROPRIETARY_TLV_BASE_ID + 153) #define TLV_TYPE_COALESCE_RULE (PROPRIETARY_TLV_BASE_ID + 154) #define TLV_TYPE_KEY_PARAM_V2 (PROPRIETARY_TLV_BASE_ID + 156) #define TLV_TYPE_REPEAT_COUNT (PROPRIETARY_TLV_BASE_ID + 176) @@ -188,6 +190,8 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define TLV_BTCOEX_WL_AGGR_WINSIZE (PROPRIETARY_TLV_BASE_ID + 202) #define TLV_BTCOEX_WL_SCANTIME (PROPRIETARY_TLV_BASE_ID + 203) #define TLV_TYPE_BSS_MODE (PROPRIETARY_TLV_BASE_ID + 206) +#define TLV_TYPE_RANDOM_MAC (PROPRIETARY_TLV_BASE_ID + 236) +#define TLV_TYPE_CHAN_ATTR_CFG (PROPRIETARY_TLV_BASE_ID + 237) #define MWIFIEX_TX_DATA_BUF_SIZE_2K 2048 @@ -208,6 +212,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define MWIFIEX_TX_DATA_BUF_SIZE_4K 4096 #define MWIFIEX_TX_DATA_BUF_SIZE_8K 8192 +#define MWIFIEX_TX_DATA_BUF_SIZE_12K 12288 #define ISSUPP_11NENABLED(FwCapInfo) (FwCapInfo & BIT(11)) #define ISSUPP_TDLS_ENABLED(FwCapInfo) (FwCapInfo & BIT(14)) @@ -379,6 +384,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER { #define HostCmd_CMD_MC_POLICY 0x0121 #define HostCmd_CMD_TDLS_OPER 0x0122 #define HostCmd_CMD_SDIO_SP_RX_AGGR_CFG 0x0223 +#define HostCmd_CMD_CHAN_REGION_CFG 0x0242 #define PROTOCOL_NO_SECURITY 0x01 #define PROTOCOL_STATIC_WEP 0x02 @@ -411,6 +417,14 @@ enum P2P_MODES { P2P_MODE_CLIENT = 3, }; +enum mwifiex_channel_flags { + MWIFIEX_CHANNEL_PASSIVE = BIT(0), + MWIFIEX_CHANNEL_DFS = BIT(1), + MWIFIEX_CHANNEL_NOHT40 = BIT(2), + MWIFIEX_CHANNEL_NOHT80 = BIT(3), + MWIFIEX_CHANNEL_DISABLED = BIT(7), +}; + #define HostCmd_RET_BIT 0x8000 #define HostCmd_ACT_GEN_GET 0x0000 #define HostCmd_ACT_GEN_SET 0x0001 @@ -504,6 +518,8 @@ enum P2P_MODES { #define EVENT_RSSI_HIGH 0x0000001c #define EVENT_SNR_HIGH 0x0000001d #define EVENT_IBSS_COALESCED 0x0000001e +#define EVENT_IBSS_STA_CONNECT 0x00000020 +#define EVENT_IBSS_STA_DISCONNECT 0x00000021 #define EVENT_DATA_RSSI_LOW 0x00000024 #define EVENT_DATA_SNR_LOW 0x00000025 #define EVENT_DATA_RSSI_HIGH 0x00000026 @@ -531,6 +547,7 @@ enum P2P_MODES { #define EVENT_CHANNEL_REPORT_RDY 0x00000054 #define EVENT_TX_DATA_PAUSE 0x00000055 #define EVENT_EXT_SCAN_REPORT 0x00000058 +#define EVENT_RXBA_SYNC 0x00000059 #define EVENT_BG_SCAN_STOPPED 0x00000065 #define EVENT_REMAIN_ON_CHAN_EXPIRED 0x0000005f #define EVENT_MULTI_CHAN_INFO 0x0000006a @@ -734,6 +751,16 @@ struct mwifiex_ie_types_chan_list_param_set { struct mwifiex_chan_scan_param_set chan_scan_param[1]; } __packed; +struct mwifiex_ie_types_rxba_sync { + struct mwifiex_ie_types_header header; + u8 mac[ETH_ALEN]; + u8 tid; + u8 reserved; + __le16 seq_num; + __le16 bitmap_len; + u8 bitmap[1]; +} __packed; + struct chan_band_param_set { u8 radio_type; u8 chan_number; @@ -780,6 +807,11 @@ struct mwifiex_ie_types_scan_chan_gap { __le16 chan_gap; } __packed; +struct mwifiex_ie_types_random_mac { + struct mwifiex_ie_types_header header; + u8 mac[ETH_ALEN]; +} __packed; + struct mwifiex_ietypes_chanstats { struct mwifiex_ie_types_header header; struct mwifiex_fw_chan_stats chanstats[0]; @@ -1464,6 +1496,7 @@ struct mwifiex_user_scan_cfg { /* Variable number (fixed maximum) of channels to scan up */ struct mwifiex_user_scan_chan chan_list[MWIFIEX_USER_SCAN_CHAN_MAX]; u16 scan_chan_gap; + u8 random_mac[ETH_ALEN]; } __packed; #define MWIFIEX_BG_SCAN_CHAN_MAX 38 @@ -1646,7 +1679,7 @@ struct mwifiex_ie_types_sta_info { }; struct host_cmd_ds_sta_list { - u16 sta_count; + __le16 sta_count; u8 tlv[0]; } __packed; @@ -1667,6 +1700,12 @@ struct mwifiex_ie_types_wmm_param_set { u8 wmm_ie[1]; }; +struct mwifiex_ie_types_mgmt_frame { + struct mwifiex_ie_types_header header; + __le16 frame_control; + u8 frame_contents[0]; +}; + struct mwifiex_ie_types_wmm_queue_status { struct mwifiex_ie_types_header header; u8 queue_index; @@ -2034,26 +2073,26 @@ struct host_cmd_ds_set_bss_mode { struct host_cmd_ds_pcie_details { /* TX buffer descriptor ring address */ - u32 txbd_addr_lo; - u32 txbd_addr_hi; + __le32 txbd_addr_lo; + __le32 txbd_addr_hi; /* TX buffer descriptor ring count */ - u32 txbd_count; + __le32 txbd_count; /* RX buffer descriptor ring address */ - u32 rxbd_addr_lo; - u32 rxbd_addr_hi; + __le32 rxbd_addr_lo; + __le32 rxbd_addr_hi; /* RX buffer descriptor ring count */ - u32 rxbd_count; + __le32 rxbd_count; /* Event buffer descriptor ring address */ - u32 evtbd_addr_lo; - u32 evtbd_addr_hi; + __le32 evtbd_addr_lo; + __le32 evtbd_addr_hi; /* Event buffer descriptor ring count */ - u32 evtbd_count; + __le32 evtbd_count; /* Sleep cookie buffer physical address */ - u32 sleep_cookie_addr_lo; - u32 sleep_cookie_addr_hi; + __le32 sleep_cookie_addr_lo; + __le32 sleep_cookie_addr_hi; } __packed; struct mwifiex_ie_types_rssi_threshold { @@ -2093,8 +2132,8 @@ struct mwifiex_ie_types_mc_group_info { u8 chan_buf_weight; u8 band_config; u8 chan_num; - u32 chan_time; - u32 reserved; + __le32 chan_time; + __le32 reserved; union { u8 sdio_func_num; u8 usb_ep_num; @@ -2185,7 +2224,7 @@ struct host_cmd_ds_robust_coex { } __packed; struct host_cmd_ds_wakeup_reason { - u16 wakeup_reason; + __le16 wakeup_reason; } __packed; struct host_cmd_ds_gtk_rekey_params { @@ -2196,6 +2235,10 @@ struct host_cmd_ds_gtk_rekey_params { __le32 replay_ctr_high; } __packed; +struct host_cmd_ds_chan_region_cfg { + __le16 action; +} __packed; + struct host_cmd_ds_command { __le16 command; __le16 size; @@ -2270,6 +2313,7 @@ struct host_cmd_ds_command { struct host_cmd_ds_robust_coex coex; struct host_cmd_ds_wakeup_reason hs_wakeup_reason; struct host_cmd_ds_gtk_rekey_params rekey; + struct host_cmd_ds_chan_region_cfg reg_cfg; } params; } __packed; diff --git a/drivers/net/wireless/marvell/mwifiex/init.c b/drivers/net/wireless/marvell/mwifiex/init.c index 1489c90192bd..82839d9f079f 100644 --- a/drivers/net/wireless/marvell/mwifiex/init.c +++ b/drivers/net/wireless/marvell/mwifiex/init.c @@ -298,6 +298,7 @@ static void mwifiex_init_adapter(struct mwifiex_adapter *adapter) memset(&adapter->arp_filter, 0, sizeof(adapter->arp_filter)); adapter->arp_filter_size = 0; adapter->max_mgmt_ie_index = MAX_MGMT_IE_INDEX; + adapter->mfg_mode = mfg_mode; adapter->key_api_major_ver = 0; adapter->key_api_minor_ver = 0; eth_broadcast_addr(adapter->perm_addr); @@ -553,15 +554,22 @@ int mwifiex_init_fw(struct mwifiex_adapter *adapter) return -1; } } + if (adapter->mfg_mode) { + adapter->hw_status = MWIFIEX_HW_STATUS_READY; + ret = -EINPROGRESS; + } else { + for (i = 0; i < adapter->priv_num; i++) { + if (adapter->priv[i]) { + ret = mwifiex_sta_init_cmd(adapter->priv[i], + first_sta, true); + if (ret == -1) + return -1; + + first_sta = false; + } + - for (i = 0; i < adapter->priv_num; i++) { - if (adapter->priv[i]) { - ret = mwifiex_sta_init_cmd(adapter->priv[i], first_sta, - true); - if (ret == -1) - return -1; - first_sta = false; } } diff --git a/drivers/net/wireless/marvell/mwifiex/ioctl.h b/drivers/net/wireless/marvell/mwifiex/ioctl.h index 70429815ff53..536ab834b126 100644 --- a/drivers/net/wireless/marvell/mwifiex/ioctl.h +++ b/drivers/net/wireless/marvell/mwifiex/ioctl.h @@ -260,6 +260,7 @@ struct mwifiex_ds_encrypt_key { u8 is_igtk_key; u8 is_current_wep_key; u8 is_rx_seq_valid; + u8 is_igtk_def_key; }; struct mwifiex_power_cfg { diff --git a/drivers/net/wireless/marvell/mwifiex/join.c b/drivers/net/wireless/marvell/mwifiex/join.c index 1c7b00630b90..b89596c18b41 100644 --- a/drivers/net/wireless/marvell/mwifiex/join.c +++ b/drivers/net/wireless/marvell/mwifiex/join.c @@ -669,9 +669,8 @@ int mwifiex_ret_802_11_associate(struct mwifiex_private *priv, priv->assoc_rsp_size = min(le16_to_cpu(resp->size) - S_DS_GEN, sizeof(priv->assoc_rsp_buf)); - memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size); - assoc_rsp->a_id = cpu_to_le16(aid); + memcpy(priv->assoc_rsp_buf, &resp->params, priv->assoc_rsp_size); if (status_code) { priv->adapter->dbg.num_cmd_assoc_failure++; diff --git a/drivers/net/wireless/marvell/mwifiex/main.c b/drivers/net/wireless/marvell/mwifiex/main.c index db4925db39aa..2478ccd6f2d9 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.c +++ b/drivers/net/wireless/marvell/mwifiex/main.c @@ -23,6 +23,7 @@ #include "11n.h" #define VERSION "1.0" +#define MFG_FIRMWARE "mwifiex_mfg.bin" static unsigned int debug_mask = MWIFIEX_DEFAULT_DEBUG_MASK; module_param(debug_mask, uint, 0); @@ -37,6 +38,10 @@ module_param(driver_mode, ushort, 0); MODULE_PARM_DESC(driver_mode, "station=0x1(default), ap-sta=0x3, station-p2p=0x5, ap-sta-p2p=0x7"); +bool mfg_mode; +module_param(mfg_mode, bool, 0); +MODULE_PARM_DESC(mfg_mode, "manufacturing mode enable:1, disable:0"); + /* * This function registers the device and performs all the necessary * initializations. @@ -139,6 +144,8 @@ static int mwifiex_unregister(struct mwifiex_adapter *adapter) adapter->nd_info = NULL; } + kfree(adapter->regd); + vfree(adapter->chan_stats); kfree(adapter); return 0; @@ -486,9 +493,11 @@ static void mwifiex_free_adapter(struct mwifiex_adapter *adapter) */ static void mwifiex_terminate_workqueue(struct mwifiex_adapter *adapter) { - flush_workqueue(adapter->workqueue); - destroy_workqueue(adapter->workqueue); - adapter->workqueue = NULL; + if (adapter->workqueue) { + flush_workqueue(adapter->workqueue); + destroy_workqueue(adapter->workqueue); + adapter->workqueue = NULL; + } if (adapter->rx_workqueue) { flush_workqueue(adapter->rx_workqueue); @@ -559,16 +568,21 @@ static void mwifiex_fw_dpc(const struct firmware *firmware, void *context) goto done; } /* Wait for mwifiex_init to complete */ - wait_event_interruptible(adapter->init_wait_q, - adapter->init_wait_q_woken); - if (adapter->hw_status != MWIFIEX_HW_STATUS_READY) - goto err_init_fw; + if (!adapter->mfg_mode) { + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + if (adapter->hw_status != MWIFIEX_HW_STATUS_READY) + goto err_init_fw; + } priv = adapter->priv[MWIFIEX_BSS_ROLE_STA]; - if (mwifiex_register_cfg80211(adapter)) { - mwifiex_dbg(adapter, ERROR, - "cannot register with cfg80211\n"); - goto err_init_fw; + + if (!adapter->wiphy) { + if (mwifiex_register_cfg80211(adapter)) { + mwifiex_dbg(adapter, ERROR, + "cannot register with cfg80211\n"); + goto err_init_fw; + } } if (mwifiex_init_channel_scan_gap(adapter)) { @@ -662,16 +676,41 @@ done: /* * This function initializes the hardware and gets firmware. */ -static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter) +static int mwifiex_init_hw_fw(struct mwifiex_adapter *adapter, + bool req_fw_nowait) { int ret; - ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, - adapter->dev, GFP_KERNEL, adapter, - mwifiex_fw_dpc); - if (ret < 0) - mwifiex_dbg(adapter, ERROR, - "request_firmware_nowait error %d\n", ret); + /* Override default firmware with manufacturing one if + * manufacturing mode is enabled + */ + if (mfg_mode) { + if (strlcpy(adapter->fw_name, MFG_FIRMWARE, + sizeof(adapter->fw_name)) >= + sizeof(adapter->fw_name)) { + pr_err("%s: fw_name too long!\n", __func__); + return -1; + } + } + + if (req_fw_nowait) { + ret = request_firmware_nowait(THIS_MODULE, 1, adapter->fw_name, + adapter->dev, GFP_KERNEL, adapter, + mwifiex_fw_dpc); + if (ret < 0) + mwifiex_dbg(adapter, ERROR, + "request_firmware_nowait error %d\n", ret); + } else { + ret = request_firmware(&adapter->firmware, + adapter->fw_name, + adapter->dev); + if (ret < 0) + mwifiex_dbg(adapter, ERROR, + "request_firmware error %d\n", ret); + else + mwifiex_fw_dpc(adapter->firmware, (void *)adapter); + } + return ret; } @@ -1320,6 +1359,199 @@ static void mwifiex_main_work_queue(struct work_struct *work) mwifiex_main_process(adapter); } +/* + * This function gets called during PCIe function level reset. Required + * code is extracted from mwifiex_remove_card() + */ +static int +mwifiex_shutdown_sw(struct mwifiex_adapter *adapter, struct semaphore *sem) +{ + struct mwifiex_private *priv; + int i; + + if (!adapter) + goto exit_return; + + if (down_interruptible(sem)) + goto exit_sem_err; + + priv = mwifiex_get_priv(adapter, MWIFIEX_BSS_ROLE_ANY); + mwifiex_deauthenticate(priv, NULL); + + /* We can no longer handle interrupts once we start doing the teardown + * below. + */ + if (adapter->if_ops.disable_int) + adapter->if_ops.disable_int(adapter); + + adapter->surprise_removed = true; + mwifiex_terminate_workqueue(adapter); + + /* Stop data */ + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (priv && priv->netdev) { + mwifiex_stop_net_dev_queue(priv->netdev, adapter); + if (netif_carrier_ok(priv->netdev)) + netif_carrier_off(priv->netdev); + netif_device_detach(priv->netdev); + } + } + + mwifiex_dbg(adapter, CMD, "cmd: calling mwifiex_shutdown_drv...\n"); + adapter->init_wait_q_woken = false; + + if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS) + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + if (adapter->if_ops.down_dev) + adapter->if_ops.down_dev(adapter); + + mwifiex_dbg(adapter, CMD, "cmd: mwifiex_shutdown_drv done\n"); + if (atomic_read(&adapter->rx_pending) || + atomic_read(&adapter->tx_pending) || + atomic_read(&adapter->cmd_pending)) { + mwifiex_dbg(adapter, ERROR, + "rx_pending=%d, tx_pending=%d,\t" + "cmd_pending=%d\n", + atomic_read(&adapter->rx_pending), + atomic_read(&adapter->tx_pending), + atomic_read(&adapter->cmd_pending)); + } + + for (i = 0; i < adapter->priv_num; i++) { + priv = adapter->priv[i]; + if (!priv) + continue; + rtnl_lock(); + if (priv->netdev && + priv->wdev.iftype != NL80211_IFTYPE_UNSPECIFIED) + mwifiex_del_virtual_intf(adapter->wiphy, &priv->wdev); + rtnl_unlock(); + } + + up(sem); +exit_sem_err: + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); +exit_return: + return 0; +} + +/* This function gets called during PCIe function level reset. Required + * code is extracted from mwifiex_add_card() + */ +static int +mwifiex_reinit_sw(struct mwifiex_adapter *adapter, struct semaphore *sem, + struct mwifiex_if_ops *if_ops, u8 iface_type) +{ + char fw_name[32]; + struct pcie_service_card *card = adapter->card; + + if (down_interruptible(sem)) + goto exit_sem_err; + + mwifiex_init_lock_list(adapter); + if (adapter->if_ops.up_dev) + adapter->if_ops.up_dev(adapter); + + adapter->iface_type = iface_type; + adapter->card_sem = sem; + + adapter->hw_status = MWIFIEX_HW_STATUS_INITIALIZING; + adapter->surprise_removed = false; + init_waitqueue_head(&adapter->init_wait_q); + adapter->is_suspended = false; + adapter->hs_activated = false; + init_waitqueue_head(&adapter->hs_activate_wait_q); + init_waitqueue_head(&adapter->cmd_wait_q.wait); + adapter->cmd_wait_q.status = 0; + adapter->scan_wait_q_woken = false; + + if ((num_possible_cpus() > 1) || adapter->iface_type == MWIFIEX_USB) + adapter->rx_work_enabled = true; + + adapter->workqueue = + alloc_workqueue("MWIFIEX_WORK_QUEUE", + WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1); + if (!adapter->workqueue) + goto err_kmalloc; + + INIT_WORK(&adapter->main_work, mwifiex_main_work_queue); + + if (adapter->rx_work_enabled) { + adapter->rx_workqueue = alloc_workqueue("MWIFIEX_RX_WORK_QUEUE", + WQ_HIGHPRI | + WQ_MEM_RECLAIM | + WQ_UNBOUND, 1); + if (!adapter->rx_workqueue) + goto err_kmalloc; + INIT_WORK(&adapter->rx_work, mwifiex_rx_work_queue); + } + + /* Register the device. Fill up the private data structure with + * relevant information from the card. Some code extracted from + * mwifiex_register_dev() + */ + mwifiex_dbg(adapter, INFO, "%s, mwifiex_init_hw_fw()...\n", __func__); + strcpy(fw_name, adapter->fw_name); + strcpy(adapter->fw_name, PCIE8997_DEFAULT_WIFIFW_NAME); + + adapter->tx_buf_size = card->pcie.tx_buf_size; + adapter->ext_scan = card->pcie.can_ext_scan; + if (mwifiex_init_hw_fw(adapter, false)) { + strcpy(adapter->fw_name, fw_name); + mwifiex_dbg(adapter, ERROR, + "%s: firmware init failed\n", __func__); + goto err_init_fw; + } + strcpy(adapter->fw_name, fw_name); + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); + up(sem); + return 0; + +err_init_fw: + mwifiex_dbg(adapter, ERROR, "info: %s: unregister device\n", __func__); + if (adapter->if_ops.unregister_dev) + adapter->if_ops.unregister_dev(adapter); + if (adapter->hw_status == MWIFIEX_HW_STATUS_READY) { + mwifiex_dbg(adapter, ERROR, + "info: %s: shutdown mwifiex\n", __func__); + adapter->init_wait_q_woken = false; + + if (mwifiex_shutdown_drv(adapter) == -EINPROGRESS) + wait_event_interruptible(adapter->init_wait_q, + adapter->init_wait_q_woken); + } + +err_kmalloc: + mwifiex_terminate_workqueue(adapter); + adapter->surprise_removed = true; + up(sem); +exit_sem_err: + mwifiex_dbg(adapter, INFO, "%s, error\n", __func__); + + return -1; +} + +/* This function processes pre and post PCIe function level resets. + * It performs software cleanup without touching PCIe specific code. + * Also, during initialization PCIe stuff is skipped. + */ +void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare) +{ + struct mwifiex_if_ops if_ops; + + if (!prepare) { + mwifiex_reinit_sw(adapter, adapter->card_sem, &if_ops, + adapter->iface_type); + } else { + memcpy(&if_ops, &adapter->if_ops, + sizeof(struct mwifiex_if_ops)); + mwifiex_shutdown_sw(adapter, adapter->card_sem); + } +} +EXPORT_SYMBOL_GPL(mwifiex_do_flr); + /* * This function adds the card. * @@ -1391,7 +1623,7 @@ mwifiex_add_card(void *card, struct semaphore *sem, goto err_registerdev; } - if (mwifiex_init_hw_fw(adapter)) { + if (mwifiex_init_hw_fw(adapter, true)) { pr_err("%s: firmware init failed\n", __func__); goto err_init_fw; } diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h index 9f6bb400bdae..26df28f4bfb2 100644 --- a/drivers/net/wireless/marvell/mwifiex/main.h +++ b/drivers/net/wireless/marvell/mwifiex/main.h @@ -58,6 +58,7 @@ #include "sdio.h" extern const char driver_version[]; +extern bool mfg_mode; struct mwifiex_adapter; struct mwifiex_private; @@ -675,6 +676,7 @@ struct mwifiex_private { struct mwifiex_user_scan_chan hidden_chan[MWIFIEX_USER_SCAN_CHAN_MAX]; u8 assoc_resp_ht_param; bool ht_param_present; + u8 random_mac[ETH_ALEN]; }; @@ -827,6 +829,8 @@ struct mwifiex_if_ops { void (*deaggr_pkt)(struct mwifiex_adapter *, struct sk_buff *); void (*multi_port_resync)(struct mwifiex_adapter *); bool (*is_port_ready)(struct mwifiex_private *); + void (*down_dev)(struct mwifiex_adapter *); + void (*up_dev)(struct mwifiex_adapter *); }; struct mwifiex_adapter { @@ -989,6 +993,7 @@ struct mwifiex_adapter { u32 drv_info_size; bool scan_chan_gap_enabled; struct sk_buff_head rx_data_q; + bool mfg_mode; struct mwifiex_chan_stats *chan_stats; u32 num_in_chan_stats; int survey_idx; @@ -1004,6 +1009,7 @@ struct mwifiex_adapter { bool usb_mc_status; bool usb_mc_setup; struct cfg80211_wowlan_nd_info *nd_info; + struct ieee80211_regdomain *regd; }; void mwifiex_process_tx_queue(struct mwifiex_adapter *adapter); @@ -1625,4 +1631,5 @@ void mwifiex_debugfs_remove(void); void mwifiex_dev_debugfs_init(struct mwifiex_private *priv); void mwifiex_dev_debugfs_remove(struct mwifiex_private *priv); #endif +void mwifiex_do_flr(struct mwifiex_adapter *adapter, bool prepare); #endif /* !_MWIFIEX_MAIN_H_ */ diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.c b/drivers/net/wireless/marvell/mwifiex/pcie.c index 453ab6ad4784..3c3c4f197da8 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.c +++ b/drivers/net/wireless/marvell/mwifiex/pcie.c @@ -225,7 +225,7 @@ static void mwifiex_pcie_remove(struct pci_dev *pdev) if (!adapter || !adapter->priv_num) return; - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { #ifdef CONFIG_PM_SLEEP if (adapter->is_suspended) mwifiex_pcie_resume(&pdev->dev); @@ -277,6 +277,52 @@ static const struct pci_device_id mwifiex_ids[] = { MODULE_DEVICE_TABLE(pci, mwifiex_ids); +static void mwifiex_pcie_reset_notify(struct pci_dev *pdev, bool prepare) +{ + struct mwifiex_adapter *adapter; + struct pcie_service_card *card; + + if (!pdev) { + pr_err("%s: PCIe device is not specified\n", __func__); + return; + } + + card = (struct pcie_service_card *)pci_get_drvdata(pdev); + if (!card || !card->adapter) { + pr_err("%s: Card or adapter structure is not valid (%ld)\n", + __func__, (long)card); + return; + } + + adapter = card->adapter; + mwifiex_dbg(adapter, INFO, + "%s: vendor=0x%4.04x device=0x%4.04x rev=%d %s\n", + __func__, pdev->vendor, pdev->device, + pdev->revision, + prepare ? "Pre-FLR" : "Post-FLR"); + + if (prepare) { + /* Kernel would be performing FLR after this notification. + * Cleanup all software without cleaning anything related to + * PCIe and HW. + */ + mwifiex_do_flr(adapter, prepare); + adapter->surprise_removed = true; + } else { + /* Kernel stores and restores PCIe function context before and + * after performing FLR respectively. Reconfigure the software + * and firmware including firmware redownload + */ + adapter->surprise_removed = false; + mwifiex_do_flr(adapter, prepare); + } + mwifiex_dbg(adapter, INFO, "%s, successful\n", __func__); +} + +static const struct pci_error_handlers mwifiex_pcie_err_handler[] = { + { .reset_notify = mwifiex_pcie_reset_notify, }, +}; + #ifdef CONFIG_PM_SLEEP /* Power Management Hooks */ static SIMPLE_DEV_PM_OPS(mwifiex_pcie_pm_ops, mwifiex_pcie_suspend, @@ -295,6 +341,7 @@ static struct pci_driver __refdata mwifiex_pcie = { }, #endif .shutdown = mwifiex_pcie_shutdown, + .err_handler = mwifiex_pcie_err_handler, }; /* @@ -1956,8 +2003,6 @@ static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, if (firmware_len - offset < txlen) txlen = firmware_len - offset; - mwifiex_dbg(adapter, INFO, "."); - tx_blocks = (txlen + card->pcie.blksz_fw_dl - 1) / card->pcie.blksz_fw_dl; @@ -2043,6 +2088,10 @@ mwifiex_check_fw_status(struct mwifiex_adapter *adapter, u32 poll_num) ret = -1; else ret = 0; + + mwifiex_dbg(adapter, INFO, "Try %d if FW is ready <%d,%#x>", + tries, ret, firmware_stat); + if (ret) continue; if (firmware_stat == FIRMWARE_READY_PCIE) { @@ -2074,8 +2123,7 @@ mwifiex_check_winner_status(struct mwifiex_adapter *adapter) adapter->winner = 1; } else { mwifiex_dbg(adapter, ERROR, - "PCI-E is not the winner <%#x,%d>, exit dnld\n", - ret, adapter->winner); + "PCI-E is not the winner <%#x>", winner); } return ret; @@ -2863,7 +2911,7 @@ static int mwifiex_pcie_request_irq(struct mwifiex_adapter *adapter) static void mwifiex_pcie_get_fw_name(struct mwifiex_adapter *adapter) { int revision_id = 0; - int version; + int version, magic; struct pcie_service_card *card = adapter->card; switch (card->dev->device) { @@ -2888,30 +2936,19 @@ static void mwifiex_pcie_get_fw_name(struct mwifiex_adapter *adapter) } break; case PCIE_DEVICE_ID_MARVELL_88W8997: - mwifiex_read_reg(adapter, 0x0c48, &revision_id); + mwifiex_read_reg(adapter, 0x8, &revision_id); mwifiex_read_reg(adapter, 0x0cd0, &version); + mwifiex_read_reg(adapter, 0x0cd4, &magic); + revision_id &= 0xff; version &= 0x7; - switch (revision_id) { - case PCIE8997_V2: - if (version == CHIP_VER_PCIEUART) - strcpy(adapter->fw_name, - PCIEUART8997_FW_NAME_V2); - else - strcpy(adapter->fw_name, - PCIEUSB8997_FW_NAME_V2); - break; - case PCIE8997_Z: - if (version == CHIP_VER_PCIEUART) - strcpy(adapter->fw_name, - PCIEUART8997_FW_NAME_Z); - else - strcpy(adapter->fw_name, - PCIEUSB8997_FW_NAME_Z); - break; - default: - strcpy(adapter->fw_name, PCIE8997_DEFAULT_FW_NAME); - break; - } + magic &= 0xff; + if (revision_id == PCIE8997_A1 && + magic == CHIP_MAGIC_VALUE && + version == CHIP_VER_PCIEUART) + strcpy(adapter->fw_name, PCIEUART8997_FW_NAME_V4); + else + strcpy(adapter->fw_name, PCIEUSB8997_FW_NAME_V4); + break; default: break; } @@ -2952,7 +2989,6 @@ static int mwifiex_register_dev(struct mwifiex_adapter *adapter) static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) { struct pcie_service_card *card = adapter->card; - const struct mwifiex_pcie_card_reg *reg; struct pci_dev *pdev; int i; @@ -2976,8 +3012,90 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) if (card->msi_enable) pci_disable_msi(pdev); } + } +} - reg = card->pcie.reg; +/* This function initializes the PCI-E host memory space, WCB rings, etc. + * + * The following initializations steps are followed - + * - Allocate TXBD ring buffers + * - Allocate RXBD ring buffers + * - Allocate event BD ring buffers + * - Allocate command response ring buffer + * - Allocate sleep cookie buffer + * Part of mwifiex_pcie_init(), not reset the PCIE registers + */ +static void mwifiex_pcie_up_dev(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + int ret; + struct pci_dev *pdev = card->dev; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + card->cmdrsp_buf = NULL; + ret = mwifiex_pcie_create_txbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create txbd ring\n"); + goto err_cre_txbd; + } + + ret = mwifiex_pcie_create_rxbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create rxbd ring\n"); + goto err_cre_rxbd; + } + + ret = mwifiex_pcie_create_evtbd_ring(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to create evtbd ring\n"); + goto err_cre_evtbd; + } + + ret = mwifiex_pcie_alloc_cmdrsp_buf(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to allocate cmdbuf buffer\n"); + goto err_alloc_cmdbuf; + } + + if (reg->sleep_cookie) { + ret = mwifiex_pcie_alloc_sleep_cookie_buf(adapter); + if (ret) { + mwifiex_dbg(adapter, ERROR, "Failed to allocate sleep_cookie buffer\n"); + goto err_alloc_cookie; + } + } else { + card->sleep_cookie_vbase = NULL; + } + return; + +err_alloc_cookie: + mwifiex_pcie_delete_cmdrsp_buf(adapter); +err_alloc_cmdbuf: + mwifiex_pcie_delete_evtbd_ring(adapter); +err_cre_evtbd: + mwifiex_pcie_delete_rxbd_ring(adapter); +err_cre_rxbd: + mwifiex_pcie_delete_txbd_ring(adapter); +err_cre_txbd: + pci_iounmap(pdev, card->pci_mmap1); +} + +/* This function cleans up the PCI-E host memory space. + * Some code is extracted from mwifiex_unregister_dev() + * + */ +static void mwifiex_pcie_down_dev(struct mwifiex_adapter *adapter) +{ + struct pcie_service_card *card = adapter->card; + const struct mwifiex_pcie_card_reg *reg = card->pcie.reg; + + if (mwifiex_write_reg(adapter, reg->drv_rdy, 0x00000000)) + mwifiex_dbg(adapter, ERROR, "Failed to write driver not-ready signature\n"); + + adapter->seq_num = 0; + adapter->tx_buf_size = MWIFIEX_TX_DATA_BUF_SIZE_4K; + + if (card) { if (reg->sleep_cookie) mwifiex_pcie_delete_sleep_cookie_buf(adapter); @@ -2987,6 +3105,8 @@ static void mwifiex_unregister_dev(struct mwifiex_adapter *adapter) mwifiex_pcie_delete_txbd_ring(adapter); card->cmdrsp_buf = NULL; } + + return; } static struct mwifiex_if_ops pcie_ops = { @@ -3013,6 +3133,8 @@ static struct mwifiex_if_ops pcie_ops = { .clean_pcie_ring = mwifiex_clean_pcie_ring_buf, .reg_dump = mwifiex_pcie_reg_dump, .device_dump = mwifiex_pcie_device_dump, + .down_dev = mwifiex_pcie_down_dev, + .up_dev = mwifiex_pcie_up_dev, }; /* diff --git a/drivers/net/wireless/marvell/mwifiex/pcie.h b/drivers/net/wireless/marvell/mwifiex/pcie.h index f05061cea5cd..46f99cae9399 100644 --- a/drivers/net/wireless/marvell/mwifiex/pcie.h +++ b/drivers/net/wireless/marvell/mwifiex/pcie.h @@ -32,11 +32,9 @@ #define PCIE8897_DEFAULT_FW_NAME "mrvl/pcie8897_uapsta.bin" #define PCIE8897_A0_FW_NAME "mrvl/pcie8897_uapsta_a0.bin" #define PCIE8897_B0_FW_NAME "mrvl/pcie8897_uapsta.bin" -#define PCIE8997_DEFAULT_FW_NAME "mrvl/pcieusb8997_combo_v2.bin" -#define PCIEUART8997_FW_NAME_Z "mrvl/pcieuart8997_combo.bin" -#define PCIEUART8997_FW_NAME_V2 "mrvl/pcieuart8997_combo_v2.bin" -#define PCIEUSB8997_FW_NAME_Z "mrvl/pcieusb8997_combo.bin" -#define PCIEUSB8997_FW_NAME_V2 "mrvl/pcieusb8997_combo_v2.bin" +#define PCIEUART8997_FW_NAME_V4 "mrvl/pcieuart8997_combo_v4.bin" +#define PCIEUSB8997_FW_NAME_V4 "mrvl/pcieusb8997_combo_v4.bin" +#define PCIE8997_DEFAULT_WIFIFW_NAME "mrvl/pcie8997_wlan_v4.bin" #define PCIE_VENDOR_ID_MARVELL (0x11ab) #define PCIE_VENDOR_ID_V2_MARVELL (0x1b4b) @@ -46,9 +44,10 @@ #define PCIE8897_A0 0x1100 #define PCIE8897_B0 0x1200 -#define PCIE8997_Z 0x0 -#define PCIE8997_V2 0x471 +#define PCIE8997_A0 0x10 +#define PCIE8997_A1 0x11 #define CHIP_VER_PCIEUART 0x3 +#define CHIP_MAGIC_VALUE 0x24 /* Constants for Buffer Descriptor (BD) rings */ #define MWIFIEX_MAX_TXRX_BD 0x20 diff --git a/drivers/net/wireless/marvell/mwifiex/scan.c b/drivers/net/wireless/marvell/mwifiex/scan.c index 21ec84794d0c..97c9765b5bc6 100644 --- a/drivers/net/wireless/marvell/mwifiex/scan.c +++ b/drivers/net/wireless/marvell/mwifiex/scan.c @@ -820,6 +820,7 @@ mwifiex_config_scan(struct mwifiex_private *priv, struct mwifiex_adapter *adapter = priv->adapter; struct mwifiex_ie_types_num_probes *num_probes_tlv; struct mwifiex_ie_types_scan_chan_gap *chan_gap_tlv; + struct mwifiex_ie_types_random_mac *random_mac_tlv; struct mwifiex_ie_types_wildcard_ssid_params *wildcard_ssid_tlv; struct mwifiex_ie_types_bssid_list *bssid_tlv; u8 *tlv_pos; @@ -835,6 +836,7 @@ mwifiex_config_scan(struct mwifiex_private *priv, u8 ssid_filter; struct mwifiex_ie_types_htcap *ht_cap; struct mwifiex_ie_types_bss_mode *bss_mode; + const u8 zero_mac[6] = {0, 0, 0, 0, 0, 0}; /* The tlv_buf_len is calculated for each scan command. The TLVs added in this routine will be preserved since the routine that sends the @@ -967,6 +969,18 @@ mwifiex_config_scan(struct mwifiex_private *priv, tlv_pos += sizeof(struct mwifiex_ie_types_scan_chan_gap); } + + if (!ether_addr_equal(user_scan_in->random_mac, zero_mac)) { + random_mac_tlv = (void *)tlv_pos; + random_mac_tlv->header.type = + cpu_to_le16(TLV_TYPE_RANDOM_MAC); + random_mac_tlv->header.len = + cpu_to_le16(sizeof(random_mac_tlv->mac)); + ether_addr_copy(random_mac_tlv->mac, + user_scan_in->random_mac); + tlv_pos += + sizeof(struct mwifiex_ie_types_random_mac); + } } else { scan_cfg_out->bss_mode = (u8) adapter->scan_mode; num_probes = adapter->scan_probes; @@ -1922,6 +1936,7 @@ mwifiex_active_scan_req_for_passive_chan(struct mwifiex_private *priv) } adapter->active_scan_triggered = true; + ether_addr_copy(user_scan_cfg->random_mac, priv->random_mac); user_scan_cfg->num_ssids = priv->scan_request->n_ssids; user_scan_cfg->ssid_list = priv->scan_request->ssids; @@ -2179,18 +2194,14 @@ int mwifiex_ret_802_11_scan(struct mwifiex_private *priv, if (chan_band_tlv && adapter->nd_info) { adapter->nd_info->matches[idx] = - kzalloc(sizeof(*pmatch) + - sizeof(u32), GFP_ATOMIC); + kzalloc(sizeof(*pmatch) + sizeof(u32), + GFP_ATOMIC); pmatch = adapter->nd_info->matches[idx]; if (pmatch) { - memset(pmatch, 0, sizeof(*pmatch)); - if (chan_band_tlv) { - pmatch->n_channels = 1; - pmatch->channels[0] = - chan_band->chan_number; - } + pmatch->n_channels = 1; + pmatch->channels[0] = chan_band->chan_number; } } @@ -2761,6 +2772,7 @@ static int mwifiex_scan_specific_ssid(struct mwifiex_private *priv, if (!scan_cfg) return -ENOMEM; + ether_addr_copy(scan_cfg->random_mac, priv->random_mac); scan_cfg->ssid_list = req_ssid; scan_cfg->num_ssids = 1; diff --git a/drivers/net/wireless/marvell/mwifiex/sdio.c b/drivers/net/wireless/marvell/mwifiex/sdio.c index d3e1561ca075..8718950004f3 100644 --- a/drivers/net/wireless/marvell/mwifiex/sdio.c +++ b/drivers/net/wireless/marvell/mwifiex/sdio.c @@ -122,9 +122,11 @@ static int mwifiex_sdio_probe_of(struct device *dev, struct sdio_mmc_card *card) IRQF_TRIGGER_LOW, "wifi_wake", cfg); if (ret) { - dev_err(dev, + dev_dbg(dev, "Failed to request irq_wifi %d (%d)\n", cfg->irq_wifi, ret); + card->plt_wake_cfg = NULL; + return 0; } disable_irq(cfg->irq_wifi); } @@ -289,7 +291,7 @@ mwifiex_sdio_remove(struct sdio_func *func) mwifiex_dbg(adapter, INFO, "info: SDIO func num=%d\n", func->num); - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { if (adapter->is_suspended) mwifiex_sdio_resume(adapter->dev); diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c index 7897037b0992..2a162c33d271 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c @@ -598,6 +598,11 @@ static int mwifiex_set_aes_key_v2(struct mwifiex_private *priv, memcpy(km->key_param_set.key_params.cmac_aes.key, enc_key->key_material, enc_key->key_len); len += sizeof(struct mwifiex_cmac_aes_param); + } else if (enc_key->is_igtk_def_key) { + mwifiex_dbg(adapter, INFO, + "%s: Set CMAC default Key index\n", __func__); + km->key_param_set.key_type = KEY_TYPE_ID_AES_CMAC_DEF; + km->key_param_set.key_idx = enc_key->key_index & KEY_INDEX_MASK; } else { mwifiex_dbg(adapter, INFO, "%s: Set AES Key\n", __func__); @@ -706,15 +711,10 @@ mwifiex_cmd_802_11_key_material_v2(struct mwifiex_private *priv, (priv->wep_key_curr_index & KEY_INDEX_MASK)) key_info |= KEY_DEFAULT; } else { - if (mac) { - if (is_broadcast_ether_addr(mac)) - key_info |= KEY_MCAST; - else - key_info |= KEY_UNICAST | - KEY_DEFAULT; - } else { + if (is_broadcast_ether_addr(mac)) key_info |= KEY_MCAST; - } + else + key_info |= KEY_UNICAST | KEY_DEFAULT; } } km->key_param_set.key_info = cpu_to_le16(key_info); @@ -1244,20 +1244,23 @@ mwifiex_cmd_pcie_host_spec(struct mwifiex_private *priv, return 0; /* Send the ring base addresses and count to firmware */ - host_spec->txbd_addr_lo = (u32)(card->txbd_ring_pbase); - host_spec->txbd_addr_hi = (u32)(((u64)card->txbd_ring_pbase)>>32); - host_spec->txbd_count = MWIFIEX_MAX_TXRX_BD; - host_spec->rxbd_addr_lo = (u32)(card->rxbd_ring_pbase); - host_spec->rxbd_addr_hi = (u32)(((u64)card->rxbd_ring_pbase)>>32); - host_spec->rxbd_count = MWIFIEX_MAX_TXRX_BD; - host_spec->evtbd_addr_lo = (u32)(card->evtbd_ring_pbase); - host_spec->evtbd_addr_hi = (u32)(((u64)card->evtbd_ring_pbase)>>32); - host_spec->evtbd_count = MWIFIEX_MAX_EVT_BD; + host_spec->txbd_addr_lo = cpu_to_le32((u32)(card->txbd_ring_pbase)); + host_spec->txbd_addr_hi = + cpu_to_le32((u32)(((u64)card->txbd_ring_pbase) >> 32)); + host_spec->txbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD); + host_spec->rxbd_addr_lo = cpu_to_le32((u32)(card->rxbd_ring_pbase)); + host_spec->rxbd_addr_hi = + cpu_to_le32((u32)(((u64)card->rxbd_ring_pbase) >> 32)); + host_spec->rxbd_count = cpu_to_le32(MWIFIEX_MAX_TXRX_BD); + host_spec->evtbd_addr_lo = cpu_to_le32((u32)(card->evtbd_ring_pbase)); + host_spec->evtbd_addr_hi = + cpu_to_le32((u32)(((u64)card->evtbd_ring_pbase) >> 32)); + host_spec->evtbd_count = cpu_to_le32(MWIFIEX_MAX_EVT_BD); if (card->sleep_cookie_vbase) { host_spec->sleep_cookie_addr_lo = - (u32)(card->sleep_cookie_pbase); - host_spec->sleep_cookie_addr_hi = - (u32)(((u64)(card->sleep_cookie_pbase)) >> 32); + cpu_to_le32((u32)(card->sleep_cookie_pbase)); + host_spec->sleep_cookie_addr_hi = cpu_to_le32((u32)(((u64) + (card->sleep_cookie_pbase)) >> 32)); mwifiex_dbg(priv->adapter, INFO, "sleep_cook_lo phy addr: 0x%x\n", host_spec->sleep_cookie_addr_lo); @@ -1482,7 +1485,7 @@ int mwifiex_dnld_dt_cfgdata(struct mwifiex_private *priv, continue; /* property header is 6 bytes, data must fit in cmd buffer */ - if (prop && prop->value && prop->length > 6 && + if (prop->value && prop->length > 6 && prop->length <= MWIFIEX_SIZE_OF_CMD_BUFFER - S_DS_GEN) { ret = mwifiex_send_cmd(priv, HostCmd_CMD_CFG_DATA, HostCmd_ACT_GEN_SET, 0, @@ -1596,6 +1599,21 @@ static int mwifiex_cmd_gtk_rekey_offload(struct mwifiex_private *priv, return 0; } +static int mwifiex_cmd_chan_region_cfg(struct mwifiex_private *priv, + struct host_cmd_ds_command *cmd, + u16 cmd_action) +{ + struct host_cmd_ds_chan_region_cfg *reg = &cmd->params.reg_cfg; + + cmd->command = cpu_to_le16(HostCmd_CMD_CHAN_REGION_CFG); + cmd->size = cpu_to_le16(sizeof(*reg) + S_DS_GEN); + + if (cmd_action == HostCmd_ACT_GEN_GET) + reg->action = cpu_to_le16(cmd_action); + + return 0; +} + static int mwifiex_cmd_coalesce_cfg(struct mwifiex_private *priv, struct host_cmd_ds_command *cmd, @@ -2136,6 +2154,9 @@ int mwifiex_sta_prepare_cmd(struct mwifiex_private *priv, uint16_t cmd_no, ret = mwifiex_cmd_gtk_rekey_offload(priv, cmd_ptr, cmd_action, data_buf); break; + case HostCmd_CMD_CHAN_REGION_CFG: + ret = mwifiex_cmd_chan_region_cfg(priv, cmd_ptr, cmd_action); + break; default: mwifiex_dbg(priv->adapter, ERROR, "PREP_CMD: unknown cmd- %#x\n", cmd_no); @@ -2273,6 +2294,9 @@ int mwifiex_sta_init_cmd(struct mwifiex_private *priv, u8 first_sta, bool init) if (ret) return -1; } + + mwifiex_send_cmd(priv, HostCmd_CMD_CHAN_REGION_CFG, + HostCmd_ACT_GEN_GET, 0, NULL, true); } /* get tx rate */ diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c index ccf54932e321..8548027abf71 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_cmdresp.c @@ -962,7 +962,7 @@ static int mwifiex_ret_uap_sta_list(struct mwifiex_private *priv, int i; struct mwifiex_sta_node *sta_node; - for (i = 0; i < sta_list->sta_count; i++) { + for (i = 0; i < (le16_to_cpu(sta_list->sta_count)); i++) { sta_node = mwifiex_get_sta_entry(priv, sta_info->mac); if (unlikely(!sta_node)) continue; @@ -1022,6 +1022,138 @@ static int mwifiex_ret_robust_coex(struct mwifiex_private *priv, return 0; } +static struct ieee80211_regdomain * +mwifiex_create_custom_regdomain(struct mwifiex_private *priv, + u8 *buf, u16 buf_len) +{ + u16 num_chan = buf_len / 2; + struct ieee80211_regdomain *regd; + struct ieee80211_reg_rule *rule; + bool new_rule; + int regd_size, idx, freq, prev_freq = 0; + u32 bw, prev_bw = 0; + u8 chflags, prev_chflags = 0, valid_rules = 0; + + if (WARN_ON_ONCE(num_chan > NL80211_MAX_SUPP_REG_RULES)) + return ERR_PTR(-EINVAL); + + regd_size = sizeof(struct ieee80211_regdomain) + + num_chan * sizeof(struct ieee80211_reg_rule); + + regd = kzalloc(regd_size, GFP_KERNEL); + if (!regd) + return ERR_PTR(-ENOMEM); + + for (idx = 0; idx < num_chan; idx++) { + u8 chan; + enum nl80211_band band; + + chan = *buf++; + if (!chan) { + kfree(regd); + return NULL; + } + chflags = *buf++; + band = (chan <= 14) ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ; + freq = ieee80211_channel_to_frequency(chan, band); + new_rule = false; + + if (chflags & MWIFIEX_CHANNEL_DISABLED) + continue; + + if (band == NL80211_BAND_5GHZ) { + if (!(chflags & MWIFIEX_CHANNEL_NOHT80)) + bw = MHZ_TO_KHZ(80); + else if (!(chflags & MWIFIEX_CHANNEL_NOHT40)) + bw = MHZ_TO_KHZ(40); + else + bw = MHZ_TO_KHZ(20); + } else { + if (!(chflags & MWIFIEX_CHANNEL_NOHT40)) + bw = MHZ_TO_KHZ(40); + else + bw = MHZ_TO_KHZ(20); + } + + if (idx == 0 || prev_chflags != chflags || prev_bw != bw || + freq - prev_freq > 20) { + valid_rules++; + new_rule = true; + } + + rule = ®d->reg_rules[valid_rules - 1]; + + rule->freq_range.end_freq_khz = MHZ_TO_KHZ(freq + 10); + + prev_chflags = chflags; + prev_freq = freq; + prev_bw = bw; + + if (!new_rule) + continue; + + rule->freq_range.start_freq_khz = MHZ_TO_KHZ(freq - 10); + rule->power_rule.max_eirp = DBM_TO_MBM(19); + + if (chflags & MWIFIEX_CHANNEL_PASSIVE) + rule->flags = NL80211_RRF_NO_IR; + + if (chflags & MWIFIEX_CHANNEL_DFS) + rule->flags = NL80211_RRF_DFS; + + rule->freq_range.max_bandwidth_khz = bw; + } + + regd->n_reg_rules = valid_rules; + regd->alpha2[0] = '9'; + regd->alpha2[1] = '9'; + + return regd; +} + +static int mwifiex_ret_chan_region_cfg(struct mwifiex_private *priv, + struct host_cmd_ds_command *resp) +{ + struct host_cmd_ds_chan_region_cfg *reg = &resp->params.reg_cfg; + u16 action = le16_to_cpu(reg->action); + u16 tlv, tlv_buf_len, tlv_buf_left; + struct mwifiex_ie_types_header *head; + struct ieee80211_regdomain *regd; + u8 *tlv_buf; + + if (action != HostCmd_ACT_GEN_GET) + return 0; + + tlv_buf = (u8 *)reg + sizeof(*reg); + tlv_buf_left = le16_to_cpu(resp->size) - S_DS_GEN - sizeof(*reg); + + while (tlv_buf_left >= sizeof(*head)) { + head = (struct mwifiex_ie_types_header *)tlv_buf; + tlv = le16_to_cpu(head->type); + tlv_buf_len = le16_to_cpu(head->len); + + if (tlv_buf_left < (sizeof(*head) + tlv_buf_len)) + break; + + switch (tlv) { + case TLV_TYPE_CHAN_ATTR_CFG: + mwifiex_dbg_dump(priv->adapter, CMD_D, "CHAN:", + (u8 *)head + sizeof(*head), + tlv_buf_len); + regd = mwifiex_create_custom_regdomain(priv, + (u8 *)head + sizeof(*head), tlv_buf_len); + if (!IS_ERR(regd)) + priv->adapter->regd = regd; + break; + } + + tlv_buf += (sizeof(*head) + tlv_buf_len); + tlv_buf_left -= (sizeof(*head) + tlv_buf_len); + } + + return 0; +} + /* * This function handles the command responses. * @@ -1239,6 +1371,9 @@ int mwifiex_process_sta_cmdresp(struct mwifiex_private *priv, u16 cmdresp_no, break; case HostCmd_CMD_GTK_REKEY_OFFLOAD_CFG: break; + case HostCmd_CMD_CHAN_REGION_CFG: + ret = mwifiex_ret_chan_region_cfg(priv, resp); + break; default: mwifiex_dbg(adapter, ERROR, "CMD_RESP: unknown cmd response %#x\n", diff --git a/drivers/net/wireless/marvell/mwifiex/sta_event.c b/drivers/net/wireless/marvell/mwifiex/sta_event.c index a422f3306d4d..9df0c4dc06ed 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_event.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_event.c @@ -25,6 +25,99 @@ #include "wmm.h" #include "11n.h" +#define MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE 12 + +static int mwifiex_check_ibss_peer_capabilties(struct mwifiex_private *priv, + struct mwifiex_sta_node *sta_ptr, + struct sk_buff *event) +{ + int evt_len, ele_len; + u8 *curr; + struct ieee_types_header *ele_hdr; + struct mwifiex_ie_types_mgmt_frame *tlv_mgmt_frame; + const struct ieee80211_ht_cap *ht_cap; + const struct ieee80211_vht_cap *vht_cap; + + skb_pull(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE); + evt_len = event->len; + curr = event->data; + + mwifiex_dbg_dump(priv->adapter, EVT_D, "ibss peer capabilties:", + event->data, event->len); + + skb_push(event, MWIFIEX_IBSS_CONNECT_EVT_FIX_SIZE); + + tlv_mgmt_frame = (void *)curr; + if (evt_len >= sizeof(*tlv_mgmt_frame) && + le16_to_cpu(tlv_mgmt_frame->header.type) == + TLV_TYPE_UAP_MGMT_FRAME) { + /* Locate curr pointer to the start of beacon tlv, + * timestamp 8 bytes, beacon intervel 2 bytes, + * capability info 2 bytes, totally 12 byte beacon header + */ + evt_len = le16_to_cpu(tlv_mgmt_frame->header.len); + curr += (sizeof(*tlv_mgmt_frame) + 12); + } else { + mwifiex_dbg(priv->adapter, MSG, + "management frame tlv not found!\n"); + return 0; + } + + while (evt_len >= sizeof(*ele_hdr)) { + ele_hdr = (struct ieee_types_header *)curr; + ele_len = ele_hdr->len; + + if (evt_len < ele_len + sizeof(*ele_hdr)) + break; + + switch (ele_hdr->element_id) { + case WLAN_EID_HT_CAPABILITY: + sta_ptr->is_11n_enabled = true; + ht_cap = (void *)(ele_hdr + 2); + sta_ptr->max_amsdu = le16_to_cpu(ht_cap->cap_info) & + IEEE80211_HT_CAP_MAX_AMSDU ? + MWIFIEX_TX_DATA_BUF_SIZE_8K : + MWIFIEX_TX_DATA_BUF_SIZE_4K; + mwifiex_dbg(priv->adapter, INFO, + "11n enabled!, max_amsdu : %d\n", + sta_ptr->max_amsdu); + break; + + case WLAN_EID_VHT_CAPABILITY: + sta_ptr->is_11ac_enabled = true; + vht_cap = (void *)(ele_hdr + 2); + /* check VHT MAXMPDU capability */ + switch (le32_to_cpu(vht_cap->vht_cap_info) & 0x3) { + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_12K; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_8K; + break; + case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: + sta_ptr->max_amsdu = + MWIFIEX_TX_DATA_BUF_SIZE_4K; + default: + break; + } + + mwifiex_dbg(priv->adapter, INFO, + "11ac enabled!, max_amsdu : %d\n", + sta_ptr->max_amsdu); + break; + default: + break; + } + + curr += (ele_len + sizeof(*ele_hdr)); + evt_len -= (ele_len + sizeof(*ele_hdr)); + } + + return 0; +} + /* * This function resets the connection state. * @@ -519,6 +612,8 @@ void mwifiex_bt_coex_wlan_param_update_event(struct mwifiex_private *priv, * - EVENT_LINK_QUALITY * - EVENT_PRE_BEACON_LOST * - EVENT_IBSS_COALESCED + * - EVENT_IBSS_STA_CONNECT + * - EVENT_IBSS_STA_DISCONNECT * - EVENT_WEP_ICV_ERR * - EVENT_BW_CHANGE * - EVENT_HOSTWAKE_STAIE @@ -547,9 +642,11 @@ void mwifiex_bt_coex_wlan_param_update_event(struct mwifiex_private *priv, int mwifiex_process_sta_event(struct mwifiex_private *priv) { struct mwifiex_adapter *adapter = priv->adapter; - int ret = 0; + int ret = 0, i; u32 eventcause = adapter->event_cause; u16 ctrl, reason_code; + u8 ibss_sta_addr[ETH_ALEN]; + struct mwifiex_sta_node *sta_ptr; switch (eventcause) { case EVENT_DUMMY_HOST_WAKEUP_SIGNAL: @@ -708,7 +805,11 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) case EVENT_EXT_SCAN_REPORT: mwifiex_dbg(adapter, EVENT, "event: EXT_SCAN Report\n"); - if (adapter->ext_scan && !priv->scan_aborting) + /* We intend to skip this event during suspend, but handle + * it in interface disabled case + */ + if (adapter->ext_scan && (!priv->scan_aborting || + !netif_running(priv->netdev))) ret = mwifiex_handle_event_ext_scan_report(priv, adapter->event_skb->data); @@ -771,6 +872,39 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) HostCmd_CMD_802_11_IBSS_COALESCING_STATUS, HostCmd_ACT_GEN_GET, 0, NULL, false); break; + case EVENT_IBSS_STA_CONNECT: + ether_addr_copy(ibss_sta_addr, adapter->event_body + 2); + mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_CONNECT %pM\n", + ibss_sta_addr); + sta_ptr = mwifiex_add_sta_entry(priv, ibss_sta_addr); + if (sta_ptr && adapter->adhoc_11n_enabled) { + mwifiex_check_ibss_peer_capabilties(priv, sta_ptr, + adapter->event_skb); + if (sta_ptr->is_11n_enabled) + for (i = 0; i < MAX_NUM_TID; i++) + sta_ptr->ampdu_sta[i] = + priv->aggr_prio_tbl[i].ampdu_user; + else + for (i = 0; i < MAX_NUM_TID; i++) + sta_ptr->ampdu_sta[i] = + BA_STREAM_NOT_ALLOWED; + memset(sta_ptr->rx_seq, 0xff, sizeof(sta_ptr->rx_seq)); + } + + break; + case EVENT_IBSS_STA_DISCONNECT: + ether_addr_copy(ibss_sta_addr, adapter->event_body + 2); + mwifiex_dbg(adapter, EVENT, "event: IBSS_STA_DISCONNECT %pM\n", + ibss_sta_addr); + sta_ptr = mwifiex_get_sta_entry(priv, ibss_sta_addr); + if (sta_ptr && sta_ptr->is_11n_enabled) { + mwifiex_11n_del_rx_reorder_tbl_by_ta(priv, + ibss_sta_addr); + mwifiex_del_tx_ba_stream_tbl_by_ra(priv, ibss_sta_addr); + } + mwifiex_wmm_del_peer_ra_list(priv, ibss_sta_addr); + mwifiex_del_sta_entry(priv, ibss_sta_addr); + break; case EVENT_ADDBA: mwifiex_dbg(adapter, EVENT, "event: ADDBA Request\n"); mwifiex_send_cmd(priv, HostCmd_CMD_11N_ADDBA_RSP, @@ -869,6 +1003,12 @@ int mwifiex_process_sta_event(struct mwifiex_private *priv) mwifiex_bt_coex_wlan_param_update_event(priv, adapter->event_skb); break; + case EVENT_RXBA_SYNC: + dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n"); + mwifiex_11n_rxba_sync_event(priv, adapter->event_body, + adapter->event_skb->len - + sizeof(eventcause)); + break; default: mwifiex_dbg(adapter, ERROR, "event: unknown event id: %#x\n", eventcause); diff --git a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c index e06647a327b6..644f3a248741 100644 --- a/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c +++ b/drivers/net/wireless/marvell/mwifiex/sta_ioctl.c @@ -574,7 +574,7 @@ int mwifiex_enable_hs(struct mwifiex_adapter *adapter) adapter->hs_activate_wait_q_woken = false; - memset(&hscfg, 0, sizeof(struct mwifiex_ds_hs_cfg)); + memset(&hscfg, 0, sizeof(hscfg)); hscfg.is_invoke_hostcmd = true; adapter->hs_enabling = true; @@ -1138,7 +1138,7 @@ int mwifiex_set_encode(struct mwifiex_private *priv, struct key_params *kp, { struct mwifiex_ds_encrypt_key encrypt_key; - memset(&encrypt_key, 0, sizeof(struct mwifiex_ds_encrypt_key)); + memset(&encrypt_key, 0, sizeof(encrypt_key)); encrypt_key.key_len = key_len; encrypt_key.key_index = key_index; @@ -1180,7 +1180,7 @@ mwifiex_get_ver_ext(struct mwifiex_private *priv, u32 version_str_sel) { struct mwifiex_ver_ext ver_ext; - memset(&ver_ext, 0, sizeof(struct host_cmd_ds_version_ext)); + memset(&ver_ext, 0, sizeof(ver_ext)); ver_ext.version_str_sel = version_str_sel; if (mwifiex_send_cmd(priv, HostCmd_CMD_VERSION_EXT, HostCmd_ACT_GEN_GET, 0, &ver_ext, true)) diff --git a/drivers/net/wireless/marvell/mwifiex/uap_event.c b/drivers/net/wireless/marvell/mwifiex/uap_event.c index 86ff54296f39..d24eca34ac11 100644 --- a/drivers/net/wireless/marvell/mwifiex/uap_event.c +++ b/drivers/net/wireless/marvell/mwifiex/uap_event.c @@ -306,7 +306,12 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv) mwifiex_dbg(adapter, EVENT, "event: multi-chan info\n"); mwifiex_process_multi_chan_event(priv, adapter->event_skb); break; - + case EVENT_RXBA_SYNC: + dev_dbg(adapter->dev, "EVENT: RXBA_SYNC\n"); + mwifiex_11n_rxba_sync_event(priv, adapter->event_body, + adapter->event_skb->len - + sizeof(eventcause)); + break; default: mwifiex_dbg(adapter, EVENT, "event: unknown event id: %#x\n", eventcause); diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c index 0857575c5c39..73eb0846db21 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.c +++ b/drivers/net/wireless/marvell/mwifiex/usb.c @@ -273,6 +273,8 @@ static void mwifiex_usb_tx_complete(struct urb *urb) } else { mwifiex_dbg(adapter, DATA, "%s: DATA\n", __func__); + mwifiex_write_data_complete(adapter, context->skb, 0, + urb->status ? -1 : 0); for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) { port = &card->port[i]; if (context->ep == port->tx_data_ep) { @@ -282,8 +284,6 @@ static void mwifiex_usb_tx_complete(struct urb *urb) } } adapter->data_sent = false; - mwifiex_write_data_complete(adapter, context->skb, 0, - urb->status ? -1 : 0); } if (card->mc_resync_flag) @@ -611,7 +611,7 @@ static void mwifiex_usb_disconnect(struct usb_interface *intf) if (!adapter->priv_num) return; - if (user_rmmod) { + if (user_rmmod && !adapter->mfg_mode) { #ifdef CONFIG_PM if (adapter->is_suspended) mwifiex_usb_resume(intf); @@ -657,11 +657,8 @@ static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) card->tx_cmd.ep = card->tx_cmd_ep; card->tx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL); - if (!card->tx_cmd.urb) { - mwifiex_dbg(adapter, ERROR, - "tx_cmd.urb allocation failed\n"); + if (!card->tx_cmd.urb) return -ENOMEM; - } for (i = 0; i < MWIFIEX_TX_DATA_PORT; i++) { port = &card->port[i]; @@ -677,11 +674,8 @@ static int mwifiex_usb_tx_init(struct mwifiex_adapter *adapter) port->tx_data_list[j].ep = port->tx_data_ep; port->tx_data_list[j].urb = usb_alloc_urb(0, GFP_KERNEL); - if (!port->tx_data_list[j].urb) { - mwifiex_dbg(adapter, ERROR, - "urb allocation failed\n"); + if (!port->tx_data_list[j].urb) return -ENOMEM; - } } } @@ -697,10 +691,8 @@ static int mwifiex_usb_rx_init(struct mwifiex_adapter *adapter) card->rx_cmd.ep = card->rx_cmd_ep; card->rx_cmd.urb = usb_alloc_urb(0, GFP_KERNEL); - if (!card->rx_cmd.urb) { - mwifiex_dbg(adapter, ERROR, "rx_cmd.urb allocation failed\n"); + if (!card->rx_cmd.urb) return -ENOMEM; - } card->rx_cmd.skb = dev_alloc_skb(MWIFIEX_RX_CMD_BUF_SIZE); if (!card->rx_cmd.skb) @@ -714,11 +706,8 @@ static int mwifiex_usb_rx_init(struct mwifiex_adapter *adapter) card->rx_data_list[i].ep = card->rx_data_ep; card->rx_data_list[i].urb = usb_alloc_urb(0, GFP_KERNEL); - if (!card->rx_data_list[i].urb) { - mwifiex_dbg(adapter, ERROR, - "rx_data_list[] urb allocation failed\n"); + if (!card->rx_data_list[i].urb) return -1; - } if (mwifiex_usb_submit_rx_urb(&card->rx_data_list[i], MWIFIEX_RX_DATA_BUF_SIZE)) return -1; @@ -852,7 +841,7 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, struct usb_tx_data_port *port = NULL; u8 *data = (u8 *)skb->data; struct urb *tx_urb; - int idx, ret; + int idx, ret = -EINPROGRESS; if (adapter->is_suspended) { mwifiex_dbg(adapter, ERROR, @@ -876,8 +865,9 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, if (atomic_read(&port->tx_data_urb_pending) >= MWIFIEX_TX_DATA_URB) { port->block_status = true; - ret = -EBUSY; - goto done; + adapter->data_sent = + mwifiex_usb_data_sent(adapter); + return -EBUSY; } if (port->tx_data_ix >= MWIFIEX_TX_DATA_URB) port->tx_data_ix = 0; @@ -908,6 +898,14 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, else atomic_inc(&port->tx_data_urb_pending); + if (ep != card->tx_cmd_ep && + atomic_read(&port->tx_data_urb_pending) == + MWIFIEX_TX_DATA_URB) { + port->block_status = true; + adapter->data_sent = mwifiex_usb_data_sent(adapter); + ret = -ENOSR; + } + if (usb_submit_urb(tx_urb, GFP_ATOMIC)) { mwifiex_dbg(adapter, ERROR, "%s: usb_submit_urb failed\n", __func__); @@ -916,29 +914,15 @@ static int mwifiex_usb_host_to_card(struct mwifiex_adapter *adapter, u8 ep, } else { atomic_dec(&port->tx_data_urb_pending); port->block_status = false; + adapter->data_sent = false; if (port->tx_data_ix) port->tx_data_ix--; else port->tx_data_ix = MWIFIEX_TX_DATA_URB; } - - return -1; - } else { - if (ep != card->tx_cmd_ep && - atomic_read(&port->tx_data_urb_pending) == - MWIFIEX_TX_DATA_URB) { - port->block_status = true; - ret = -ENOSR; - goto done; - } + ret = -1; } - return -EINPROGRESS; - -done: - if (ep != card->tx_cmd_ep) - adapter->data_sent = mwifiex_usb_data_sent(adapter); - return ret; } @@ -1037,6 +1021,10 @@ static int mwifiex_prog_fw_w_helper(struct mwifiex_adapter *adapter, dnld_cmd = le32_to_cpu(fwdata->fw_hdr.dnld_cmd); tlen += sizeof(struct fw_header); + /* Command 7 doesn't have data length field */ + if (dnld_cmd == FW_CMD_7) + dlen = 0; + memcpy(fwdata->data, &firmware[tlen], dlen); fwdata->seq_num = cpu_to_le32(fw_seqnum); diff --git a/drivers/net/wireless/marvell/mwifiex/usb.h b/drivers/net/wireless/marvell/mwifiex/usb.h index b4e9246bbcdc..30e8eb8c259d 100644 --- a/drivers/net/wireless/marvell/mwifiex/usb.h +++ b/drivers/net/wireless/marvell/mwifiex/usb.h @@ -46,11 +46,12 @@ #define USB8766_DEFAULT_FW_NAME "mrvl/usb8766_uapsta.bin" #define USB8797_DEFAULT_FW_NAME "mrvl/usb8797_uapsta.bin" #define USB8801_DEFAULT_FW_NAME "mrvl/usb8801_uapsta.bin" -#define USB8997_DEFAULT_FW_NAME "mrvl/usb8997_uapsta.bin" +#define USB8997_DEFAULT_FW_NAME "mrvl/usbusb8997_combo_v4.bin" #define FW_DNLD_TX_BUF_SIZE 620 #define FW_DNLD_RX_BUF_SIZE 2048 #define FW_HAS_LAST_BLOCK 0x00000004 +#define FW_CMD_7 0x00000007 #define FW_DATA_XMIT_SIZE \ (sizeof(struct fw_header) + dlen + sizeof(u32)) diff --git a/drivers/net/wireless/marvell/mwifiex/util.c b/drivers/net/wireless/marvell/mwifiex/util.c index 6681be0511c7..18fbb96a46e9 100644 --- a/drivers/net/wireless/marvell/mwifiex/util.c +++ b/drivers/net/wireless/marvell/mwifiex/util.c @@ -386,6 +386,7 @@ mwifiex_parse_mgmt_packet(struct mwifiex_private *priv, u8 *payload, u16 len, "unknown public action frame category %d\n", category); } + break; default: mwifiex_dbg(priv->adapter, INFO, "unknown mgmt frame subtype %#x\n", stype); diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.c b/drivers/net/wireless/mediatek/mt7601u/dma.c index 57a80cfa39b1..a8bc064bc14f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.c +++ b/drivers/net/wireless/mediatek/mt7601u/dma.c @@ -103,7 +103,7 @@ static void mt7601u_rx_process_seg(struct mt7601u_dev *dev, u8 *data, if (unlikely(rxwi->zero[0] || rxwi->zero[1] || rxwi->zero[2])) dev_err_once(dev->dev, "Error: RXWI zero fields are set\n"); - if (unlikely(MT76_GET(MT_RXD_INFO_TYPE, fce_info))) + if (unlikely(FIELD_GET(MT_RXD_INFO_TYPE, fce_info))) dev_err_once(dev->dev, "Error: RX path seen a non-pkt urb\n"); trace_mt_rx(dev, rxwi, fce_info); diff --git a/drivers/net/wireless/mediatek/mt7601u/dma.h b/drivers/net/wireless/mediatek/mt7601u/dma.h index 978e8a90b87f..270d126880c0 100644 --- a/drivers/net/wireless/mediatek/mt7601u/dma.h +++ b/drivers/net/wireless/mediatek/mt7601u/dma.h @@ -18,8 +18,6 @@ #include #include -#include "util.h" - #define MT_DMA_HDR_LEN 4 #define MT_RX_INFO_LEN 4 #define MT_FCE_INFO_LEN 4 @@ -79,9 +77,9 @@ static inline int mt7601u_dma_skb_wrap(struct sk_buff *skb, */ info = flags | - MT76_SET(MT_TXD_INFO_LEN, round_up(skb->len, 4)) | - MT76_SET(MT_TXD_INFO_D_PORT, d_port) | - MT76_SET(MT_TXD_INFO_TYPE, type); + FIELD_PREP(MT_TXD_INFO_LEN, round_up(skb->len, 4)) | + FIELD_PREP(MT_TXD_INFO_D_PORT, d_port) | + FIELD_PREP(MT_TXD_INFO_TYPE, type); put_unaligned_le32(info, skb_push(skb, sizeof(info))); return skb_put_padto(skb, round_up(skb->len, 4) + 4); @@ -90,7 +88,7 @@ static inline int mt7601u_dma_skb_wrap(struct sk_buff *skb, static inline int mt7601u_dma_skb_wrap_pkt(struct sk_buff *skb, enum mt76_qsel qsel, u32 flags) { - flags |= MT76_SET(MT_TXD_PKT_INFO_QSEL, qsel); + flags |= FIELD_PREP(MT_TXD_PKT_INFO_QSEL, qsel); return mt7601u_dma_skb_wrap(skb, WLAN_PORT, DMA_PACKET, flags); } diff --git a/drivers/net/wireless/mediatek/mt7601u/eeprom.c b/drivers/net/wireless/mediatek/mt7601u/eeprom.c index 8d8ee0344f7b..da6faea092d6 100644 --- a/drivers/net/wireless/mediatek/mt7601u/eeprom.c +++ b/drivers/net/wireless/mediatek/mt7601u/eeprom.c @@ -45,8 +45,8 @@ mt7601u_efuse_read(struct mt7601u_dev *dev, u16 addr, u8 *data, val = mt76_rr(dev, MT_EFUSE_CTRL); val &= ~(MT_EFUSE_CTRL_AIN | MT_EFUSE_CTRL_MODE); - val |= MT76_SET(MT_EFUSE_CTRL_AIN, addr & ~0xf) | - MT76_SET(MT_EFUSE_CTRL_MODE, mode) | + val |= FIELD_PREP(MT_EFUSE_CTRL_AIN, addr & ~0xf) | + FIELD_PREP(MT_EFUSE_CTRL_MODE, mode) | MT_EFUSE_CTRL_KICK; mt76_wr(dev, MT_EFUSE_CTRL, val); @@ -128,8 +128,8 @@ mt7601u_set_chip_cap(struct mt7601u_dev *dev, u8 *eeprom) if (!field_valid(nic_conf0 >> 8)) return; - if (MT76_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 || - MT76_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1) + if (FIELD_GET(MT_EE_NIC_CONF_0_RX_PATH, nic_conf0) > 1 || + FIELD_GET(MT_EE_NIC_CONF_0_TX_PATH, nic_conf0) > 1) dev_err(dev->dev, "Error: device has more than 1 RX/TX stream!\n"); } @@ -150,7 +150,7 @@ mt7601u_set_macaddr(struct mt7601u_dev *dev, const u8 *eeprom) mt76_wr(dev, MT_MAC_ADDR_DW0, get_unaligned_le32(dev->macaddr)); mt76_wr(dev, MT_MAC_ADDR_DW1, get_unaligned_le16(dev->macaddr + 4) | - MT76_SET(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff)); + FIELD_PREP(MT_MAC_ADDR_DW1_U2ME_MASK, 0xff)); return 0; } @@ -176,7 +176,7 @@ mt7601u_set_channel_power(struct mt7601u_dev *dev, u8 *eeprom) u8 max_pwr; val = mt7601u_rr(dev, MT_TX_ALC_CFG_0); - max_pwr = MT76_GET(MT_TX_ALC_CFG_0_LIMIT_0, val); + max_pwr = FIELD_GET(MT_TX_ALC_CFG_0_LIMIT_0, val); if (mt7601u_has_tssi(dev, eeprom)) { mt7601u_set_channel_target_power(dev, eeprom, max_pwr); diff --git a/drivers/net/wireless/mediatek/mt7601u/init.c b/drivers/net/wireless/mediatek/mt7601u/init.c index 8fa78d7156be..44d46e25db80 100644 --- a/drivers/net/wireless/mediatek/mt7601u/init.c +++ b/drivers/net/wireless/mediatek/mt7601u/init.c @@ -108,8 +108,9 @@ static void mt7601u_init_usb_dma(struct mt7601u_dev *dev) { u32 val; - val = MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) | - MT76_SET(MT_USB_DMA_CFG_RX_BULK_AGG_LMT, MT_USB_AGGR_SIZE_LIMIT) | + val = FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_TOUT, MT_USB_AGGR_TIMEOUT) | + FIELD_PREP(MT_USB_DMA_CFG_RX_BULK_AGG_LMT, + MT_USB_AGGR_SIZE_LIMIT) | MT_USB_DMA_CFG_RX_BULK_EN | MT_USB_DMA_CFG_TX_BULK_EN; if (dev->in_max_packet == 512) @@ -396,8 +397,9 @@ int mt7601u_init_hardware(struct mt7601u_dev *dev) mt7601u_rmw(dev, MT_US_CYC_CFG, MT_US_CYC_CNT, 0x1e); - mt7601u_wr(dev, MT_TXOP_CTRL_CFG, MT76_SET(MT_TXOP_TRUN_EN, 0x3f) | - MT76_SET(MT_TXOP_EXT_CCA_DLY, 0x58)); + mt7601u_wr(dev, MT_TXOP_CTRL_CFG, + FIELD_PREP(MT_TXOP_TRUN_EN, 0x3f) | + FIELD_PREP(MT_TXOP_EXT_CCA_DLY, 0x58)); ret = mt7601u_eeprom_init(dev); if (ret) diff --git a/drivers/net/wireless/mediatek/mt7601u/mac.c b/drivers/net/wireless/mediatek/mt7601u/mac.c index e21c53ed09fb..3c576392ed89 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mac.c +++ b/drivers/net/wireless/mediatek/mt7601u/mac.c @@ -19,13 +19,13 @@ static void mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate) { - u8 idx = MT76_GET(MT_TXWI_RATE_MCS, rate); + u8 idx = FIELD_GET(MT_TXWI_RATE_MCS, rate); txrate->idx = 0; txrate->flags = 0; txrate->count = 1; - switch (MT76_GET(MT_TXWI_RATE_PHY_MODE, rate)) { + switch (FIELD_GET(MT_TXWI_RATE_PHY_MODE, rate)) { case MT_PHY_TYPE_OFDM: txrate->idx = idx + 4; return; @@ -47,7 +47,7 @@ mt76_mac_process_tx_rate(struct ieee80211_tx_rate *txrate, u16 rate) return; } - if (MT76_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40) + if (FIELD_GET(MT_TXWI_RATE_BW, rate) == MT_PHY_BW_40) txrate->flags |= IEEE80211_TX_RC_40_MHZ_WIDTH; if (rate & MT_TXWI_RATE_SGI) @@ -125,9 +125,9 @@ u16 mt76_mac_tx_rate_val(struct mt7601u_dev *dev, bw = 0; } - rateval = MT76_SET(MT_RXWI_RATE_MCS, rate_idx); - rateval |= MT76_SET(MT_RXWI_RATE_PHY, phy); - rateval |= MT76_SET(MT_RXWI_RATE_BW, bw); + rateval = FIELD_PREP(MT_RXWI_RATE_MCS, rate_idx); + rateval |= FIELD_PREP(MT_RXWI_RATE_PHY, phy); + rateval |= FIELD_PREP(MT_RXWI_RATE_BW, bw); if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rateval |= MT_RXWI_RATE_SGI; @@ -156,9 +156,9 @@ struct mt76_tx_status mt7601u_mac_fetch_tx_status(struct mt7601u_dev *dev) stat.success = !!(val & MT_TX_STAT_FIFO_SUCCESS); stat.aggr = !!(val & MT_TX_STAT_FIFO_AGGR); stat.ack_req = !!(val & MT_TX_STAT_FIFO_ACKREQ); - stat.pktid = MT76_GET(MT_TX_STAT_FIFO_PID_TYPE, val); - stat.wcid = MT76_GET(MT_TX_STAT_FIFO_WCID, val); - stat.rate = MT76_GET(MT_TX_STAT_FIFO_RATE, val); + stat.pktid = FIELD_GET(MT_TX_STAT_FIFO_PID_TYPE, val); + stat.wcid = FIELD_GET(MT_TX_STAT_FIFO_WCID, val); + stat.rate = FIELD_GET(MT_TX_STAT_FIFO_RATE, val); return stat; } @@ -270,7 +270,7 @@ void mt7601u_mac_config_tsf(struct mt7601u_dev *dev, bool enable, int interval) } val &= ~MT_BEACON_TIME_CFG_INTVAL; - val |= MT76_SET(MT_BEACON_TIME_CFG_INTVAL, interval << 4) | + val |= FIELD_PREP(MT_BEACON_TIME_CFG_INTVAL, interval << 4) | MT_BEACON_TIME_CFG_TIMER_EN | MT_BEACON_TIME_CFG_SYNC_MODE | MT_BEACON_TIME_CFG_TBTT_EN; @@ -349,8 +349,8 @@ mt7601u_mac_wcid_setup(struct mt7601u_dev *dev, u8 idx, u8 vif_idx, u8 *mac) u8 zmac[ETH_ALEN] = {}; u32 attr; - attr = MT76_SET(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) | - MT76_SET(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8)); + attr = FIELD_PREP(MT_WCID_ATTR_BSS_IDX, vif_idx & 7) | + FIELD_PREP(MT_WCID_ATTR_BSS_IDX_EXT, !!(vif_idx & 8)); mt76_wr(dev, MT_WCID_ATTR(idx), attr); @@ -382,15 +382,15 @@ void mt7601u_mac_set_ampdu_factor(struct mt7601u_dev *dev) rcu_read_unlock(); mt7601u_wr(dev, MT_MAX_LEN_CFG, 0xa0fff | - MT76_SET(MT_MAX_LEN_CFG_AMPDU, min_factor)); + FIELD_PREP(MT_MAX_LEN_CFG_AMPDU, min_factor)); } static void mt76_mac_process_rate(struct ieee80211_rx_status *status, u16 rate) { - u8 idx = MT76_GET(MT_RXWI_RATE_MCS, rate); + u8 idx = FIELD_GET(MT_RXWI_RATE_MCS, rate); - switch (MT76_GET(MT_RXWI_RATE_PHY, rate)) { + switch (FIELD_GET(MT_RXWI_RATE_PHY, rate)) { case MT_PHY_TYPE_OFDM: if (WARN_ON(idx >= 8)) idx = 0; @@ -436,7 +436,7 @@ mt7601u_rx_monitor_beacon(struct mt7601u_dev *dev, struct mt7601u_rxwi *rxwi, u16 rate, int rssi) { dev->bcn_freq_off = rxwi->freq_off; - dev->bcn_phy_mode = MT76_GET(MT_RXWI_RATE_PHY, rate); + dev->bcn_phy_mode = FIELD_GET(MT_RXWI_RATE_PHY, rate); dev->avg_rssi = (dev->avg_rssi * 15) / 16 + (rssi << 8); } @@ -458,7 +458,7 @@ u32 mt76_mac_process_rx(struct mt7601u_dev *dev, struct sk_buff *skb, u16 rate = le16_to_cpu(rxwi->rate); int rssi; - len = MT76_GET(MT_RXWI_CTL_MPDU_LEN, ctl); + len = FIELD_GET(MT_RXWI_CTL_MPDU_LEN, ctl); if (len < 10) return 0; @@ -542,8 +542,8 @@ int mt76_mac_wcid_set_key(struct mt7601u_dev *dev, u8 idx, val = mt7601u_rr(dev, MT_WCID_ATTR(idx)); val &= ~MT_WCID_ATTR_PKEY_MODE & ~MT_WCID_ATTR_PKEY_MODE_EXT; - val |= MT76_SET(MT_WCID_ATTR_PKEY_MODE, cipher & 7) | - MT76_SET(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3); + val |= FIELD_PREP(MT_WCID_ATTR_PKEY_MODE, cipher & 7) | + FIELD_PREP(MT_WCID_ATTR_PKEY_MODE_EXT, cipher >> 3); val &= ~MT_WCID_ATTR_PAIRWISE; val |= MT_WCID_ATTR_PAIRWISE * !!(key && key->flags & IEEE80211_KEY_FLAG_PAIRWISE); diff --git a/drivers/net/wireless/mediatek/mt7601u/main.c b/drivers/net/wireless/mediatek/mt7601u/main.c index e70dd9523911..43ebd460ba86 100644 --- a/drivers/net/wireless/mediatek/mt7601u/main.c +++ b/drivers/net/wireless/mediatek/mt7601u/main.c @@ -15,7 +15,6 @@ #include "mt7601u.h" #include "mac.h" #include -#include static int mt7601u_start(struct ieee80211_hw *hw) { diff --git a/drivers/net/wireless/mediatek/mt7601u/mcu.c b/drivers/net/wireless/mediatek/mt7601u/mcu.c index 91c4b3427965..dbdfb3f5c507 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mcu.c +++ b/drivers/net/wireless/mediatek/mt7601u/mcu.c @@ -43,8 +43,8 @@ static inline void mt7601u_dma_skb_wrap_cmd(struct sk_buff *skb, u8 seq, enum mcu_cmd cmd) { WARN_ON(mt7601u_dma_skb_wrap(skb, CPU_TX_PORT, DMA_COMMAND, - MT76_SET(MT_TXD_CMD_INFO_SEQ, seq) | - MT76_SET(MT_TXD_CMD_INFO_TYPE, cmd))); + FIELD_PREP(MT_TXD_CMD_INFO_SEQ, seq) | + FIELD_PREP(MT_TXD_CMD_INFO_TYPE, cmd))); } static inline void trace_mt_mcu_msg_send_cs(struct mt7601u_dev *dev, @@ -100,13 +100,13 @@ static int mt7601u_mcu_wait_resp(struct mt7601u_dev *dev, u8 seq) dev_err(dev->dev, "Error: MCU resp urb failed:%d\n", urb_status); - if (MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq && - MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE) + if (FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce) == seq && + FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce) == CMD_DONE) return 0; - dev_err(dev->dev, "Error: MCU resp evt:%hhx seq:%hhx-%hhx!\n", - MT76_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce), - seq, MT76_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce)); + dev_err(dev->dev, "Error: MCU resp evt:%lx seq:%hhx-%lx!\n", + FIELD_GET(MT_RXD_CMD_INFO_EVT_TYPE, rxfce), + seq, FIELD_GET(MT_RXD_CMD_INFO_CMD_SEQ, rxfce)); } dev_err(dev->dev, "Error: %s timed out\n", __func__); @@ -291,9 +291,9 @@ static int __mt7601u_dma_fw(struct mt7601u_dev *dev, u32 val; int ret; - reg = cpu_to_le32(MT76_SET(MT_TXD_INFO_TYPE, DMA_PACKET) | - MT76_SET(MT_TXD_INFO_D_PORT, CPU_TX_PORT) | - MT76_SET(MT_TXD_INFO_LEN, len)); + reg = cpu_to_le32(FIELD_PREP(MT_TXD_INFO_TYPE, DMA_PACKET) | + FIELD_PREP(MT_TXD_INFO_D_PORT, CPU_TX_PORT) | + FIELD_PREP(MT_TXD_INFO_LEN, len)); memcpy(buf.buf, ®, sizeof(reg)); memcpy(buf.buf + sizeof(reg), data, len); memset(buf.buf + sizeof(reg) + len, 0, 8); diff --git a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h index 428bd2f10b7b..c7ec40475a5f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/mt7601u.h +++ b/drivers/net/wireless/mediatek/mt7601u/mt7601u.h @@ -15,6 +15,7 @@ #ifndef MT7601U_H #define MT7601U_H +#include #include #include #include @@ -24,7 +25,6 @@ #include #include "regs.h" -#include "util.h" #define MT_CALIBRATE_INTERVAL (4 * HZ) @@ -299,7 +299,7 @@ bool mt76_poll_msec(struct mt7601u_dev *dev, u32 offset, u32 mask, u32 val, /* Compatibility with mt76 */ #define mt76_rmw_field(_dev, _reg, _field, _val) \ - mt76_rmw(_dev, _reg, _field, MT76_SET(_field, _val)) + mt76_rmw(_dev, _reg, _field, FIELD_PREP(_field, _val)) static inline u32 mt76_rr(struct mt7601u_dev *dev, u32 offset) { diff --git a/drivers/net/wireless/mediatek/mt7601u/phy.c b/drivers/net/wireless/mediatek/mt7601u/phy.c index 1908af6add87..ca09a5d4305e 100644 --- a/drivers/net/wireless/mediatek/mt7601u/phy.c +++ b/drivers/net/wireless/mediatek/mt7601u/phy.c @@ -41,11 +41,12 @@ mt7601u_rf_wr(struct mt7601u_dev *dev, u8 bank, u8 offset, u8 value) goto out; } - mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_DATA, value) | - MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) | - MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) | - MT_RF_CSR_CFG_WR | - MT_RF_CSR_CFG_KICK); + mt7601u_wr(dev, MT_RF_CSR_CFG, + FIELD_PREP(MT_RF_CSR_CFG_DATA, value) | + FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) | + FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) | + MT_RF_CSR_CFG_WR | + MT_RF_CSR_CFG_KICK); trace_rf_write(dev, bank, offset, value); out: mutex_unlock(&dev->reg_atomic_mutex); @@ -74,17 +75,18 @@ mt7601u_rf_rr(struct mt7601u_dev *dev, u8 bank, u8 offset) if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100)) goto out; - mt7601u_wr(dev, MT_RF_CSR_CFG, MT76_SET(MT_RF_CSR_CFG_REG_BANK, bank) | - MT76_SET(MT_RF_CSR_CFG_REG_ID, offset) | - MT_RF_CSR_CFG_KICK); + mt7601u_wr(dev, MT_RF_CSR_CFG, + FIELD_PREP(MT_RF_CSR_CFG_REG_BANK, bank) | + FIELD_PREP(MT_RF_CSR_CFG_REG_ID, offset) | + MT_RF_CSR_CFG_KICK); if (!mt76_poll(dev, MT_RF_CSR_CFG, MT_RF_CSR_CFG_KICK, 0, 100)) goto out; val = mt7601u_rr(dev, MT_RF_CSR_CFG); - if (MT76_GET(MT_RF_CSR_CFG_REG_ID, val) == offset && - MT76_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) { - ret = MT76_GET(MT_RF_CSR_CFG_DATA, val); + if (FIELD_GET(MT_RF_CSR_CFG_REG_ID, val) == offset && + FIELD_GET(MT_RF_CSR_CFG_REG_BANK, val) == bank) { + ret = FIELD_GET(MT_RF_CSR_CFG_DATA, val); trace_rf_read(dev, bank, offset, ret); } out: @@ -139,8 +141,8 @@ static void mt7601u_bbp_wr(struct mt7601u_dev *dev, u8 offset, u8 val) } mt7601u_wr(dev, MT_BBP_CSR_CFG, - MT76_SET(MT_BBP_CSR_CFG_VAL, val) | - MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) | + FIELD_PREP(MT_BBP_CSR_CFG_VAL, val) | + FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) | MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY); trace_bbp_write(dev, offset, val); out: @@ -163,7 +165,7 @@ static int mt7601u_bbp_rr(struct mt7601u_dev *dev, u8 offset) goto out; mt7601u_wr(dev, MT_BBP_CSR_CFG, - MT76_SET(MT_BBP_CSR_CFG_REG_NUM, offset) | + FIELD_PREP(MT_BBP_CSR_CFG_REG_NUM, offset) | MT_BBP_CSR_CFG_RW_MODE | MT_BBP_CSR_CFG_BUSY | MT_BBP_CSR_CFG_READ); @@ -171,8 +173,8 @@ static int mt7601u_bbp_rr(struct mt7601u_dev *dev, u8 offset) goto out; val = mt7601u_rr(dev, MT_BBP_CSR_CFG); - if (MT76_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) { - ret = MT76_GET(MT_BBP_CSR_CFG_VAL, val); + if (FIELD_GET(MT_BBP_CSR_CFG_REG_NUM, val) == offset) { + ret = FIELD_GET(MT_BBP_CSR_CFG_VAL, val); trace_bbp_read(dev, offset, ret); } out: @@ -249,9 +251,9 @@ int mt7601u_phy_get_rssi(struct mt7601u_dev *dev, /* bw40 */ { -2, 16, 34 } } }; - int bw = MT76_GET(MT_RXWI_RATE_BW, rate); - int aux_lna = MT76_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant); - int lna_id = MT76_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain); + int bw = FIELD_GET(MT_RXWI_RATE_BW, rate); + int aux_lna = FIELD_GET(MT_RXWI_ANT_AUX_LNA, rxwi->ant); + int lna_id = FIELD_GET(MT_RXWI_GAIN_RSSI_LNA_ID, rxwi->gain); int val; if (lna_id) /* LNA id can be 0, 2, 3. */ @@ -259,7 +261,7 @@ int mt7601u_phy_get_rssi(struct mt7601u_dev *dev, val = 8; val -= lna[aux_lna][bw][lna_id]; - val -= MT76_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain); + val -= FIELD_GET(MT_RXWI_GAIN_RSSI_VAL, rxwi->gain); val -= dev->ee->lna_gain; val -= dev->ee->rssi_offset[0]; @@ -939,7 +941,7 @@ static int mt7601u_tssi_cal(struct mt7601u_dev *dev) dev_dbg(dev->dev, "final diff: %08x\n", diff_pwr); val = mt7601u_rr(dev, MT_TX_ALC_CFG_1); - curr_pwr = s6_to_int(MT76_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val)); + curr_pwr = s6_to_int(FIELD_GET(MT_TX_ALC_CFG_1_TEMP_COMP, val)); diff_pwr += curr_pwr; val = (val & ~MT_TX_ALC_CFG_1_TEMP_COMP) | int_to_s6(diff_pwr); mt7601u_wr(dev, MT_TX_ALC_CFG_1, val); diff --git a/drivers/net/wireless/mediatek/mt7601u/regs.h b/drivers/net/wireless/mediatek/mt7601u/regs.h index afd8978e83fa..27a429d90cec 100644 --- a/drivers/net/wireless/mediatek/mt7601u/regs.h +++ b/drivers/net/wireless/mediatek/mt7601u/regs.h @@ -17,10 +17,6 @@ #include -#ifndef GENMASK -#define GENMASK(h, l) (((U32_C(1) << ((h) - (l) + 1)) - 1) << (l)) -#endif - #define MT_ASIC_VERSION 0x0000 #define MT76XX_REV_E3 0x22 diff --git a/drivers/net/wireless/mediatek/mt7601u/tx.c b/drivers/net/wireless/mediatek/mt7601u/tx.c index a0a33dc8f6bc..ad77bec1ba0f 100644 --- a/drivers/net/wireless/mediatek/mt7601u/tx.c +++ b/drivers/net/wireless/mediatek/mt7601u/tx.c @@ -175,11 +175,12 @@ mt7601u_push_txwi(struct mt7601u_dev *dev, struct sk_buff *skb, ba_size = min_t(int, 63, ba_size); if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) ba_size = 0; - txwi->ack_ctl |= MT76_SET(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size); + txwi->ack_ctl |= FIELD_PREP(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size); - txwi->flags = cpu_to_le16(MT_TXWI_FLAGS_AMPDU | - MT76_SET(MT_TXWI_FLAGS_MPDU_DENSITY, - sta->ht_cap.ampdu_density)); + txwi->flags = + cpu_to_le16(MT_TXWI_FLAGS_AMPDU | + FIELD_PREP(MT_TXWI_FLAGS_MPDU_DENSITY, + sta->ht_cap.ampdu_density)); if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) txwi->flags = 0; } @@ -188,7 +189,7 @@ mt7601u_push_txwi(struct mt7601u_dev *dev, struct sk_buff *skb, is_probe = !!(info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE); pkt_id = mt7601u_tx_pktid_enc(dev, rate_ctl & 0x7, is_probe); - pkt_len |= MT76_SET(MT_TXWI_LEN_PKTID, pkt_id); + pkt_len |= FIELD_PREP(MT_TXWI_LEN_PKTID, pkt_id); txwi->len_ctl = cpu_to_le16(pkt_len); return txwi; @@ -285,9 +286,9 @@ int mt7601u_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, WARN_ON(cw_min > 0xf); WARN_ON(cw_max > 0xf); - val = MT76_SET(MT_EDCA_CFG_AIFSN, params->aifs) | - MT76_SET(MT_EDCA_CFG_CWMIN, cw_min) | - MT76_SET(MT_EDCA_CFG_CWMAX, cw_max); + val = FIELD_PREP(MT_EDCA_CFG_AIFSN, params->aifs) | + FIELD_PREP(MT_EDCA_CFG_CWMIN, cw_min) | + FIELD_PREP(MT_EDCA_CFG_CWMAX, cw_max); /* TODO: based on user-controlled EnableTxBurst var vendor drv sets * a really long txop on AC0 (see connect.c:2009) but only on * connect? When not connected should be 0. @@ -295,7 +296,7 @@ int mt7601u_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, if (!hw_q) val |= 0x60; else - val |= MT76_SET(MT_EDCA_CFG_TXOP, params->txop); + val |= FIELD_PREP(MT_EDCA_CFG_TXOP, params->txop); mt76_wr(dev, MT_EDCA_CFG_AC(hw_q), val); val = mt76_rr(dev, MT_WMM_TXOP(hw_q)); diff --git a/drivers/net/wireless/mediatek/mt7601u/util.h b/drivers/net/wireless/mediatek/mt7601u/util.h deleted file mode 100644 index b89140bf1210..000000000000 --- a/drivers/net/wireless/mediatek/mt7601u/util.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2014 Felix Fietkau - * Copyright (C) 2004 - 2009 Ivo van Doorn - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 - * as published by the Free Software Foundation - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __MT76_UTIL_H -#define __MT76_UTIL_H - -/* - * Power of two check, this will check - * if the mask that has been given contains and contiguous set of bits. - * Note that we cannot use the is_power_of_2() function since this - * check must be done at compile-time. - */ -#define is_power_of_two(x) ( !((x) & ((x)-1)) ) -#define low_bit_mask(x) ( ((x)-1) & ~(x) ) -#define is_valid_mask(x) is_power_of_two(1LU + (x) + low_bit_mask(x)) - -/* - * Macros to find first set bit in a variable. - * These macros behave the same as the __ffs() functions but - * the most important difference that this is done during - * compile-time rather then run-time. - */ -#define compile_ffs2(__x) \ - __builtin_choose_expr(((__x) & 0x1), 0, 1) - -#define compile_ffs4(__x) \ - __builtin_choose_expr(((__x) & 0x3), \ - (compile_ffs2((__x))), \ - (compile_ffs2((__x) >> 2) + 2)) - -#define compile_ffs8(__x) \ - __builtin_choose_expr(((__x) & 0xf), \ - (compile_ffs4((__x))), \ - (compile_ffs4((__x) >> 4) + 4)) - -#define compile_ffs16(__x) \ - __builtin_choose_expr(((__x) & 0xff), \ - (compile_ffs8((__x))), \ - (compile_ffs8((__x) >> 8) + 8)) - -#define compile_ffs32(__x) \ - __builtin_choose_expr(((__x) & 0xffff), \ - (compile_ffs16((__x))), \ - (compile_ffs16((__x) >> 16) + 16)) - -/* - * This macro will check the requirements for the FIELD{8,16,32} macros - * The mask should be a constant non-zero contiguous set of bits which - * does not exceed the given typelimit. - */ -#define FIELD_CHECK(__mask) \ - BUILD_BUG_ON(!(__mask) || !is_valid_mask(__mask)) - -#define MT76_SET(_mask, _val) \ - ({ \ - FIELD_CHECK(_mask); \ - (((u32) (_val)) << compile_ffs32(_mask)) & _mask; \ - }) - -#define MT76_GET(_mask, _val) \ - ({ \ - FIELD_CHECK(_mask); \ - (u32) (((_val) & _mask) >> compile_ffs32(_mask)); \ - }) - -#endif diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c index 7cf26c6124d1..6005e14213ca 100644 --- a/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c +++ b/drivers/net/wireless/ralink/rt2x00/rt2x00usb.c @@ -831,8 +831,10 @@ int rt2x00usb_probe(struct usb_interface *usb_intf, rt2x00dev->anchor = devm_kmalloc(&usb_dev->dev, sizeof(struct usb_anchor), GFP_KERNEL); - if (!rt2x00dev->anchor) + if (!rt2x00dev->anchor) { + retval = -ENOMEM; goto exit_free_reg; + } init_usb_anchor(rt2x00dev->anchor); return 0; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h index 4341d56805f8..1016628926d2 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu.h @@ -29,6 +29,7 @@ #define RTL8XXXU_DEBUG_H2C 0x800 #define RTL8XXXU_DEBUG_ACTION 0x1000 #define RTL8XXXU_DEBUG_EFUSE 0x2000 +#define RTL8XXXU_DEBUG_INTERRUPT 0x4000 #define RTW_USB_CONTROL_MSG_TIMEOUT 500 #define RTL8XXXU_MAX_REG_POLL 500 @@ -43,6 +44,7 @@ #define TX_TOTAL_PAGE_NUM 0xf8 #define TX_TOTAL_PAGE_NUM_8192E 0xf3 +#define TX_TOTAL_PAGE_NUM_8723B 0xf7 /* (HPQ + LPQ + NPQ + PUBQ) = TX_TOTAL_PAGE_NUM */ #define TX_PAGE_NUM_PUBQ 0xe7 #define TX_PAGE_NUM_HI_PQ 0x0c @@ -54,6 +56,11 @@ #define TX_PAGE_NUM_LO_PQ_8192E 0x0c #define TX_PAGE_NUM_NORM_PQ_8192E 0x00 +#define TX_PAGE_NUM_PUBQ_8723B 0xe7 +#define TX_PAGE_NUM_HI_PQ_8723B 0x0c +#define TX_PAGE_NUM_LO_PQ_8723B 0x02 +#define TX_PAGE_NUM_NORM_PQ_8723B 0x02 + #define RTL_FW_PAGE_SIZE 4096 #define RTL8XXXU_FIRMWARE_POLL_MAX 1000 @@ -1312,7 +1319,7 @@ struct rtl8xxxu_fileops { int (*power_on) (struct rtl8xxxu_priv *priv); void (*power_off) (struct rtl8xxxu_priv *priv); void (*reset_8051) (struct rtl8xxxu_priv *priv); - int (*llt_init) (struct rtl8xxxu_priv *priv, u8 last_tx_page); + int (*llt_init) (struct rtl8xxxu_priv *priv); void (*init_phy_bb) (struct rtl8xxxu_priv *priv); int (*init_phy_rf) (struct rtl8xxxu_priv *priv); void (*phy_init_antenna_selection) (struct rtl8xxxu_priv *priv); @@ -1330,11 +1337,17 @@ struct rtl8xxxu_fileops { u32 ramask, int sgi); void (*report_connect) (struct rtl8xxxu_priv *priv, u8 macid, bool connect); + void (*fill_txdesc) (struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); int writeN_block_size; int rx_agg_buf_size; char tx_desc_size; char rx_desc_size; - char has_s0s1; + u8 has_s0s1:1; + u8 has_tx_report:1; + u8 gen2_thermal_meter:1; u32 adda_1t_init; u32 adda_1t_path_on; u32 adda_2t_path_on_a; @@ -1388,14 +1401,14 @@ int rtl8xxxu_load_firmware(struct rtl8xxxu_priv *priv, char *fw_name); void rtl8xxxu_firmware_self_reset(struct rtl8xxxu_priv *priv); void rtl8xxxu_power_off(struct rtl8xxxu_priv *priv); void rtl8xxxu_reset_8051(struct rtl8xxxu_priv *priv); -int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page); +int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen2_prepare_calibrate(struct rtl8xxxu_priv *priv, u8 start); int rtl8xxxu_flush_fifo(struct rtl8xxxu_priv *priv); int rtl8xxxu_gen2_h2c_cmd(struct rtl8xxxu_priv *priv, struct h2c_cmd *h2c, int len); int rtl8xxxu_active_to_lps(struct rtl8xxxu_priv *priv); void rtl8xxxu_disabled_to_emu(struct rtl8xxxu_priv *priv); -int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page); +int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_phy_iq_calibrate(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_init_phy_bb(struct rtl8xxxu_priv *priv); void rtl8xxxu_gen1_set_tx_power(struct rtl8xxxu_priv *priv, @@ -1421,6 +1434,14 @@ int rtl8xxxu_parse_rxdesc24(struct rtl8xxxu_priv *priv, struct sk_buff *skb); int rtl8xxxu_gen2_channel_to_group(int channel); bool rtl8xxxu_gen2_simularity_compare(struct rtl8xxxu_priv *priv, int result[][8], int c1, int c2); +void rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); +void rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable); extern struct rtl8xxxu_fileops rtl8192cu_fops; extern struct rtl8xxxu_fileops rtl8192eu_fops; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c index 69d1a1453ede..f9e2050812ab 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192c.c @@ -567,6 +567,7 @@ struct rtl8xxxu_fileops rtl8192cu_fops = { .set_tx_power = rtl8xxxu_gen1_set_tx_power, .update_rate_mask = rtl8xxxu_update_rate_mask, .report_connect = rtl8xxxu_gen1_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v1, .writeN_block_size = 128, .rx_agg_buf_size = 16000, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc32), @@ -579,5 +580,9 @@ struct rtl8xxxu_fileops rtl8192cu_fops = { .pbp_rx = PBP_PAGE_SIZE_128, .pbp_tx = PBP_PAGE_SIZE_128, .mactable = rtl8xxxu_gen1_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM, + .page_num_hi = TX_PAGE_NUM_HI_PQ, + .page_num_lo = TX_PAGE_NUM_LO_PQ, + .page_num_norm = TX_PAGE_NUM_NORM_PQ, }; #endif diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index 9a1994f49b7b..df54d27e7851 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1396,6 +1396,114 @@ exit: return ret; } +static int rtl8192eu_active_to_lps(struct rtl8xxxu_priv *priv) +{ + struct device *dev = &priv->udev->dev; + u8 val8; + u16 val16; + u32 val32; + int retry, retval; + + rtl8xxxu_write8(priv, REG_TXPAUSE, 0xff); + + retry = 100; + retval = -EBUSY; + /* + * Poll 32 bit wide 0x05f8 for 0x00000000 to ensure no TX is pending. + */ + do { + val32 = rtl8xxxu_read32(priv, REG_SCH_TX_CMD); + if (!val32) { + retval = 0; + break; + } + } while (retry--); + + if (!retry) { + dev_warn(dev, "Failed to flush TX queue\n"); + retval = -EBUSY; + goto out; + } + + /* Disable CCK and OFDM, clock gated */ + val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC); + val8 &= ~SYS_FUNC_BBRSTB; + rtl8xxxu_write8(priv, REG_SYS_FUNC, val8); + + udelay(2); + + /* Reset whole BB */ + val8 = rtl8xxxu_read8(priv, REG_SYS_FUNC); + val8 &= ~SYS_FUNC_BB_GLB_RSTN; + rtl8xxxu_write8(priv, REG_SYS_FUNC, val8); + + /* Reset MAC TRX */ + val16 = rtl8xxxu_read16(priv, REG_CR); + val16 &= 0xff00; + val16 |= (CR_HCI_TXDMA_ENABLE | CR_HCI_RXDMA_ENABLE); + rtl8xxxu_write16(priv, REG_CR, val16); + + val16 = rtl8xxxu_read16(priv, REG_CR); + val16 &= ~CR_SECURITY_ENABLE; + rtl8xxxu_write16(priv, REG_CR, val16); + + val8 = rtl8xxxu_read8(priv, REG_DUAL_TSF_RST); + val8 |= DUAL_TSF_TX_OK; + rtl8xxxu_write8(priv, REG_DUAL_TSF_RST, val8); + +out: + return retval; +} + +static int rtl8192eu_active_to_emu(struct rtl8xxxu_priv *priv) +{ + u8 val8; + int count, ret = 0; + + /* Turn off RF */ + rtl8xxxu_write8(priv, REG_RF_CTRL, 0); + + /* Switch DPDT_SEL_P output from register 0x65[2] */ + val8 = rtl8xxxu_read8(priv, REG_LEDCFG2); + val8 &= ~LEDCFG2_DPDT_SELECT; + rtl8xxxu_write8(priv, REG_LEDCFG2, val8); + + /* 0x0005[1] = 1 turn off MAC by HW state machine*/ + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + val8 |= BIT(1); + rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8); + + for (count = RTL8XXXU_MAX_REG_POLL; count; count--) { + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + if ((val8 & BIT(1)) == 0) + break; + udelay(10); + } + + if (!count) { + dev_warn(&priv->udev->dev, "%s: Disabling MAC timed out\n", + __func__); + ret = -EBUSY; + goto exit; + } + +exit: + return ret; +} + +static int rtl8192eu_emu_to_disabled(struct rtl8xxxu_priv *priv) +{ + u8 val8; + + /* 0x04[12:11] = 01 enable WL suspend */ + val8 = rtl8xxxu_read8(priv, REG_APS_FSMCO + 1); + val8 &= ~(BIT(3) | BIT(4)); + val8 |= BIT(3); + rtl8xxxu_write8(priv, REG_APS_FSMCO + 1, val8); + + return 0; +} + static int rtl8192eu_power_on(struct rtl8xxxu_priv *priv) { u16 val16; @@ -1446,6 +1554,40 @@ exit: return ret; } +void rtl8192eu_power_off(struct rtl8xxxu_priv *priv) +{ + u8 val8; + u16 val16; + + rtl8xxxu_flush_fifo(priv); + + val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); + val8 &= ~TX_REPORT_CTRL_TIMER_ENABLE; + rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); + + /* Turn off RF */ + rtl8xxxu_write8(priv, REG_RF_CTRL, 0x00); + + rtl8192eu_active_to_lps(priv); + + /* Reset Firmware if running in RAM */ + if (rtl8xxxu_read8(priv, REG_MCU_FW_DL) & MCU_FW_RAM_SEL) + rtl8xxxu_firmware_self_reset(priv); + + /* Reset MCU */ + val16 = rtl8xxxu_read16(priv, REG_SYS_FUNC); + val16 &= ~SYS_FUNC_CPU_ENABLE; + rtl8xxxu_write16(priv, REG_SYS_FUNC, val16); + + /* Reset MCU ready status */ + rtl8xxxu_write8(priv, REG_MCU_FW_DL, 0x00); + + rtl8xxxu_reset_8051(priv); + + rtl8192eu_active_to_emu(priv); + rtl8192eu_emu_to_disabled(priv); +} + static void rtl8192e_enable_rf(struct rtl8xxxu_priv *priv) { u32 val32; @@ -1487,7 +1629,7 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .parse_efuse = rtl8192eu_parse_efuse, .load_firmware = rtl8192eu_load_firmware, .power_on = rtl8192eu_power_on, - .power_off = rtl8xxxu_power_off, + .power_off = rtl8192eu_power_off, .reset_8051 = rtl8xxxu_reset_8051, .llt_init = rtl8xxxu_auto_llt_table, .init_phy_bb = rtl8192eu_init_phy_bb, @@ -1501,10 +1643,12 @@ struct rtl8xxxu_fileops rtl8192eu_fops = { .set_tx_power = rtl8192e_set_tx_power, .update_rate_mask = rtl8xxxu_gen2_update_rate_mask, .report_connect = rtl8xxxu_gen2_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v2, .writeN_block_size = 128, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 0, + .gen2_thermal_meter = 1, .adda_1t_init = 0x0fc01616, .adda_1t_path_on = 0x0fc01616, .adda_2t_path_on_a = 0x0fc01616, diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c index 686c551581b1..aef373028155 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723a.c @@ -384,6 +384,7 @@ struct rtl8xxxu_fileops rtl8723au_fops = { .set_tx_power = rtl8xxxu_gen1_set_tx_power, .update_rate_mask = rtl8xxxu_update_rate_mask, .report_connect = rtl8xxxu_gen1_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v1, .writeN_block_size = 1024, .rx_agg_buf_size = 16000, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc32), @@ -396,4 +397,8 @@ struct rtl8xxxu_fileops rtl8723au_fops = { .pbp_rx = PBP_PAGE_SIZE_128, .pbp_tx = PBP_PAGE_SIZE_128, .mactable = rtl8xxxu_gen1_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM, + .page_num_hi = TX_PAGE_NUM_HI_PQ, + .page_num_lo = TX_PAGE_NUM_LO_PQ, + .page_num_norm = TX_PAGE_NUM_NORM_PQ, }; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c index 9d45afb0e3fd..6c086b5657e9 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8723b.c @@ -1662,10 +1662,13 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .set_tx_power = rtl8723b_set_tx_power, .update_rate_mask = rtl8xxxu_gen2_update_rate_mask, .report_connect = rtl8xxxu_gen2_report_connect, + .fill_txdesc = rtl8xxxu_fill_txdesc_v2, .writeN_block_size = 1024, .tx_desc_size = sizeof(struct rtl8xxxu_txdesc40), .rx_desc_size = sizeof(struct rtl8xxxu_rxdesc24), .has_s0s1 = 1, + .has_tx_report = 1, + .gen2_thermal_meter = 1, .adda_1t_init = 0x01c00014, .adda_1t_path_on = 0x01c00014, .adda_2t_path_on_a = 0x01c00014, @@ -1674,4 +1677,8 @@ struct rtl8xxxu_fileops rtl8723bu_fops = { .pbp_rx = PBP_PAGE_SIZE_256, .pbp_tx = PBP_PAGE_SIZE_256, .mactable = rtl8723b_mac_init_table, + .total_page_num = TX_TOTAL_PAGE_NUM_8723B, + .page_num_hi = TX_PAGE_NUM_HI_PQ_8723B, + .page_num_lo = TX_PAGE_NUM_LO_PQ_8723B, + .page_num_norm = TX_PAGE_NUM_NORM_PQ_8723B, }; diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c index 77048db3b32a..b2d7f6e69667 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_core.c @@ -894,7 +894,7 @@ int rtl8xxxu_write_rfreg(struct rtl8xxxu_priv *priv, return retval; } -int +static int rtl8xxxu_gen1_h2c_cmd(struct rtl8xxxu_priv *priv, struct h2c_cmd *h2c, int len) { struct device *dev = &priv->udev->dev; @@ -2472,10 +2472,13 @@ static int rtl8xxxu_llt_write(struct rtl8xxxu_priv *priv, u8 address, u8 data) return ret; } -int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page) +int rtl8xxxu_init_llt_table(struct rtl8xxxu_priv *priv) { int ret; int i; + u8 last_tx_page; + + last_tx_page = priv->fops->total_page_num; for (i = 0; i < last_tx_page; i++) { ret = rtl8xxxu_llt_write(priv, i, i + 1); @@ -2503,7 +2506,7 @@ exit: return ret; } -int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv, u8 last_tx_page) +int rtl8xxxu_auto_llt_table(struct rtl8xxxu_priv *priv) { u32 val32; int ret = 0; @@ -3847,28 +3850,6 @@ void rtl8xxxu_gen2_disable_rf(struct rtl8xxxu_priv *priv) rtl8xxxu_write32(priv, REG_RX_WAIT_CCA, val32); } -static void rtl8xxxu_old_init_queue_reserved_page(struct rtl8xxxu_priv *priv) -{ - u8 val8; - u32 val32; - - if (priv->ep_tx_normal_queue) - val8 = TX_PAGE_NUM_NORM_PQ; - else - val8 = 0; - - rtl8xxxu_write8(priv, REG_RQPN_NPQ, val8); - - val32 = (TX_PAGE_NUM_PUBQ << RQPN_PUB_PQ_SHIFT) | RQPN_LOAD; - - if (priv->ep_tx_high_queue) - val32 |= (TX_PAGE_NUM_HI_PQ << RQPN_HI_PQ_SHIFT); - if (priv->ep_tx_low_queue) - val32 |= (TX_PAGE_NUM_LO_PQ << RQPN_LO_PQ_SHIFT); - - rtl8xxxu_write32(priv, REG_RQPN, val32); -} - static void rtl8xxxu_init_queue_reserved_page(struct rtl8xxxu_priv *priv) { struct rtl8xxxu_fileops *fops = priv->fops; @@ -3891,7 +3872,7 @@ static void rtl8xxxu_init_queue_reserved_page(struct rtl8xxxu_priv *priv) val32 = (nq << RQPN_NPQ_SHIFT) | (eq << RQPN_EPQ_SHIFT); rtl8xxxu_write32(priv, REG_RQPN_NPQ, val32); - pubq = fops->total_page_num - hq - lq - nq; + pubq = fops->total_page_num - hq - lq - nq - 1; val32 = RQPN_LOAD; val32 |= (hq << RQPN_HI_PQ_SHIFT); @@ -3905,6 +3886,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) { struct rtl8xxxu_priv *priv = hw->priv; struct device *dev = &priv->udev->dev; + struct rtl8xxxu_fileops *fops = priv->fops; bool macpower; int ret; u8 val8; @@ -3923,18 +3905,14 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) else macpower = true; - ret = priv->fops->power_on(priv); + ret = fops->power_on(priv); if (ret < 0) { dev_warn(dev, "%s: Failed power on\n", __func__); goto exit; } - if (!macpower) { - if (priv->fops->total_page_num) - rtl8xxxu_init_queue_reserved_page(priv); - else - rtl8xxxu_old_init_queue_reserved_page(priv); - } + if (!macpower) + rtl8xxxu_init_queue_reserved_page(priv); ret = rtl8xxxu_init_queue_priority(priv); dev_dbg(dev, "%s: init_queue_priority %i\n", __func__, ret); @@ -3944,19 +3922,19 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set RX page boundary */ - rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, priv->fops->trxff_boundary); + rtl8xxxu_write16(priv, REG_TRXFF_BNDY + 2, fops->trxff_boundary); ret = rtl8xxxu_download_firmware(priv); - dev_dbg(dev, "%s: download_fiwmare %i\n", __func__, ret); + dev_dbg(dev, "%s: download_firmware %i\n", __func__, ret); if (ret) goto exit; ret = rtl8xxxu_start_firmware(priv); - dev_dbg(dev, "%s: start_fiwmare %i\n", __func__, ret); + dev_dbg(dev, "%s: start_firmware %i\n", __func__, ret); if (ret) goto exit; - if (priv->fops->phy_init_antenna_selection) - priv->fops->phy_init_antenna_selection(priv); + if (fops->phy_init_antenna_selection) + fops->phy_init_antenna_selection(priv); ret = rtl8xxxu_init_mac(priv); @@ -3969,7 +3947,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) if (ret) goto exit; - ret = priv->fops->init_phy_rf(priv); + ret = fops->init_phy_rf(priv); if (ret) goto exit; @@ -3994,13 +3972,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Set TX buffer boundary */ - if (priv->rtl_chip == RTL8192E) - val8 = TX_TOTAL_PAGE_NUM_8192E + 1; - else - val8 = TX_TOTAL_PAGE_NUM + 1; - - if (priv->rtl_chip == RTL8723B) - val8 -= 1; + val8 = fops->total_page_num + 1; rtl8xxxu_write8(priv, REG_TXPKTBUF_BCNQ_BDNY, val8); rtl8xxxu_write8(priv, REG_TXPKTBUF_MGQ_BDNY, val8); @@ -4013,14 +3985,14 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) * The vendor drivers set PBP for all devices, except 8192e. * There is no explanation for this in any of the sources. */ - val8 = (priv->fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) | - (priv->fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT); + val8 = (fops->pbp_rx << PBP_PAGE_SIZE_RX_SHIFT) | + (fops->pbp_tx << PBP_PAGE_SIZE_TX_SHIFT); if (priv->rtl_chip != RTL8192E) rtl8xxxu_write8(priv, REG_PBP, val8); dev_dbg(dev, "%s: macpower %i\n", __func__, macpower); if (!macpower) { - ret = priv->fops->llt_init(priv, TX_TOTAL_PAGE_NUM); + ret = fops->llt_init(priv); if (ret) { dev_warn(dev, "%s: LLT table init failed\n", __func__); goto exit; @@ -4029,13 +4001,12 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Chip specific quirks */ - priv->fops->usb_quirks(priv); + fops->usb_quirks(priv); /* - * Presumably this is for 8188EU as well - * Enable TX report and TX report timer + * Enable TX report and TX report timer for 8723bu/8188eu/... */ - if (priv->rtl_chip == RTL8723B) { + if (fops->has_tx_report) { val8 = rtl8xxxu_read8(priv, REG_TX_REPORT_CTRL); val8 |= TX_REPORT_CTRL_TIMER_ENABLE; rtl8xxxu_write8(priv, REG_TX_REPORT_CTRL, val8); @@ -4170,8 +4141,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write8(priv, REG_RSV_CTRL, val8); } - if (priv->fops->init_aggregation) - priv->fops->init_aggregation(priv); + if (fops->init_aggregation) + fops->init_aggregation(priv); /* * Enable CCK and OFDM block @@ -4188,7 +4159,7 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) /* * Start out with default power levels for channel 6, 20MHz */ - priv->fops->set_tx_power(priv, 1, false); + fops->set_tx_power(priv, 1, false); /* Let the 8051 take control of antenna setting */ if (priv->rtl_chip != RTL8192E) { @@ -4204,8 +4175,8 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8xxxu_write16(priv, REG_FAST_EDCA_CTRL, 0); - if (priv->fops->init_statistics) - priv->fops->init_statistics(priv); + if (fops->init_statistics) + fops->init_statistics(priv); if (priv->rtl_chip == RTL8192E) { /* @@ -4223,12 +4194,12 @@ static int rtl8xxxu_init_device(struct ieee80211_hw *hw) rtl8723a_phy_lc_calibrate(priv); - priv->fops->phy_iq_calibrate(priv); + fops->phy_iq_calibrate(priv); /* * This should enable thermal meter */ - if (priv->fops->tx_desc_size == sizeof(struct rtl8xxxu_txdesc40)) + if (fops->gen2_thermal_meter) rtl8xxxu_write_rfreg(priv, RF_A, RF6052_REG_T_METER_8723B, 0x37cf8); else @@ -4783,6 +4754,113 @@ static void rtl8xxxu_dump_action(struct device *dev, } } +/* + * Fill in v1 (gen1) specific TX descriptor bits. + * This format is used on 8188cu/8192cu/8723au + */ +void +rtl8xxxu_fill_txdesc_v1(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable) +{ + u16 seq_number; + + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + + tx_desc->txdw5 = cpu_to_le32(rate); + + if (ieee80211_is_data(hdr->frame_control)) + tx_desc->txdw5 |= cpu_to_le32(0x0001ff00); + + tx_desc->txdw3 = cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT); + + if (ampdu_enable) + tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE); + else + tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); + + if (ieee80211_is_mgmt(hdr->frame_control)) { + tx_desc->txdw5 = cpu_to_le32(rate); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_USE_DRIVER_RATE); + tx_desc->txdw5 |= cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT); + tx_desc->txdw5 |= cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE); + } + + if (ieee80211_is_data_qos(hdr->frame_control)) + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); + + if (short_preamble) + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); + + if (sgi) + tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); + + if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { + /* + * Use RTS rate 24M - does the mac80211 tell + * us which to use? + */ + tx_desc->txdw4 |= cpu_to_le32(DESC_RATE_24M << + TXDESC32_RTS_RATE_SHIFT); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_RTS_CTS_ENABLE); + tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE); + } +} + +/* + * Fill in v2 (gen2) specific TX descriptor bits. + * This format is used on 8192eu/8723bu + */ +void +rtl8xxxu_fill_txdesc_v2(struct ieee80211_hdr *hdr, + struct rtl8xxxu_txdesc32 *tx_desc32, u32 rate, + u16 rate_flag, bool sgi, bool short_preamble, + bool ampdu_enable) +{ + struct rtl8xxxu_txdesc40 *tx_desc40; + u16 seq_number; + + tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc32; + + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); + + tx_desc40->txdw4 = cpu_to_le32(rate); + if (ieee80211_is_data(hdr->frame_control)) { + tx_desc40->txdw4 |= cpu_to_le32(0x1f << + TXDESC40_DATA_RATE_FB_SHIFT); + } + + tx_desc40->txdw9 = cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT); + + if (ampdu_enable) + tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE); + else + tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); + + if (ieee80211_is_mgmt(hdr->frame_control)) { + tx_desc40->txdw4 = cpu_to_le32(rate); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_USE_DRIVER_RATE); + tx_desc40->txdw4 |= + cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT); + tx_desc40->txdw4 |= cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); + } + + if (short_preamble) + tx_desc40->txdw5 |= cpu_to_le32(TXDESC40_SHORT_PREAMBLE); + + if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { + /* + * Use RTS rate 24M - does the mac80211 tell + * us which to use? + */ + tx_desc40->txdw4 |= cpu_to_le32(DESC_RATE_24M << + TXDESC40_RTS_RATE_SHIFT); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE); + tx_desc40->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE); + } +} + static void rtl8xxxu_tx(struct ieee80211_hw *hw, struct ieee80211_tx_control *control, struct sk_buff *skb) @@ -4792,7 +4870,6 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, struct ieee80211_rate *tx_rate = ieee80211_get_tx_rate(hw, tx_info); struct rtl8xxxu_priv *priv = hw->priv; struct rtl8xxxu_txdesc32 *tx_desc; - struct rtl8xxxu_txdesc40 *tx_desc40; struct rtl8xxxu_tx_urb *tx_urb; struct ieee80211_sta *sta = NULL; struct ieee80211_vif *vif = tx_info->control.vif; @@ -4803,7 +4880,7 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, u16 rate_flag = tx_info->control.rates[0].flags; int tx_desc_size = priv->fops->tx_desc_size; int ret; - bool usedesc40, ampdu_enable; + bool usedesc40, ampdu_enable, sgi = false, short_preamble = false; if (skb_headroom(skb) < tx_desc_size) { dev_warn(dev, @@ -4881,107 +4958,26 @@ static void rtl8xxxu_tx(struct ieee80211_hw *hw, } } - if (rate_flag & IEEE80211_TX_RC_MCS) + if (rate_flag & IEEE80211_TX_RC_MCS && + !ieee80211_is_mgmt(hdr->frame_control)) rate = tx_info->control.rates[0].idx + DESC_RATE_MCS0; else rate = tx_rate->hw_value; + if (rate_flag & IEEE80211_TX_RC_SHORT_GI || + (ieee80211_is_data_qos(hdr->frame_control) && + sta && sta->ht_cap.cap & + (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) + sgi = true; + + if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || + (sta && vif && vif->bss_conf.use_short_preamble)) + short_preamble = true; + seq_number = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl)); - if (!usedesc40) { - tx_desc->txdw5 = cpu_to_le32(rate); - if (ieee80211_is_data(hdr->frame_control)) - tx_desc->txdw5 |= cpu_to_le32(0x0001ff00); - - tx_desc->txdw3 = - cpu_to_le32((u32)seq_number << TXDESC32_SEQ_SHIFT); - - if (ampdu_enable) - tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_ENABLE); - else - tx_desc->txdw1 |= cpu_to_le32(TXDESC32_AGG_BREAK); - - if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc->txdw5 = cpu_to_le32(tx_rate->hw_value); - tx_desc->txdw4 |= - cpu_to_le32(TXDESC32_USE_DRIVER_RATE); - tx_desc->txdw5 |= - cpu_to_le32(6 << TXDESC32_RETRY_LIMIT_SHIFT); - tx_desc->txdw5 |= - cpu_to_le32(TXDESC32_RETRY_LIMIT_ENABLE); - } - - if (ieee80211_is_data_qos(hdr->frame_control)) - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_QOS); - - if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || - (sta && vif && vif->bss_conf.use_short_preamble)) - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_SHORT_PREAMBLE); - - if (rate_flag & IEEE80211_TX_RC_SHORT_GI || - (ieee80211_is_data_qos(hdr->frame_control) && - sta && sta->ht_cap.cap & - (IEEE80211_HT_CAP_SGI_40 | IEEE80211_HT_CAP_SGI_20))) { - tx_desc->txdw5 |= cpu_to_le32(TXDESC32_SHORT_GI); - } - - if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { - /* - * Use RTS rate 24M - does the mac80211 tell - * us which to use? - */ - tx_desc->txdw4 |= - cpu_to_le32(DESC_RATE_24M << - TXDESC32_RTS_RATE_SHIFT); - tx_desc->txdw4 |= - cpu_to_le32(TXDESC32_RTS_CTS_ENABLE); - tx_desc->txdw4 |= cpu_to_le32(TXDESC32_HW_RTS_ENABLE); - } - } else { - tx_desc40 = (struct rtl8xxxu_txdesc40 *)tx_desc; - - tx_desc40->txdw4 = cpu_to_le32(rate); - if (ieee80211_is_data(hdr->frame_control)) { - tx_desc->txdw4 |= - cpu_to_le32(0x1f << - TXDESC40_DATA_RATE_FB_SHIFT); - } - - tx_desc40->txdw9 = - cpu_to_le32((u32)seq_number << TXDESC40_SEQ_SHIFT); - - if (ampdu_enable) - tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_ENABLE); - else - tx_desc40->txdw2 |= cpu_to_le32(TXDESC40_AGG_BREAK); - - if (ieee80211_is_mgmt(hdr->frame_control)) { - tx_desc40->txdw4 = cpu_to_le32(tx_rate->hw_value); - tx_desc40->txdw3 |= - cpu_to_le32(TXDESC40_USE_DRIVER_RATE); - tx_desc40->txdw4 |= - cpu_to_le32(6 << TXDESC40_RETRY_LIMIT_SHIFT); - tx_desc40->txdw4 |= - cpu_to_le32(TXDESC40_RETRY_LIMIT_ENABLE); - } - - if (rate_flag & IEEE80211_TX_RC_USE_SHORT_PREAMBLE || - (sta && vif && vif->bss_conf.use_short_preamble)) - tx_desc40->txdw5 |= - cpu_to_le32(TXDESC40_SHORT_PREAMBLE); - - if (rate_flag & IEEE80211_TX_RC_USE_RTS_CTS) { - /* - * Use RTS rate 24M - does the mac80211 tell - * us which to use? - */ - tx_desc->txdw4 |= - cpu_to_le32(DESC_RATE_24M << - TXDESC40_RTS_RATE_SHIFT); - tx_desc->txdw3 |= cpu_to_le32(TXDESC40_RTS_CTS_ENABLE); - tx_desc->txdw3 |= cpu_to_le32(TXDESC40_HW_RTS_ENABLE); - } - } + priv->fops->fill_txdesc(hdr, tx_desc, rate, rate_flag, + sgi, short_preamble, ampdu_enable); rtl8xxxu_calc_tx_desc_csum(tx_desc); @@ -5379,7 +5375,8 @@ static void rtl8xxxu_int_complete(struct urb *urb) struct device *dev = &priv->udev->dev; int ret; - dev_dbg(dev, "%s: status %i\n", __func__, urb->status); + if (rtl8xxxu_debug & RTL8XXXU_DEBUG_INTERRUPT) + dev_dbg(dev, "%s: status %i\n", __func__, urb->status); if (urb->status == 0) { usb_anchor_urb(urb, &priv->int_anchor); ret = usb_submit_urb(urb, GFP_ATOMIC); @@ -5704,7 +5701,7 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, switch (action) { case IEEE80211_AMPDU_TX_START: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_START\n", __func__); ampdu_factor = sta->ht_cap.ampdu_factor; ampdu_density = sta->ht_cap.ampdu_density; rtl8xxxu_set_ampdu_factor(priv, ampdu_factor); @@ -5714,21 +5711,21 @@ rtl8xxxu_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif, ampdu_factor, ampdu_density); break; case IEEE80211_AMPDU_TX_STOP_FLUSH: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH\n", __func__); rtl8xxxu_set_ampdu_factor(priv, 0); rtl8xxxu_set_ampdu_min_space(priv, 0); break; case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT: - dev_info(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n", + dev_dbg(dev, "%s: IEEE80211_AMPDU_TX_STOP_FLUSH_CONT\n", __func__); rtl8xxxu_set_ampdu_factor(priv, 0); rtl8xxxu_set_ampdu_min_space(priv, 0); break; case IEEE80211_AMPDU_RX_START: - dev_info(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_START\n", __func__); break; case IEEE80211_AMPDU_RX_STOP: - dev_info(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__); + dev_dbg(dev, "%s: IEEE80211_AMPDU_RX_STOP\n", __func__); break; default: break; @@ -5947,7 +5944,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, struct ieee80211_hw *hw; struct usb_device *udev; struct ieee80211_supported_band *sband; - int ret = 0; + int ret; int untested = 1; udev = usb_get_dev(interface_to_usbdev(interface)); @@ -5971,6 +5968,18 @@ static int rtl8xxxu_probe(struct usb_interface *interface, if (id->idProduct == 0x1004) untested = 0; break; + case 0x20f4: + if (id->idProduct == 0x648b) + untested = 0; + break; + case 0x2001: + if (id->idProduct == 0x3308) + untested = 0; + break; + case 0x2357: + if (id->idProduct == 0x0109) + untested = 0; + break; default: break; } @@ -5987,6 +5996,7 @@ static int rtl8xxxu_probe(struct usb_interface *interface, hw = ieee80211_alloc_hw(sizeof(struct rtl8xxxu_priv), &rtl8xxxu_ops); if (!hw) { ret = -ENOMEM; + priv = NULL; goto exit; } @@ -6035,6 +6045,8 @@ static int rtl8xxxu_probe(struct usb_interface *interface, } ret = rtl8xxxu_init_device(hw); + if (ret) + goto exit; hw->wiphy->max_scan_ssids = 1; hw->wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN; @@ -6085,9 +6097,20 @@ static int rtl8xxxu_probe(struct usb_interface *interface, goto exit; } + return 0; + exit: - if (ret < 0) - usb_put_dev(udev); + usb_set_intfdata(interface, NULL); + + if (priv) { + kfree(priv->fw_data); + mutex_destroy(&priv->usb_buf_mutex); + mutex_destroy(&priv->h2c_mutex); + } + usb_put_dev(udev); + + ieee80211_free_hw(hw); + return ret; } @@ -6111,6 +6134,11 @@ static void rtl8xxxu_disconnect(struct usb_interface *interface) mutex_destroy(&priv->usb_buf_mutex); mutex_destroy(&priv->h2c_mutex); + if (priv->udev->state != USB_STATE_NOTATTACHED) { + dev_info(&priv->udev->dev, + "Device still attached, trying to reset\n"); + usb_reset_device(priv->udev); + } usb_put_dev(priv->udev); ieee80211_free_hw(hw); } @@ -6124,6 +6152,9 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8723au_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x818b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192eu_fops}, +/* Tested by Myckel Habets */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2357, 0x0109, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192eu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0xb720, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8723bu_fops}, #ifdef CONFIG_RTL8XXXU_UNTESTED @@ -6140,6 +6171,12 @@ static struct usb_device_id dev_table[] = { /* Tested by Andrea Merello */ {USB_DEVICE_AND_INTERFACE_INFO(0x050d, 0x1004, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, +/* Tested by Jocelyn Mayer */ +{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, +/* Tested by Stefano Bravi */ +{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff), + .driver_info = (unsigned long)&rtl8192cu_fops}, /* Currently untested 8188 series devices */ {USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_REALTEK, 0x8191, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, @@ -6187,8 +6224,6 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x13d3, 0x3357, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, -{USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x3308, 0xff, 0xff, 0xff), - .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2001, 0x330b, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0x4902, 0xff, 0xff, 0xff), @@ -6199,8 +6234,6 @@ static struct usb_device_id dev_table[] = { .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x2019, 0xed17, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, -{USB_DEVICE_AND_INTERFACE_INFO(0x20f4, 0x648b, 0xff, 0xff, 0xff), - .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x4855, 0x0090, 0xff, 0xff, 0xff), .driver_info = (unsigned long)&rtl8192cu_fops}, {USB_DEVICE_AND_INTERFACE_INFO(0x4856, 0x0091, 0xff, 0xff, 0xff), diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h index 921c5653fff2..315ccfb2dff5 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_regs.h @@ -213,10 +213,66 @@ #define REG_HMBOX_EXT_1 0x008a #define REG_HMBOX_EXT_2 0x008c #define REG_HMBOX_EXT_3 0x008e + /* Interrupt registers for 8192e/8723bu/8812 */ #define REG_HIMR0 0x00b0 +#define IMR0_TXCCK BIT(30) /* TXRPT interrupt when CCX bit + of the packet is set */ +#define IMR0_PSTIMEOUT BIT(29) /* Power Save Time Out Int */ +#define IMR0_GTINT4 BIT(28) /* Set when GTIMER4 expires */ +#define IMR0_GTINT3 BIT(27) /* Set when GTIMER3 expires */ +#define IMR0_TBDER BIT(26) /* Transmit Beacon0 Error */ +#define IMR0_TBDOK BIT(25) /* Transmit Beacon0 OK */ +#define IMR0_TSF_BIT32_TOGGLE BIT(24) /* TSF Timer BIT32 toggle + indication interrupt */ +#define IMR0_BCNDMAINT0 BIT(20) /* Beacon DMA Interrupt 0 */ +#define IMR0_BCNDERR0 BIT(16) /* Beacon Queue DMA Error 0 */ +#define IMR0_HSISR_IND_ON_INT BIT(15) /* HSISR Indicator (HSIMR & + HSISR is true) */ +#define IMR0_BCNDMAINT_E BIT(14) /* Beacon DMA Interrupt + Extension for Win7 */ +#define IMR0_ATIMEND BIT(12) /* CTWidnow End or + ATIM Window End */ +#define IMR0_HISR1_IND_INT BIT(11) /* HISR1 Indicator + (HISR1 & HIMR1 is true) */ +#define IMR0_C2HCMD BIT(10) /* CPU to Host Command INT + Status, Write 1 to clear */ +#define IMR0_CPWM2 BIT(9) /* CPU power Mode exchange INT + Status, Write 1 to clear */ +#define IMR0_CPWM BIT(8) /* CPU power Mode exchange INT + Status, Write 1 to clear */ +#define IMR0_HIGHDOK BIT(7) /* High Queue DMA OK */ +#define IMR0_MGNTDOK BIT(6) /* Management Queue DMA OK */ +#define IMR0_BKDOK BIT(5) /* AC_BK DMA OK */ +#define IMR0_BEDOK BIT(4) /* AC_BE DMA OK */ +#define IMR0_VIDOK BIT(3) /* AC_VI DMA OK */ +#define IMR0_VODOK BIT(2) /* AC_VO DMA OK */ +#define IMR0_RDU BIT(1) /* Rx Descriptor Unavailable */ +#define IMR0_ROK BIT(0) /* Receive DMA OK */ #define REG_HISR0 0x00b4 #define REG_HIMR1 0x00b8 +#define IMR1_BCNDMAINT7 BIT(27) /* Beacon DMA Interrupt 7 */ +#define IMR1_BCNDMAINT6 BIT(26) /* Beacon DMA Interrupt 6 */ +#define IMR1_BCNDMAINT5 BIT(25) /* Beacon DMA Interrupt 5 */ +#define IMR1_BCNDMAINT4 BIT(24) /* Beacon DMA Interrupt 4 */ +#define IMR1_BCNDMAINT3 BIT(23) /* Beacon DMA Interrupt 3 */ +#define IMR1_BCNDMAINT2 BIT(22) /* Beacon DMA Interrupt 2 */ +#define IMR1_BCNDMAINT1 BIT(21) /* Beacon DMA Interrupt 1 */ +#define IMR1_BCNDERR7 BIT(20) /* Beacon Queue DMA Err Int 7 */ +#define IMR1_BCNDERR6 BIT(19) /* Beacon Queue DMA Err Int 6 */ +#define IMR1_BCNDERR5 BIT(18) /* Beacon Queue DMA Err Int 5 */ +#define IMR1_BCNDERR4 BIT(17) /* Beacon Queue DMA Err Int 4 */ +#define IMR1_BCNDERR3 BIT(16) /* Beacon Queue DMA Err Int 3 */ +#define IMR1_BCNDERR2 BIT(15) /* Beacon Queue DMA Err Int 2 */ +#define IMR1_BCNDERR1 BIT(14) /* Beacon Queue DMA Err Int 1 */ +#define IMR1_ATIMEND_E BIT(13) /* ATIM Window End Extension + for Win7 */ +#define IMR1_TXERR BIT(11) /* Tx Error Flag Int Status, + write 1 to clear */ +#define IMR1_RXERR BIT(10) /* Rx Error Flag Int Status, + write 1 to clear */ +#define IMR1_TXFOVW BIT(9) /* Transmit FIFO Overflow */ +#define IMR1_RXFOVW BIT(8) /* Receive FIFO Overflow */ #define REG_HISR1 0x00bc /* Host suspend counter on FPGA platform */ @@ -620,6 +676,7 @@ #define REG_SCH_TXCMD 0x05d0 /* define REG_FW_TSF_SYNC_CNT 0x04a0 */ +#define REG_SCH_TX_CMD 0x05f8 #define REG_FW_RESET_TSF_CNT_1 0x05fc #define REG_FW_RESET_TSF_CNT_0 0x05fd #define REG_FW_BCN_DIS_CNT 0x05fe @@ -780,6 +837,10 @@ #define FPGA_RF_MODE_OFDM BIT(25) #define REG_FPGA0_TX_INFO 0x0804 +#define FPGA0_TX_INFO_OFDM_PATH_A BIT(0) +#define FPGA0_TX_INFO_OFDM_PATH_B BIT(1) +#define FPGA0_TX_INFO_OFDM_PATH_C BIT(2) +#define FPGA0_TX_INFO_OFDM_PATH_D BIT(3) #define REG_FPGA0_PSD_FUNC 0x0808 #define REG_FPGA0_TX_GAIN 0x080c #define REG_FPGA0_RF_TIMING1 0x0810 diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c index 41f77f8a309e..f95760c13c56 100644 --- a/drivers/net/wireless/realtek/rtlwifi/core.c +++ b/drivers/net/wireless/realtek/rtlwifi/core.c @@ -765,7 +765,8 @@ static int rtl_op_config(struct ieee80211_hw *hw, u32 changed) mac->bw_40 = false; mac->bw_80 = false; RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + channel_type); break; } } @@ -1135,7 +1136,7 @@ static void rtl_op_bss_info_changed(struct ieee80211_hw *hw, mac->mode = WIRELESS_MODE_AC_24G; } - if (vif->type == NL80211_IFTYPE_STATION && sta) + if (vif->type == NL80211_IFTYPE_STATION) rtlpriv->cfg->ops->update_rate_tbl(hw, sta, 0); rcu_read_unlock(); diff --git a/drivers/net/wireless/realtek/rtlwifi/pci.c b/drivers/net/wireless/realtek/rtlwifi/pci.c index d12586d4f845..0dfa9eac3926 100644 --- a/drivers/net/wireless/realtek/rtlwifi/pci.c +++ b/drivers/net/wireless/realtek/rtlwifi/pci.c @@ -179,7 +179,8 @@ static void _rtl_pci_update_default_setting(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + rtlpci->const_support_pciaspm); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/ps.c b/drivers/net/wireless/realtek/rtlwifi/ps.c index 9a64f9b703e5..18d979affc18 100644 --- a/drivers/net/wireless/realtek/rtlwifi/ps.c +++ b/drivers/net/wireless/realtek/rtlwifi/ps.c @@ -151,7 +151,7 @@ static bool rtl_ps_set_rf_state(struct ieee80211_hw *hw, default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", state_toset); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/regd.c b/drivers/net/wireless/realtek/rtlwifi/regd.c index 3524441fd516..6ee6bf8e7eaf 100644 --- a/drivers/net/wireless/realtek/rtlwifi/regd.c +++ b/drivers/net/wireless/realtek/rtlwifi/regd.c @@ -345,9 +345,9 @@ static const struct ieee80211_regdomain *_rtl_regdomain_select( return &rtl_regdom_no_midband; case COUNTRY_CODE_IC: return &rtl_regdom_11; - case COUNTRY_CODE_ETSI: case COUNTRY_CODE_TELEC_NETGEAR: return &rtl_regdom_60_64; + case COUNTRY_CODE_ETSI: case COUNTRY_CODE_SPAIN: case COUNTRY_CODE_FRANCE: case COUNTRY_CODE_ISRAEL: @@ -406,6 +406,8 @@ static u8 channel_plan_to_country_code(u8 channelplan) return COUNTRY_CODE_WORLD_WIDE_13; case 0x22: return COUNTRY_CODE_IC; + case 0x25: + return COUNTRY_CODE_ETSI; case 0x32: return COUNTRY_CODE_TELEC_NETGEAR; case 0x41: diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c index 629125658b87..5360d5332359 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/fw.c @@ -334,7 +334,7 @@ static void _rtl88e_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } isfw_read = _rtl88e_check_fw_read_last_h2c(hw, boxnum); @@ -405,7 +405,7 @@ static void _rtl88e_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c index 4ab6201daf1a..37d6efc3d240 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/hw.c @@ -355,9 +355,11 @@ void rtl88ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -571,7 +573,8 @@ void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -735,7 +738,7 @@ void rtl88ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -2352,7 +2355,7 @@ void rtl88ee_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c index b504bd092fc4..f05c2c674165 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/led.c @@ -62,7 +62,7 @@ void rtl88ee_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -100,7 +100,7 @@ void rtl88ee_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c index 7498a1218cba..fffaa92eda81 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/phy.c @@ -1346,7 +1346,8 @@ static bool _rtl88e_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -2128,7 +2129,7 @@ bool rtl88e_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -2166,7 +2167,8 @@ static void rtl88e_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2319,7 +2321,7 @@ static bool _rtl88ee_phy_set_rf_power_state(struct ieee80211_hw *hw, } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c index 47e32cb0ec1a..e7b11b40e68d 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c @@ -280,7 +280,7 @@ static struct rtl_mod_params rtl88ee_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl88ee_hal_cfg = { +static const struct rtl_hal_cfg rtl88ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl88e_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c index 43fcb25c885f..7d152466152b 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/fw_common.c @@ -352,7 +352,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -456,7 +456,7 @@ static void _rtl92c_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c index 60ab2ec4f4ef..27e3d5f9ca34 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192c/phy_common.c @@ -910,7 +910,8 @@ bool _rtl92c_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -1567,7 +1568,7 @@ bool rtl92c_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -1605,7 +1606,8 @@ void rtl92c_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c index 244607951e28..a47be73a0980 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/hw.c @@ -141,9 +141,11 @@ void rtl92ce_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -367,7 +369,8 @@ void rtl92ce_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -2154,7 +2157,7 @@ void rtl92ce_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c index 8283e9b27639..24e483ba3fa4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/led.c @@ -62,7 +62,7 @@ void rtl92ce_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -97,7 +97,7 @@ void rtl92ce_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c index 1ee5a6ae9960..46d0d945f283 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/phy.c @@ -300,12 +300,9 @@ bool rtl92c_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; default: break; @@ -554,7 +551,7 @@ static bool _rtl92ce_phy_set_rf_power_state(struct ieee80211_hw *hw, } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c index 4780bdc63b2b..87aa209ae325 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ce/sw.c @@ -258,7 +258,7 @@ static struct rtl_mod_params rtl92ce_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92ce_hal_cfg = { +static const struct rtl_hal_cfg rtl92ce_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92c_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c index 8789752f8143..ae8f055483fa 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/hw.c @@ -1560,7 +1560,7 @@ void rtl92cu_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -1931,7 +1931,7 @@ void rtl92cu_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c index 75a2deb23af1..8514ab652520 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/led.c @@ -62,7 +62,7 @@ void rtl92cu_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -95,7 +95,7 @@ void rtl92cu_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c index c972fa50926d..4b2976465905 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192cu/phy.c @@ -277,12 +277,9 @@ bool rtl92cu_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; default: break; @@ -517,7 +514,7 @@ static bool _rtl92cu_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c index 62ef8209718f..8de29cc3ced0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/fw.c @@ -435,7 +435,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", boxnum); break; } isfw_read = _rtl92d_check_fw_read_last_h2c(hw, boxnum); @@ -512,7 +512,7 @@ static void _rtl92d_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", cmd_len); break; } bwrite_success = true; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c index b0f632462335..d91f8bbfe7a0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/hw.c @@ -164,9 +164,11 @@ void rtl92de_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) case HW_VAR_INT_AC: *((bool *)(val)) = rtlpriv->dm.disable_tx_int; break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -361,7 +363,8 @@ void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -502,7 +505,7 @@ void rtl92de_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -1757,7 +1760,7 @@ static void _rtl92de_read_adapter_info(struct ieee80211_hw *hw) return; if (rtl_get_hwinfo(hw, rtlpriv, HWSET_MAX_SIZE, hwinfo, params)) - return; + goto exit; _rtl92de_efuse_update_chip_version(hw); _rtl92de_read_macphymode_and_bandtype(hw, hwinfo); @@ -1790,6 +1793,7 @@ static void _rtl92de_read_adapter_info(struct ieee80211_hw *hw) break; } rtlefuse->txpwr_fromeprom = true; +exit: kfree(hwinfo); } @@ -2170,7 +2174,7 @@ void rtl92de_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c index 76a57ae4af3e..811ba57eb9bb 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/led.c @@ -71,7 +71,7 @@ void rtl92de_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -106,7 +106,7 @@ void rtl92de_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c index d334d2a5ea63..2a1edfd21b96 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/phy.c @@ -588,7 +588,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_array_table[i], agctab_array_table[i + 1]); } @@ -604,7 +604,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_Array_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_Array_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_array_table[i], agctab_array_table[i + 1]); } @@ -620,7 +620,7 @@ static bool _rtl92d_phy_config_bb_with_headerfile(struct ieee80211_hw *hw, * setting. */ udelay(1); RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE, - "The Rtl819XAGCTAB_5GArray_Table[0] is %ul Rtl819XPHY_REGArray[1] is %ul\n", + "The Rtl819XAGCTAB_5GArray_Table[0] is %u Rtl819XPHY_REGArray[1] is %u\n", agctab_5garray_table[i], agctab_5garray_table[i + 1]); } @@ -836,12 +836,9 @@ bool rtl92d_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -2850,7 +2847,8 @@ static bool _rtl92d_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } break; @@ -2963,7 +2961,8 @@ static void rtl92d_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2994,7 +2993,7 @@ bool rtl92d_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -3182,7 +3181,7 @@ bool rtl92d_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c index c6e09a19de1a..0538a4d09568 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192de/sw.c @@ -262,7 +262,7 @@ static struct rtl_mod_params rtl92de_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92de_hal_cfg = { +static const struct rtl_hal_cfg rtl92de_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8192de", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c index 0708eedd9671..b3f6a9ed15d4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/fw.c @@ -344,7 +344,7 @@ static void _rtl92ee_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -433,7 +433,7 @@ static void _rtl92ee_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c index b07af8d15273..ebf663e1a81a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/hw.c @@ -338,9 +338,11 @@ void rtl92ee_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; } break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -566,7 +568,8 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -685,7 +688,7 @@ void rtl92ee_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -2463,7 +2466,7 @@ void rtl92ee_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_DMESG, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c index 8388e371c8e2..47da05dd3076 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/led.c @@ -61,7 +61,7 @@ void rtl92ee_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -91,7 +91,7 @@ void rtl92ee_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c index beafc9a10ad8..5ad7e753c357 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c @@ -1927,7 +1927,8 @@ static bool _rtl92ee_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -3001,7 +3002,7 @@ bool rtl92ee_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -3041,7 +3042,8 @@ static void rtl92ee_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -3187,7 +3189,7 @@ static bool _rtl92ee_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c index c31c6bfb536d..ac299cbe59b0 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c @@ -262,7 +262,7 @@ static struct rtl_mod_params rtl92ee_mod_params = { .debug = DBG_EMERG, }; -static struct rtl_hal_cfg rtl92ee_hal_cfg = { +static const struct rtl_hal_cfg rtl92ee_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl92ee_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c index ddfa0aee5bf8..52e4430edb54 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/hw.c @@ -77,9 +77,11 @@ void rtl92se_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((bool *)(val)) = rtlpriv->dm.current_mrc_switch; break; } + case HAL_DEF_WOWLAN: + break; default: { RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } } @@ -297,7 +299,8 @@ void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -433,7 +436,7 @@ void rtl92se_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", variable); break; } @@ -2465,7 +2468,7 @@ void rtl92se_set_key(struct ieee80211_hw *hw, u32 key_index, u8 *p_macaddr, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c index 44949b5cbb87..9849cb988186 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/led.c @@ -68,7 +68,7 @@ void rtl92se_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -104,7 +104,7 @@ void rtl92se_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c index 881821f4e243..4bb75581ab38 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/phy.c @@ -442,7 +442,8 @@ static bool _rtl92s_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -648,7 +649,7 @@ bool rtl92s_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not processed\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c index 31baca41ac2f..5e8e02d5de8a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192se/sw.c @@ -306,7 +306,7 @@ static struct rtl_mod_params rtl92se_mod_params = { /* Because memory R/W bursting will cause system hang/crash * for 92se, so we don't read back after every write action */ -static struct rtl_hal_cfg rtl92se_hal_cfg = { +static const struct rtl_hal_cfg rtl92se_hal_cfg = { .bar_id = 1, .write_readback = false, .name = "rtl92s_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c index b7c0d38ee5b5..1186755e55b8 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/fw.c @@ -124,7 +124,7 @@ static void _rtl8723e_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -230,7 +230,7 @@ static void _rtl8723e_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c index b88c7ee72dbf..f8be0bd7e326 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c @@ -141,9 +141,11 @@ void rtl8723e_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", variable); break; } } @@ -366,7 +368,8 @@ void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -546,7 +549,7 @@ void rtl8723e_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", variable); break; } } @@ -1654,7 +1657,7 @@ static void _rtl8723e_read_adapter_info(struct ieee80211_hw *hw, rtlefuse->autoload_failflag, hwinfo); if (rtlhal->oem_id != RT_CID_DEFAULT) - return; + goto exit; switch (rtlefuse->eeprom_oemid) { case EEPROM_CID_DEFAULT: @@ -2225,7 +2228,7 @@ void rtl8723e_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c index 13173351cbfd..c7be9342136c 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/led.c @@ -63,7 +63,7 @@ void rtl8723e_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -105,7 +105,7 @@ void rtl8723e_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c index 601b78efedfb..17b58cb32d55 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/phy.c @@ -1023,7 +1023,8 @@ static bool _rtl8723e_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -1499,7 +1500,7 @@ bool rtl8723e_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -1536,7 +1537,8 @@ static void rtl8723e_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -1682,7 +1684,7 @@ static bool _rtl8723e_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c index ff49a8c0ff61..89c828ad89f4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c @@ -276,7 +276,7 @@ static struct rtl_mod_params rtl8723e_mod_params = { .disable_watchdog = false, }; -static struct rtl_hal_cfg rtl8723e_hal_cfg = { +static const struct rtl_hal_cfg rtl8723e_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723e_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c index d5da0f3c1217..8c5c27ce8e05 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/fw.c @@ -122,7 +122,7 @@ static void _rtl8723be_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -195,7 +195,7 @@ static void _rtl8723be_fill_h2c_command(struct ieee80211_hw *hw, u8 element_id, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c index 82e4476cab23..aba60c3145c5 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/hw.c @@ -348,9 +348,11 @@ void rtl8723be_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) *((u64 *)(val)) = tsf; } break; + case HAL_DEF_WOWLAN: + break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -607,7 +609,8 @@ void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -723,8 +726,7 @@ void rtl8723be_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", - variable); + "switch case %#x not processed\n", variable); break; } } @@ -2565,7 +2567,7 @@ void rtl8723be_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c index 4196efb723a2..497913eb3b37 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/led.c @@ -58,7 +58,7 @@ void rtl8723be_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -100,7 +100,7 @@ void rtl8723be_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c index 285818df149b..3cc2232f25ca 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/phy.c @@ -837,7 +837,7 @@ bool rtl8723be_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -1507,7 +1507,8 @@ static bool _rtl8723be_phy_sw_chnl_step_by_step(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + currentcmd->cmdid); break; } @@ -2515,7 +2516,7 @@ bool rtl8723be_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -2553,7 +2554,8 @@ static void rtl8723be_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -2705,7 +2707,7 @@ static bool _rtl8723be_phy_set_rf_power_state(struct ieee80211_hw *hw, default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c index 2101793438ed..20b53f035483 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c @@ -276,7 +276,7 @@ static struct rtl_mod_params rtl8723be_mod_params = { .ant_sel = 0, }; -static struct rtl_hal_cfg rtl8723be_hal_cfg = { +static const struct rtl_hal_cfg rtl8723be_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8723be_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c index a4fc70e8c9c0..b665446351a4 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/fw.c @@ -392,7 +392,7 @@ static void _rtl8821ae_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", boxnum); break; } @@ -481,7 +481,7 @@ static void _rtl8821ae_fill_h2c_command(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", cmd_len); break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c index 0cddf1ad0fff..1281ebe0c30a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/hw.c @@ -480,7 +480,7 @@ void rtl8821ae_get_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -671,7 +671,8 @@ void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", + e_aci); break; } } @@ -800,7 +801,7 @@ void rtl8821ae_set_hw_reg(struct ieee80211_hw *hw, u8 variable, u8 *val) break; } default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process %x\n", variable); + "switch case %#x not processed\n", variable); break; } } @@ -3934,7 +3935,7 @@ void rtl8821ae_set_key(struct ieee80211_hw *hw, u32 key_index, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", enc_algo); enc_algo = CAM_TKIP; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c index ba1946a0280e..fcb3b28c6b8f 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/led.c @@ -60,7 +60,7 @@ void rtl8821ae_sw_led_on(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = true; @@ -133,7 +133,7 @@ void rtl8821ae_sw_led_off(struct ieee80211_hw *hw, struct rtl_led *pled) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_LOUD, - "switch case not process\n"); + "switch case %#x not processed\n", pled->ledpin); break; } pled->ledon = false; diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c index a71bfe38e7e1..5dad402171c2 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/phy.c @@ -2063,12 +2063,9 @@ bool rtl8812ae_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, } break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -2133,16 +2130,10 @@ bool rtl8821ae_phy_config_rf_with_headerfile(struct ieee80211_hw *hw, break; case RF90_PATH_B: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_C: - RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); - break; case RF90_PATH_D: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpath); break; } return true; @@ -4670,7 +4661,7 @@ bool rtl8821ae_phy_set_io_cmd(struct ieee80211_hw *hw, enum io_type iotype) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", iotype); break; } } while (false); @@ -4714,7 +4705,8 @@ static void rtl8821ae_phy_set_io(struct ieee80211_hw *hw) break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", + rtlphy->current_io_type); break; } rtlphy->set_io_inprogress = false; @@ -4820,7 +4812,7 @@ static bool _rtl8821ae_phy_set_rf_power_state(struct ieee80211_hw *hw, break; default: RT_TRACE(rtlpriv, COMP_ERR, DBG_EMERG, - "switch case not process\n"); + "switch case %#x not processed\n", rfpwr_state); bresult = false; break; } diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c index 4159f9b14db6..22f687b1f133 100644 --- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c +++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c @@ -316,7 +316,7 @@ static struct rtl_mod_params rtl8821ae_mod_params = { .disable_watchdog = 0, }; -static struct rtl_hal_cfg rtl8821ae_hal_cfg = { +static const struct rtl_hal_cfg rtl8821ae_hal_cfg = { .bar_id = 2, .write_readback = true, .name = "rtl8821ae_pci", diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c index 41617b7b0822..32aa5c1d070a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/usb.c +++ b/drivers/net/wireless/realtek/rtlwifi/usb.c @@ -739,11 +739,8 @@ static int _rtl_usb_receive(struct ieee80211_hw *hw) for (i = 0; i < rtlusb->rx_urb_num; i++) { err = -ENOMEM; urb = usb_alloc_urb(0, GFP_KERNEL); - if (!urb) { - RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG, - "Failed to alloc URB!!\n"); + if (!urb) goto err_out; - } err = _rtl_prep_rx_urb(hw, rtlusb, urb, GFP_KERNEL); if (err < 0) { @@ -907,15 +904,12 @@ static void _rtl_tx_complete(struct urb *urb) static struct urb *_rtl_usb_tx_urb_setup(struct ieee80211_hw *hw, struct sk_buff *skb, u32 ep_num) { - struct rtl_priv *rtlpriv = rtl_priv(hw); struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw)); struct urb *_urb; WARN_ON(NULL == skb); _urb = usb_alloc_urb(0, GFP_ATOMIC); if (!_urb) { - RT_TRACE(rtlpriv, COMP_USB, DBG_EMERG, - "Can't allocate URB for bulk out!\n"); kfree_skb(skb); return NULL; } diff --git a/drivers/net/wireless/realtek/rtlwifi/wifi.h b/drivers/net/wireless/realtek/rtlwifi/wifi.h index c5086c2229aa..595f7d5d091a 100644 --- a/drivers/net/wireless/realtek/rtlwifi/wifi.h +++ b/drivers/net/wireless/realtek/rtlwifi/wifi.h @@ -394,110 +394,110 @@ enum io_type { }; enum hw_variables { - HW_VAR_ETHER_ADDR, - HW_VAR_MULTICAST_REG, - HW_VAR_BASIC_RATE, - HW_VAR_BSSID, - HW_VAR_MEDIA_STATUS, - HW_VAR_SECURITY_CONF, - HW_VAR_BEACON_INTERVAL, - HW_VAR_ATIM_WINDOW, - HW_VAR_LISTEN_INTERVAL, - HW_VAR_CS_COUNTER, - HW_VAR_DEFAULTKEY0, - HW_VAR_DEFAULTKEY1, - HW_VAR_DEFAULTKEY2, - HW_VAR_DEFAULTKEY3, - HW_VAR_SIFS, - HW_VAR_R2T_SIFS, - HW_VAR_DIFS, - HW_VAR_EIFS, - HW_VAR_SLOT_TIME, - HW_VAR_ACK_PREAMBLE, - HW_VAR_CW_CONFIG, - HW_VAR_CW_VALUES, - HW_VAR_RATE_FALLBACK_CONTROL, - HW_VAR_CONTENTION_WINDOW, - HW_VAR_RETRY_COUNT, - HW_VAR_TR_SWITCH, - HW_VAR_COMMAND, - HW_VAR_WPA_CONFIG, - HW_VAR_AMPDU_MIN_SPACE, - HW_VAR_SHORTGI_DENSITY, - HW_VAR_AMPDU_FACTOR, - HW_VAR_MCS_RATE_AVAILABLE, - HW_VAR_AC_PARAM, - HW_VAR_ACM_CTRL, - HW_VAR_DIS_Req_Qsize, - HW_VAR_CCX_CHNL_LOAD, - HW_VAR_CCX_NOISE_HISTOGRAM, - HW_VAR_CCX_CLM_NHM, - HW_VAR_TxOPLimit, - HW_VAR_TURBO_MODE, - HW_VAR_RF_STATE, - HW_VAR_RF_OFF_BY_HW, - HW_VAR_BUS_SPEED, - HW_VAR_SET_DEV_POWER, + HW_VAR_ETHER_ADDR = 0x0, + HW_VAR_MULTICAST_REG = 0x1, + HW_VAR_BASIC_RATE = 0x2, + HW_VAR_BSSID = 0x3, + HW_VAR_MEDIA_STATUS= 0x4, + HW_VAR_SECURITY_CONF= 0x5, + HW_VAR_BEACON_INTERVAL = 0x6, + HW_VAR_ATIM_WINDOW = 0x7, + HW_VAR_LISTEN_INTERVAL = 0x8, + HW_VAR_CS_COUNTER = 0x9, + HW_VAR_DEFAULTKEY0 = 0xa, + HW_VAR_DEFAULTKEY1 = 0xb, + HW_VAR_DEFAULTKEY2 = 0xc, + HW_VAR_DEFAULTKEY3 = 0xd, + HW_VAR_SIFS = 0xe, + HW_VAR_R2T_SIFS = 0xf, + HW_VAR_DIFS = 0x10, + HW_VAR_EIFS = 0x11, + HW_VAR_SLOT_TIME = 0x12, + HW_VAR_ACK_PREAMBLE = 0x13, + HW_VAR_CW_CONFIG = 0x14, + HW_VAR_CW_VALUES = 0x15, + HW_VAR_RATE_FALLBACK_CONTROL= 0x16, + HW_VAR_CONTENTION_WINDOW = 0x17, + HW_VAR_RETRY_COUNT = 0x18, + HW_VAR_TR_SWITCH = 0x19, + HW_VAR_COMMAND = 0x1a, + HW_VAR_WPA_CONFIG = 0x1b, + HW_VAR_AMPDU_MIN_SPACE = 0x1c, + HW_VAR_SHORTGI_DENSITY = 0x1d, + HW_VAR_AMPDU_FACTOR = 0x1e, + HW_VAR_MCS_RATE_AVAILABLE = 0x1f, + HW_VAR_AC_PARAM = 0x20, + HW_VAR_ACM_CTRL = 0x21, + HW_VAR_DIS_Req_Qsize = 0x22, + HW_VAR_CCX_CHNL_LOAD = 0x23, + HW_VAR_CCX_NOISE_HISTOGRAM = 0x24, + HW_VAR_CCX_CLM_NHM = 0x25, + HW_VAR_TxOPLimit = 0x26, + HW_VAR_TURBO_MODE = 0x27, + HW_VAR_RF_STATE = 0x28, + HW_VAR_RF_OFF_BY_HW = 0x29, + HW_VAR_BUS_SPEED = 0x2a, + HW_VAR_SET_DEV_POWER = 0x2b, - HW_VAR_RCR, - HW_VAR_RATR_0, - HW_VAR_RRSR, - HW_VAR_CPU_RST, - HW_VAR_CHECK_BSSID, - HW_VAR_LBK_MODE, - HW_VAR_AES_11N_FIX, - HW_VAR_USB_RX_AGGR, - HW_VAR_USER_CONTROL_TURBO_MODE, - HW_VAR_RETRY_LIMIT, - HW_VAR_INIT_TX_RATE, - HW_VAR_TX_RATE_REG, - HW_VAR_EFUSE_USAGE, - HW_VAR_EFUSE_BYTES, - HW_VAR_AUTOLOAD_STATUS, - HW_VAR_RF_2R_DISABLE, - HW_VAR_SET_RPWM, - HW_VAR_H2C_FW_PWRMODE, - HW_VAR_H2C_FW_JOINBSSRPT, - HW_VAR_H2C_FW_MEDIASTATUSRPT, - HW_VAR_H2C_FW_P2P_PS_OFFLOAD, - HW_VAR_FW_PSMODE_STATUS, - HW_VAR_INIT_RTS_RATE, - HW_VAR_RESUME_CLK_ON, - HW_VAR_FW_LPS_ACTION, - HW_VAR_1X1_RECV_COMBINE, - HW_VAR_STOP_SEND_BEACON, - HW_VAR_TSF_TIMER, - HW_VAR_IO_CMD, + HW_VAR_RCR = 0x2c, + HW_VAR_RATR_0 = 0x2d, + HW_VAR_RRSR = 0x2e, + HW_VAR_CPU_RST = 0x2f, + HW_VAR_CHECK_BSSID = 0x30, + HW_VAR_LBK_MODE = 0x31, + HW_VAR_AES_11N_FIX = 0x32, + HW_VAR_USB_RX_AGGR = 0x33, + HW_VAR_USER_CONTROL_TURBO_MODE = 0x34, + HW_VAR_RETRY_LIMIT = 0x35, + HW_VAR_INIT_TX_RATE = 0x36, + HW_VAR_TX_RATE_REG = 0x37, + HW_VAR_EFUSE_USAGE = 0x38, + HW_VAR_EFUSE_BYTES = 0x39, + HW_VAR_AUTOLOAD_STATUS = 0x3a, + HW_VAR_RF_2R_DISABLE = 0x3b, + HW_VAR_SET_RPWM = 0x3c, + HW_VAR_H2C_FW_PWRMODE = 0x3d, + HW_VAR_H2C_FW_JOINBSSRPT = 0x3e, + HW_VAR_H2C_FW_MEDIASTATUSRPT = 0x3f, + HW_VAR_H2C_FW_P2P_PS_OFFLOAD = 0x40, + HW_VAR_FW_PSMODE_STATUS = 0x41, + HW_VAR_INIT_RTS_RATE = 0x42, + HW_VAR_RESUME_CLK_ON = 0x43, + HW_VAR_FW_LPS_ACTION = 0x44, + HW_VAR_1X1_RECV_COMBINE = 0x45, + HW_VAR_STOP_SEND_BEACON = 0x46, + HW_VAR_TSF_TIMER = 0x47, + HW_VAR_IO_CMD = 0x48, - HW_VAR_RF_RECOVERY, - HW_VAR_H2C_FW_UPDATE_GTK, - HW_VAR_WF_MASK, - HW_VAR_WF_CRC, - HW_VAR_WF_IS_MAC_ADDR, - HW_VAR_H2C_FW_OFFLOAD, - HW_VAR_RESET_WFCRC, + HW_VAR_RF_RECOVERY = 0x49, + HW_VAR_H2C_FW_UPDATE_GTK = 0x4a, + HW_VAR_WF_MASK = 0x4b, + HW_VAR_WF_CRC = 0x4c, + HW_VAR_WF_IS_MAC_ADDR = 0x4d, + HW_VAR_H2C_FW_OFFLOAD = 0x4e, + HW_VAR_RESET_WFCRC = 0x4f, - HW_VAR_HANDLE_FW_C2H, - HW_VAR_DL_FW_RSVD_PAGE, - HW_VAR_AID, - HW_VAR_HW_SEQ_ENABLE, - HW_VAR_CORRECT_TSF, - HW_VAR_BCN_VALID, - HW_VAR_FWLPS_RF_ON, - HW_VAR_DUAL_TSF_RST, - HW_VAR_SWITCH_EPHY_WoWLAN, - HW_VAR_INT_MIGRATION, - HW_VAR_INT_AC, - HW_VAR_RF_TIMING, + HW_VAR_HANDLE_FW_C2H = 0x50, + HW_VAR_DL_FW_RSVD_PAGE = 0x51, + HW_VAR_AID = 0x52, + HW_VAR_HW_SEQ_ENABLE = 0x53, + HW_VAR_CORRECT_TSF = 0x54, + HW_VAR_BCN_VALID = 0x55, + HW_VAR_FWLPS_RF_ON = 0x56, + HW_VAR_DUAL_TSF_RST = 0x57, + HW_VAR_SWITCH_EPHY_WoWLAN = 0x58, + HW_VAR_INT_MIGRATION = 0x59, + HW_VAR_INT_AC = 0x5a, + HW_VAR_RF_TIMING = 0x5b, - HAL_DEF_WOWLAN, - HW_VAR_MRC, - HW_VAR_KEEP_ALIVE, - HW_VAR_NAV_UPPER, + HAL_DEF_WOWLAN = 0x5c, + HW_VAR_MRC = 0x5d, + HW_VAR_KEEP_ALIVE = 0x5e, + HW_VAR_NAV_UPPER = 0x5f, - HW_VAR_MGT_FILTER, - HW_VAR_CTRL_FILTER, - HW_VAR_DATA_FILTER, + HW_VAR_MGT_FILTER = 0x60, + HW_VAR_CTRL_FILTER = 0x61, + HW_VAR_DATA_FILTER = 0x62, }; enum rt_media_status { diff --git a/drivers/net/wireless/ti/wl18xx/acx.c b/drivers/net/wireless/ti/wl18xx/acx.c index 4be0409308cb..b5525a38264b 100644 --- a/drivers/net/wireless/ti/wl18xx/acx.c +++ b/drivers/net/wireless/ti/wl18xx/acx.c @@ -309,3 +309,32 @@ out: kfree(acx); return ret; } + +int wl18xx_acx_time_sync_cfg(struct wl1271 *wl) +{ + struct acx_time_sync_cfg *acx; + int ret; + + wl1271_debug(DEBUG_ACX, "acx time sync cfg: mode %d, addr: %pM", + wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC], + wl->zone_master_mac_addr); + + acx = kzalloc(sizeof(*acx), GFP_KERNEL); + if (!acx) { + ret = -ENOMEM; + goto out; + } + + acx->sync_mode = wl->conf.sg.params[WL18XX_CONF_SG_TIME_SYNC]; + memcpy(acx->zone_mac_addr, wl->zone_master_mac_addr, ETH_ALEN); + + ret = wl1271_cmd_configure(wl, ACX_TIME_SYNC_CFG, + acx, sizeof(*acx)); + if (ret < 0) { + wl1271_warning("acx time sync cfg failed: %d", ret); + goto out; + } +out: + kfree(acx); + return ret; +} diff --git a/drivers/net/wireless/ti/wl18xx/acx.h b/drivers/net/wireless/ti/wl18xx/acx.h index 342a2993ef98..2edbbbfd8421 100644 --- a/drivers/net/wireless/ti/wl18xx/acx.h +++ b/drivers/net/wireless/ti/wl18xx/acx.h @@ -37,6 +37,7 @@ enum { ACX_RX_BA_FILTER = 0x0058, ACX_AP_SLEEP_CFG = 0x0059, ACX_DYNAMIC_TRACES_CFG = 0x005A, + ACX_TIME_SYNC_CFG = 0x005B, }; /* numbers of bits the length field takes (add 1 for the actual number) */ @@ -388,6 +389,17 @@ struct acx_dynamic_fw_traces_cfg { __le32 dynamic_fw_traces; } __packed; +/* + * ACX_TIME_SYNC_CFG + * configure the time sync parameters + */ +struct acx_time_sync_cfg { + struct acx_header header; + u8 sync_mode; + u8 zone_mac_addr[ETH_ALEN]; + u8 padding[1]; +} __packed; + int wl18xx_acx_host_if_cfg_bitmap(struct wl1271 *wl, u32 host_cfg_bitmap, u32 sdio_blk_size, u32 extra_mem_blks, u32 len_field_size); @@ -402,5 +414,6 @@ int wl18xx_acx_interrupt_notify_config(struct wl1271 *wl, bool action); int wl18xx_acx_rx_ba_filter(struct wl1271 *wl, bool action); int wl18xx_acx_ap_sleep(struct wl1271 *wl); int wl18xx_acx_dynamic_fw_traces(struct wl1271 *wl); +int wl18xx_acx_time_sync_cfg(struct wl1271 *wl); #endif /* __WL18XX_ACX_H__ */ diff --git a/drivers/net/wireless/ti/wl18xx/event.c b/drivers/net/wireless/ti/wl18xx/event.c index 2c5df43b8ed9..b36ce185c9f2 100644 --- a/drivers/net/wireless/ti/wl18xx/event.c +++ b/drivers/net/wireless/ti/wl18xx/event.c @@ -22,6 +22,7 @@ #include #include "event.h" #include "scan.h" +#include "conf.h" #include "../wlcore/cmd.h" #include "../wlcore/debug.h" #include "../wlcore/vendor_cmd.h" diff --git a/drivers/net/wireless/ti/wl18xx/main.c b/drivers/net/wireless/ti/wl18xx/main.c index 00a04dfc03d1..06d6943b257c 100644 --- a/drivers/net/wireless/ti/wl18xx/main.c +++ b/drivers/net/wireless/ti/wl18xx/main.c @@ -1397,25 +1397,24 @@ out: return ret; } -#define WL18XX_CONF_FILE_NAME "ti-connectivity/wl18xx-conf.bin" - static int wl18xx_load_conf_file(struct device *dev, struct wlcore_conf *conf, - struct wl18xx_priv_conf *priv_conf) + struct wl18xx_priv_conf *priv_conf, + const char *file) { struct wlcore_conf_file *conf_file; const struct firmware *fw; int ret; - ret = request_firmware(&fw, WL18XX_CONF_FILE_NAME, dev); + ret = request_firmware(&fw, file, dev); if (ret < 0) { wl1271_error("could not get configuration binary %s: %d", - WL18XX_CONF_FILE_NAME, ret); + file, ret); return ret; } if (fw->size != WL18XX_CONF_SIZE) { - wl1271_error("configuration binary file size is wrong, expected %zu got %zu", - WL18XX_CONF_SIZE, fw->size); + wl1271_error("%s configuration binary size is wrong, expected %zu got %zu", + file, WL18XX_CONF_SIZE, fw->size); ret = -EINVAL; goto out_release; } @@ -1448,9 +1447,12 @@ out_release: static int wl18xx_conf_init(struct wl1271 *wl, struct device *dev) { + struct platform_device *pdev = wl->pdev; + struct wlcore_platdev_data *pdata = dev_get_platdata(&pdev->dev); struct wl18xx_priv *priv = wl->priv; - if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf) < 0) { + if (wl18xx_load_conf_file(dev, &wl->conf, &priv->conf, + pdata->family->cfg_name) < 0) { wl1271_warning("falling back to default config"); /* apply driver default configuration */ @@ -2141,4 +2143,3 @@ MODULE_PARM_DESC(num_rx_desc_param, MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Luciano Coelho "); MODULE_FIRMWARE(WL18XX_FW_NAME); -MODULE_FIRMWARE(WL18XX_CONF_FILE_NAME); diff --git a/drivers/net/wireless/ti/wlcore/boot.c b/drivers/net/wireless/ti/wlcore/boot.c index f75d30444117..f00509ea8aca 100644 --- a/drivers/net/wireless/ti/wlcore/boot.c +++ b/drivers/net/wireless/ti/wlcore/boot.c @@ -282,6 +282,9 @@ EXPORT_SYMBOL_GPL(wlcore_boot_upload_firmware); int wlcore_boot_upload_nvs(struct wl1271 *wl) { + struct platform_device *pdev = wl->pdev; + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); + const char *nvs_name = "unknown"; size_t nvs_len, burst_len; int i; u32 dest_addr, val; @@ -293,6 +296,9 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) return -ENODEV; } + if (pdev_data && pdev_data->family) + nvs_name = pdev_data->family->nvs_name; + if (wl->quirks & WLCORE_QUIRK_LEGACY_NVS) { struct wl1271_nvs_file *nvs = (struct wl1271_nvs_file *)wl->nvs; @@ -310,8 +316,9 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) if (wl->nvs_len != sizeof(struct wl1271_nvs_file) && (wl->nvs_len != WL1271_INI_LEGACY_NVS_FILE_SIZE || wl->enable_11a)) { - wl1271_error("nvs size is not as expected: %zu != %zu", - wl->nvs_len, sizeof(struct wl1271_nvs_file)); + wl1271_error("%s size is not as expected: %zu != %zu", + nvs_name, wl->nvs_len, + sizeof(struct wl1271_nvs_file)); kfree(wl->nvs); wl->nvs = NULL; wl->nvs_len = 0; @@ -328,8 +335,8 @@ int wlcore_boot_upload_nvs(struct wl1271 *wl) if (nvs->general_params.dual_mode_select) wl->enable_11a = true; } else { - wl1271_error("nvs size is not as expected: %zu != %zu", - wl->nvs_len, + wl1271_error("%s size is not as expected: %zu != %zu", + nvs_name, wl->nvs_len, sizeof(struct wl128x_nvs_file)); kfree(wl->nvs); wl->nvs = NULL; diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 9e1f2d9c9865..471521a0db7b 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -4986,7 +4986,6 @@ static int wl12xx_sta_add(struct wl1271 *wl, return ret; wl_sta = (struct wl1271_station *)sta->drv_priv; - wl_sta->wl = wl; hlid = wl_sta->hlid; ret = wl12xx_cmd_add_peer(wl, wlvif, sta, hlid); @@ -6414,9 +6413,12 @@ static void wlcore_nvs_cb(const struct firmware *fw, void *context) goto out; } wl->nvs_len = fw->size; - } else { + } else if (pdev_data->family->nvs_name) { wl1271_debug(DEBUG_BOOT, "Could not get nvs file %s", - WL12XX_NVS_NAME); + pdev_data->family->nvs_name); + wl->nvs = NULL; + wl->nvs_len = 0; + } else { wl->nvs = NULL; wl->nvs_len = 0; } @@ -6511,21 +6513,29 @@ out: int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev) { - int ret; + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); + const char *nvs_name; + int ret = 0; - if (!wl->ops || !wl->ptable) + if (!wl->ops || !wl->ptable || !pdev_data) return -EINVAL; wl->dev = &pdev->dev; wl->pdev = pdev; platform_set_drvdata(pdev, wl); - ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, - WL12XX_NVS_NAME, &pdev->dev, GFP_KERNEL, - wl, wlcore_nvs_cb); - if (ret < 0) { - wl1271_error("request_firmware_nowait failed: %d", ret); - complete_all(&wl->nvs_loading_complete); + if (pdev_data->family && pdev_data->family->nvs_name) { + nvs_name = pdev_data->family->nvs_name; + ret = request_firmware_nowait(THIS_MODULE, FW_ACTION_HOTPLUG, + nvs_name, &pdev->dev, GFP_KERNEL, + wl, wlcore_nvs_cb); + if (ret < 0) { + wl1271_error("request_firmware_nowait failed for %s: %d", + nvs_name, ret); + complete_all(&wl->nvs_loading_complete); + } + } else { + wlcore_nvs_cb(NULL, wl); } return ret; @@ -6534,9 +6544,11 @@ EXPORT_SYMBOL_GPL(wlcore_probe); int wlcore_remove(struct platform_device *pdev) { + struct wlcore_platdev_data *pdev_data = dev_get_platdata(&pdev->dev); struct wl1271 *wl = platform_get_drvdata(pdev); - wait_for_completion(&wl->nvs_loading_complete); + if (pdev_data->family && pdev_data->family->nvs_name) + wait_for_completion(&wl->nvs_loading_complete); if (!wl->initialized) return 0; @@ -6573,4 +6585,3 @@ MODULE_PARM_DESC(no_recovery, "Prevent HW recovery. FW will remain stuck."); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Luciano Coelho "); MODULE_AUTHOR("Juuso Oikarinen "); -MODULE_FIRMWARE(WL12XX_NVS_NAME); diff --git a/drivers/net/wireless/ti/wlcore/sdio.c b/drivers/net/wireless/ti/wlcore/sdio.c index 5839acbbc782..a6e94b1a12cb 100644 --- a/drivers/net/wireless/ti/wlcore/sdio.c +++ b/drivers/net/wireless/ti/wlcore/sdio.c @@ -216,17 +216,33 @@ static struct wl1271_if_operations sdio_ops = { }; #ifdef CONFIG_OF + +static const struct wilink_family_data wl127x_data = { + .name = "wl127x", + .nvs_name = "ti-connectivity/wl127x-nvs.bin", +}; + +static const struct wilink_family_data wl128x_data = { + .name = "wl128x", + .nvs_name = "ti-connectivity/wl128x-nvs.bin", +}; + +static const struct wilink_family_data wl18xx_data = { + .name = "wl18xx", + .cfg_name = "ti-connectivity/wl18xx-conf.bin", +}; + static const struct of_device_id wlcore_sdio_of_match_table[] = { - { .compatible = "ti,wl1271" }, - { .compatible = "ti,wl1273" }, - { .compatible = "ti,wl1281" }, - { .compatible = "ti,wl1283" }, - { .compatible = "ti,wl1801" }, - { .compatible = "ti,wl1805" }, - { .compatible = "ti,wl1807" }, - { .compatible = "ti,wl1831" }, - { .compatible = "ti,wl1835" }, - { .compatible = "ti,wl1837" }, + { .compatible = "ti,wl1271", .data = &wl127x_data }, + { .compatible = "ti,wl1273", .data = &wl127x_data }, + { .compatible = "ti,wl1281", .data = &wl128x_data }, + { .compatible = "ti,wl1283", .data = &wl128x_data }, + { .compatible = "ti,wl1801", .data = &wl18xx_data }, + { .compatible = "ti,wl1805", .data = &wl18xx_data }, + { .compatible = "ti,wl1807", .data = &wl18xx_data }, + { .compatible = "ti,wl1831", .data = &wl18xx_data }, + { .compatible = "ti,wl1835", .data = &wl18xx_data }, + { .compatible = "ti,wl1837", .data = &wl18xx_data }, { } }; @@ -234,9 +250,13 @@ static int wlcore_probe_of(struct device *dev, int *irq, struct wlcore_platdev_data *pdev_data) { struct device_node *np = dev->of_node; + const struct of_device_id *of_id; - if (!np || !of_match_node(wlcore_sdio_of_match_table, np)) - return -ENODATA; + of_id = of_match_node(wlcore_sdio_of_match_table, np); + if (!of_id) + return -ENODEV; + + pdev_data->family = of_id->data; *irq = irq_of_parse_and_map(np, 0); if (!*irq) { @@ -263,7 +283,7 @@ static int wlcore_probe_of(struct device *dev, int *irq, static int wl1271_probe(struct sdio_func *func, const struct sdio_device_id *id) { - struct wlcore_platdev_data pdev_data; + struct wlcore_platdev_data *pdev_data; struct wl12xx_sdio_glue *glue; struct resource res[1]; mmc_pm_flag_t mmcflags; @@ -275,14 +295,15 @@ static int wl1271_probe(struct sdio_func *func, if (func->num != 0x02) return -ENODEV; - memset(&pdev_data, 0x00, sizeof(pdev_data)); - pdev_data.if_ops = &sdio_ops; + pdev_data = devm_kzalloc(&func->dev, sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) + return -ENOMEM; - glue = kzalloc(sizeof(*glue), GFP_KERNEL); - if (!glue) { - dev_err(&func->dev, "can't allocate glue\n"); - goto out; - } + pdev_data->if_ops = &sdio_ops; + + glue = devm_kzalloc(&func->dev, sizeof(*glue), GFP_KERNEL); + if (!glue) + return -ENOMEM; glue->dev = &func->dev; @@ -292,16 +313,16 @@ static int wl1271_probe(struct sdio_func *func, /* Use block mode for transferring over one block size of data */ func->card->quirks |= MMC_QUIRK_BLKSZ_FOR_BYTE_MODE; - ret = wlcore_probe_of(&func->dev, &irq, &pdev_data); + ret = wlcore_probe_of(&func->dev, &irq, pdev_data); if (ret) - goto out_free_glue; + goto out; /* if sdio can keep power while host is suspended, enable wow */ mmcflags = sdio_get_host_pm_caps(func); dev_dbg(glue->dev, "sdio PM caps = 0x%x\n", mmcflags); if (mmcflags & MMC_PM_KEEP_POWER) - pdev_data.pwr_in_suspend = true; + pdev_data->pwr_in_suspend = true; sdio_set_drvdata(func, glue); @@ -323,7 +344,7 @@ static int wl1271_probe(struct sdio_func *func, if (!glue->core) { dev_err(glue->dev, "can't allocate platform_device"); ret = -ENOMEM; - goto out_free_glue; + goto out; } glue->core->dev.parent = &func->dev; @@ -341,8 +362,8 @@ static int wl1271_probe(struct sdio_func *func, goto out_dev_put; } - ret = platform_device_add_data(glue->core, &pdev_data, - sizeof(pdev_data)); + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) { dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; @@ -358,9 +379,6 @@ static int wl1271_probe(struct sdio_func *func, out_dev_put: platform_device_put(glue->core); -out_free_glue: - kfree(glue); - out: return ret; } diff --git a/drivers/net/wireless/ti/wlcore/spi.c b/drivers/net/wireless/ti/wlcore/spi.c index 6d24040889b8..f949ad2bd898 100644 --- a/drivers/net/wireless/ti/wlcore/spi.c +++ b/drivers/net/wireless/ti/wlcore/spi.c @@ -79,19 +79,19 @@ #define WSPI_MAX_NUM_OF_CHUNKS \ ((SPI_AGGR_BUFFER_SIZE / WSPI_MAX_CHUNK_SIZE) + 1) - -struct wilink_familiy_data { - char name[8]; +static const struct wilink_family_data wl127x_data = { + .name = "wl127x", + .nvs_name = "ti-connectivity/wl127x-nvs.bin", }; -const struct wilink_familiy_data *wilink_data; +static const struct wilink_family_data wl128x_data = { + .name = "wl128x", + .nvs_name = "ti-connectivity/wl128x-nvs.bin", +}; -static const struct wilink_familiy_data wl18xx_data = { +static const struct wilink_family_data wl18xx_data = { .name = "wl18xx", -}; - -static const struct wilink_familiy_data wl12xx_data = { - .name = "wl12xx", + .cfg_name = "ti-connectivity/wl18xx-conf.bin", }; struct wl12xx_spi_glue { @@ -429,10 +429,10 @@ static struct wl1271_if_operations spi_ops = { }; static const struct of_device_id wlcore_spi_of_match_table[] = { - { .compatible = "ti,wl1271", .data = &wl12xx_data}, - { .compatible = "ti,wl1273", .data = &wl12xx_data}, - { .compatible = "ti,wl1281", .data = &wl12xx_data}, - { .compatible = "ti,wl1283", .data = &wl12xx_data}, + { .compatible = "ti,wl1271", .data = &wl127x_data}, + { .compatible = "ti,wl1273", .data = &wl127x_data}, + { .compatible = "ti,wl1281", .data = &wl128x_data}, + { .compatible = "ti,wl1283", .data = &wl128x_data}, { .compatible = "ti,wl1801", .data = &wl18xx_data}, { .compatible = "ti,wl1805", .data = &wl18xx_data}, { .compatible = "ti,wl1807", .data = &wl18xx_data}, @@ -460,9 +460,9 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, if (!of_id) return -ENODEV; - wilink_data = of_id->data; - dev_info(&spi->dev, "selected chip familiy is %s\n", - wilink_data->name); + pdev_data->family = of_id->data; + dev_info(&spi->dev, "selected chip family is %s\n", + pdev_data->family->name); if (of_find_property(dt_node, "clock-xtal", NULL)) pdev_data->ref_clock_xtal = true; @@ -479,13 +479,15 @@ static int wlcore_probe_of(struct spi_device *spi, struct wl12xx_spi_glue *glue, static int wl1271_probe(struct spi_device *spi) { struct wl12xx_spi_glue *glue; - struct wlcore_platdev_data pdev_data; + struct wlcore_platdev_data *pdev_data; struct resource res[1]; int ret; - memset(&pdev_data, 0x00, sizeof(pdev_data)); + pdev_data = devm_kzalloc(&spi->dev, sizeof(*pdev_data), GFP_KERNEL); + if (!pdev_data) + return -ENOMEM; - pdev_data.if_ops = &spi_ops; + pdev_data->if_ops = &spi_ops; glue = devm_kzalloc(&spi->dev, sizeof(*glue), GFP_KERNEL); if (!glue) { @@ -509,7 +511,7 @@ static int wl1271_probe(struct spi_device *spi) return PTR_ERR(glue->reg); } - ret = wlcore_probe_of(spi, glue, &pdev_data); + ret = wlcore_probe_of(spi, glue, pdev_data); if (ret) { dev_err(glue->dev, "can't get device tree parameters (%d)\n", ret); @@ -522,7 +524,7 @@ static int wl1271_probe(struct spi_device *spi) return ret; } - glue->core = platform_device_alloc(wilink_data->name, + glue->core = platform_device_alloc(pdev_data->family->name, PLATFORM_DEVID_AUTO); if (!glue->core) { dev_err(glue->dev, "can't allocate platform_device\n"); @@ -543,8 +545,8 @@ static int wl1271_probe(struct spi_device *spi) goto out_dev_put; } - ret = platform_device_add_data(glue->core, &pdev_data, - sizeof(pdev_data)); + ret = platform_device_add_data(glue->core, pdev_data, + sizeof(*pdev_data)); if (ret) { dev_err(glue->dev, "can't add platform data\n"); goto out_dev_put; diff --git a/drivers/net/wireless/ti/wlcore/wlcore.h b/drivers/net/wireless/ti/wlcore/wlcore.h index 8f28aa02230c..1827546ba807 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore.h +++ b/drivers/net/wireless/ti/wlcore/wlcore.h @@ -501,6 +501,9 @@ struct wl1271 { /* dynamic fw traces */ u32 dynamic_fw_traces; + + /* time sync zone master */ + u8 zone_master_mac_addr[ETH_ALEN]; }; int wlcore_probe(struct wl1271 *wl, struct platform_device *pdev); diff --git a/drivers/net/wireless/ti/wlcore/wlcore_i.h b/drivers/net/wireless/ti/wlcore/wlcore_i.h index 242b4e37b94c..e840985385fc 100644 --- a/drivers/net/wireless/ti/wlcore/wlcore_i.h +++ b/drivers/net/wireless/ti/wlcore/wlcore_i.h @@ -35,12 +35,11 @@ #include "conf.h" #include "ini.h" -/* - * wl127x and wl128x are using the same NVS file name. However, the - * ini parameters between them are different. The driver validates - * the correct NVS size in wl1271_boot_upload_nvs(). - */ -#define WL12XX_NVS_NAME "ti-connectivity/wl1271-nvs.bin" +struct wilink_family_data { + const char *name; + const char *nvs_name; /* wl12xx nvs file */ + const char *cfg_name; /* wl18xx cfg file */ +}; #define WL1271_TX_SECURITY_LO16(s) ((u16)((s) & 0xffff)) #define WL1271_TX_SECURITY_HI32(s) ((u32)(((s) >> 16) & 0xffffffff)) @@ -208,6 +207,7 @@ struct wl1271_if_operations { struct wlcore_platdev_data { struct wl1271_if_operations *if_ops; + const struct wilink_family_data *family; bool ref_clock_xtal; /* specify whether the clock is XTAL or not */ u32 ref_clock_freq; /* in Hertz */ @@ -347,7 +347,6 @@ struct wl1271_station { * Used in both AP and STA mode. */ u64 total_freed_pkts; - struct wl1271 *wl; }; struct wl12xx_vif { diff --git a/drivers/net/wireless/wl3501_cs.c b/drivers/net/wireless/wl3501_cs.c index 82d94f83b6b4..932f3f81e8cf 100644 --- a/drivers/net/wireless/wl3501_cs.c +++ b/drivers/net/wireless/wl3501_cs.c @@ -1258,7 +1258,9 @@ static int wl3501_reset(struct net_device *dev) { struct wl3501_card *this = netdev_priv(dev); int rc = -ENODEV; + unsigned long flags; + spin_lock_irqsave(&this->lock, flags); wl3501_block_interrupt(this); if (wl3501_init_firmware(this)) { @@ -1280,20 +1282,17 @@ static int wl3501_reset(struct net_device *dev) pr_debug("%s: device reset", dev->name); rc = 0; out: + spin_unlock_irqrestore(&this->lock, flags); return rc; } static void wl3501_tx_timeout(struct net_device *dev) { - struct wl3501_card *this = netdev_priv(dev); struct net_device_stats *stats = &dev->stats; - unsigned long flags; int rc; stats->tx_errors++; - spin_lock_irqsave(&this->lock, flags); rc = wl3501_reset(dev); - spin_unlock_irqrestore(&this->lock, flags); if (rc) printk(KERN_ERR "%s: Error %d resetting card on Tx timeout!\n", dev->name, rc); diff --git a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c index a912dc051111..c5effd6c6be9 100644 --- a/drivers/net/wireless/zydas/zd1211rw/zd_usb.c +++ b/drivers/net/wireless/zydas/zd1211rw/zd_usb.c @@ -193,7 +193,7 @@ static int upload_code(struct usb_device *udev, 0, 0, p, sizeof(ret), 5000 /* ms */); if (r != sizeof(ret)) { dev_err(&udev->dev, - "control request firmeware confirmation failed." + "control request firmware confirmation failed." " Return value %d\n", r); if (r >= 0) r = -ENODEV; diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h index 84d6cbdd11b2..b38fb2cf3364 100644 --- a/drivers/net/xen-netback/common.h +++ b/drivers/net/xen-netback/common.h @@ -292,8 +292,6 @@ struct xenvif { #endif struct xen_netif_ctrl_back_ring ctrl; - struct task_struct *ctrl_task; - wait_queue_head_t ctrl_wq; unsigned int ctrl_irq; /* Miscellaneous private stuff. */ @@ -359,7 +357,7 @@ void xenvif_kick_thread(struct xenvif_queue *queue); int xenvif_dealloc_kthread(void *data); -int xenvif_ctrl_kthread(void *data); +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data); void xenvif_rx_queue_tail(struct xenvif_queue *queue, struct sk_buff *skb); diff --git a/drivers/net/xen-netback/hash.c b/drivers/net/xen-netback/hash.c index fb87cb39a56b..613bac057650 100644 --- a/drivers/net/xen-netback/hash.c +++ b/drivers/net/xen-netback/hash.c @@ -32,15 +32,6 @@ #include #include -static void xenvif_del_hash(struct rcu_head *rcu) -{ - struct xenvif_hash_cache_entry *entry; - - entry = container_of(rcu, struct xenvif_hash_cache_entry, rcu); - - kfree(entry); -} - static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, unsigned int len, u32 val) { @@ -76,7 +67,7 @@ static void xenvif_add_hash(struct xenvif *vif, const u8 *tag, if (++vif->hash.cache.count > xenvif_hash_cache_size) { list_del_rcu(&oldest->link); vif->hash.cache.count--; - call_rcu(&oldest->rcu, xenvif_del_hash); + kfree_rcu(oldest, rcu); } } @@ -114,7 +105,7 @@ static void xenvif_flush_hash(struct xenvif *vif) list_for_each_entry_rcu(entry, &vif->hash.cache.list, link) { list_del_rcu(&entry->link); vif->hash.cache.count--; - call_rcu(&entry->rcu, xenvif_del_hash); + kfree_rcu(entry, rcu); } spin_unlock_irqrestore(&vif->hash.cache.lock, flags); diff --git a/drivers/net/xen-netback/interface.c b/drivers/net/xen-netback/interface.c index 83deeebfc2d1..fb50c6d5f6c3 100644 --- a/drivers/net/xen-netback/interface.c +++ b/drivers/net/xen-netback/interface.c @@ -128,15 +128,6 @@ irqreturn_t xenvif_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -irqreturn_t xenvif_ctrl_interrupt(int irq, void *dev_id) -{ - struct xenvif *vif = dev_id; - - wake_up(&vif->ctrl_wq); - - return IRQ_HANDLED; -} - int xenvif_queue_stopped(struct xenvif_queue *queue) { struct net_device *dev = queue->vif->dev; @@ -570,8 +561,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, struct net_device *dev = vif->dev; void *addr; struct xen_netif_ctrl_sring *shared; - struct task_struct *task; - int err = -ENOMEM; + int err; err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), &ring_ref, 1, &addr); @@ -581,11 +571,7 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, shared = (struct xen_netif_ctrl_sring *)addr; BACK_RING_INIT(&vif->ctrl, shared, XEN_PAGE_SIZE); - init_waitqueue_head(&vif->ctrl_wq); - - err = bind_interdomain_evtchn_to_irqhandler(vif->domid, evtchn, - xenvif_ctrl_interrupt, - 0, dev->name, vif); + err = bind_interdomain_evtchn_to_irq(vif->domid, evtchn); if (err < 0) goto err_unmap; @@ -593,19 +579,13 @@ int xenvif_connect_ctrl(struct xenvif *vif, grant_ref_t ring_ref, xenvif_init_hash(vif); - task = kthread_create(xenvif_ctrl_kthread, (void *)vif, - "%s-control", dev->name); - if (IS_ERR(task)) { - pr_warn("Could not allocate kthread for %s\n", dev->name); - err = PTR_ERR(task); + err = request_threaded_irq(vif->ctrl_irq, NULL, xenvif_ctrl_irq_fn, + IRQF_ONESHOT, "xen-netback-ctrl", vif); + if (err) { + pr_warn("Could not setup irq handler for %s\n", dev->name); goto err_deinit; } - get_task_struct(task); - vif->ctrl_task = task; - - wake_up_process(vif->ctrl_task); - return 0; err_deinit: @@ -774,12 +754,6 @@ void xenvif_disconnect_data(struct xenvif *vif) void xenvif_disconnect_ctrl(struct xenvif *vif) { - if (vif->ctrl_task) { - kthread_stop(vif->ctrl_task); - put_task_struct(vif->ctrl_task); - vif->ctrl_task = NULL; - } - if (vif->ctrl_irq) { xenvif_deinit_hash(vif); unbind_from_irqhandler(vif->ctrl_irq, vif); diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c index edbae0b1e8f0..3d0c989384b5 100644 --- a/drivers/net/xen-netback/netback.c +++ b/drivers/net/xen-netback/netback.c @@ -2359,24 +2359,14 @@ static bool xenvif_ctrl_work_todo(struct xenvif *vif) return 0; } -int xenvif_ctrl_kthread(void *data) +irqreturn_t xenvif_ctrl_irq_fn(int irq, void *data) { struct xenvif *vif = data; - for (;;) { - wait_event_interruptible(vif->ctrl_wq, - xenvif_ctrl_work_todo(vif) || - kthread_should_stop()); - if (kthread_should_stop()) - break; + while (xenvif_ctrl_work_todo(vif)) + xenvif_ctrl_action(vif); - while (xenvif_ctrl_work_todo(vif)) - xenvif_ctrl_action(vif); - - cond_resched(); - } - - return 0; + return IRQ_HANDLED; } static int __init netback_init(void) diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c index 96ccd4e943db..e17879dd5d5a 100644 --- a/drivers/net/xen-netfront.c +++ b/drivers/net/xen-netfront.c @@ -565,6 +565,7 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) struct netfront_queue *queue = NULL; unsigned int num_queues = dev->real_num_tx_queues; u16 queue_index; + struct sk_buff *nskb; /* Drop the packet if no queues are set up */ if (num_queues < 1) @@ -593,6 +594,20 @@ static int xennet_start_xmit(struct sk_buff *skb, struct net_device *dev) page = virt_to_page(skb->data); offset = offset_in_page(skb->data); + + /* The first req should be at least ETH_HLEN size or the packet will be + * dropped by netback. + */ + if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) { + nskb = skb_copy(skb, GFP_ATOMIC); + if (!nskb) + goto drop; + dev_kfree_skb_any(skb); + skb = nskb; + page = virt_to_page(skb->data); + offset = offset_in_page(skb->data); + } + len = skb_headlen(skb); spin_lock_irqsave(&queue->tx_lock, flags); diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index 2e481b9e8ea5..86280b7e41f3 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -263,6 +263,7 @@ no_sysfs: no_device: mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); + ida_simple_remove(&ptp_clocks_map, index); no_slot: kfree(ptp); no_memory: diff --git a/drivers/ptp/ptp_ixp46x.c b/drivers/ptp/ptp_ixp46x.c index ee4f183ef9ee..344a3bac210b 100644 --- a/drivers/ptp/ptp_ixp46x.c +++ b/drivers/ptp/ptp_ixp46x.c @@ -268,18 +268,19 @@ static int setup_interrupt(int gpio) return err; irq = gpio_to_irq(gpio); + if (irq < 0) + return irq; - if (NO_IRQ == irq) - return NO_IRQ; - - if (irq_set_irq_type(irq, IRQF_TRIGGER_FALLING)) { + err = irq_set_irq_type(irq, IRQF_TRIGGER_FALLING); + if (err) { pr_err("cannot set trigger type for irq %d\n", irq); - return NO_IRQ; + return err; } - if (request_irq(irq, isr, 0, DRIVER, &ixp_clock)) { + err = request_irq(irq, isr, 0, DRIVER, &ixp_clock); + if (err) { pr_err("request_irq failed for irq %d\n", irq); - return NO_IRQ; + return err; } return irq; diff --git a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c index e4ba2d2616cd..7c0d7af0d3b7 100644 --- a/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c +++ b/drivers/scsi/cxgbi/cxgb4i/cxgb4i.c @@ -84,6 +84,9 @@ static inline int send_tx_flowc_wr(struct cxgbi_sock *); static const struct cxgb4_uld_info cxgb4i_uld_info = { .name = DRV_MODULE_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = false, .add = t4_uld_add, .rx_handler = t4_uld_rx_handler, .state_change = t4_uld_state_change, diff --git a/drivers/staging/octeon/ethernet-mdio.c b/drivers/staging/octeon/ethernet-mdio.c index e13a4ab46977..1fde9c824948 100644 --- a/drivers/staging/octeon/ethernet-mdio.c +++ b/drivers/staging/octeon/ethernet-mdio.c @@ -34,48 +34,23 @@ static void cvm_oct_get_drvinfo(struct net_device *dev, strlcpy(info->bus_info, "Builtin", sizeof(info->bus_info)); } -static int cvm_oct_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct octeon_ethernet *priv = netdev_priv(dev); - - if (priv->phydev) - return phy_ethtool_gset(priv->phydev, cmd); - - return -EINVAL; -} - -static int cvm_oct_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct octeon_ethernet *priv = netdev_priv(dev); - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (priv->phydev) - return phy_ethtool_sset(priv->phydev, cmd); - - return -EINVAL; -} - static int cvm_oct_nway_reset(struct net_device *dev) { - struct octeon_ethernet *priv = netdev_priv(dev); - if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (priv->phydev) - return phy_start_aneg(priv->phydev); + if (dev->phydev) + return phy_start_aneg(dev->phydev); return -EINVAL; } const struct ethtool_ops cvm_oct_ethtool_ops = { .get_drvinfo = cvm_oct_get_drvinfo, - .get_settings = cvm_oct_get_settings, - .set_settings = cvm_oct_set_settings, .nway_reset = cvm_oct_nway_reset, .get_link = ethtool_op_get_link, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; /** @@ -88,15 +63,13 @@ const struct ethtool_ops cvm_oct_ethtool_ops = { */ int cvm_oct_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) { - struct octeon_ethernet *priv = netdev_priv(dev); - if (!netif_running(dev)) return -EINVAL; - if (!priv->phydev) + if (!dev->phydev) return -EINVAL; - return phy_mii_ioctl(priv->phydev, rq, cmd); + return phy_mii_ioctl(dev->phydev, rq, cmd); } void cvm_oct_note_carrier(struct octeon_ethernet *priv, @@ -119,9 +92,9 @@ void cvm_oct_adjust_link(struct net_device *dev) cvmx_helper_link_info_t link_info; link_info.u64 = 0; - link_info.s.link_up = priv->phydev->link ? 1 : 0; - link_info.s.full_duplex = priv->phydev->duplex ? 1 : 0; - link_info.s.speed = priv->phydev->speed; + link_info.s.link_up = dev->phydev->link ? 1 : 0; + link_info.s.full_duplex = dev->phydev->duplex ? 1 : 0; + link_info.s.speed = dev->phydev->speed; priv->link_info = link_info.u64; /* @@ -130,8 +103,8 @@ void cvm_oct_adjust_link(struct net_device *dev) if (priv->poll) priv->poll(dev); - if (priv->last_link != priv->phydev->link) { - priv->last_link = priv->phydev->link; + if (priv->last_link != dev->phydev->link) { + priv->last_link = dev->phydev->link; cvmx_helper_link_set(priv->port, link_info); cvm_oct_note_carrier(priv, link_info); } @@ -151,9 +124,8 @@ int cvm_oct_common_stop(struct net_device *dev) priv->poll = NULL; - if (priv->phydev) - phy_disconnect(priv->phydev); - priv->phydev = NULL; + if (dev->phydev) + phy_disconnect(dev->phydev); if (priv->last_link) { link_info.u64 = 0; @@ -176,6 +148,7 @@ int cvm_oct_phy_setup_device(struct net_device *dev) { struct octeon_ethernet *priv = netdev_priv(dev); struct device_node *phy_node; + struct phy_device *phydev = NULL; if (!priv->of_node) goto no_phy; @@ -193,14 +166,14 @@ int cvm_oct_phy_setup_device(struct net_device *dev) if (!phy_node) goto no_phy; - priv->phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0, - PHY_INTERFACE_MODE_GMII); + phydev = of_phy_connect(dev, phy_node, cvm_oct_adjust_link, 0, + PHY_INTERFACE_MODE_GMII); - if (!priv->phydev) + if (!phydev) return -ENODEV; priv->last_link = 0; - phy_start_aneg(priv->phydev); + phy_start_aneg(phydev); return 0; no_phy: diff --git a/drivers/staging/octeon/ethernet-rgmii.c b/drivers/staging/octeon/ethernet-rgmii.c index 91b148cfcbdb..48846dffc8e1 100644 --- a/drivers/staging/octeon/ethernet-rgmii.c +++ b/drivers/staging/octeon/ethernet-rgmii.c @@ -145,7 +145,7 @@ int cvm_oct_rgmii_open(struct net_device *dev) if (ret) return ret; - if (priv->phydev) { + if (dev->phydev) { /* * In phydev mode, we need still periodic polling for the * preamble error checking, and we also need to call this diff --git a/drivers/staging/octeon/ethernet.c b/drivers/staging/octeon/ethernet.c index e9cd5f242921..45d576361319 100644 --- a/drivers/staging/octeon/ethernet.c +++ b/drivers/staging/octeon/ethernet.c @@ -457,10 +457,8 @@ int cvm_oct_common_init(struct net_device *dev) void cvm_oct_common_uninit(struct net_device *dev) { - struct octeon_ethernet *priv = netdev_priv(dev); - - if (priv->phydev) - phy_disconnect(priv->phydev); + if (dev->phydev) + phy_disconnect(dev->phydev); } int cvm_oct_common_open(struct net_device *dev, @@ -484,10 +482,10 @@ int cvm_oct_common_open(struct net_device *dev, if (octeon_is_simulation()) return 0; - if (priv->phydev) { - int r = phy_read_status(priv->phydev); + if (dev->phydev) { + int r = phy_read_status(dev->phydev); - if (r == 0 && priv->phydev->link == 0) + if (r == 0 && dev->phydev->link == 0) netif_carrier_off(dev); cvm_oct_adjust_link(dev); } else { diff --git a/drivers/staging/octeon/octeon-ethernet.h b/drivers/staging/octeon/octeon-ethernet.h index 6275c15e0035..d533aefe085a 100644 --- a/drivers/staging/octeon/octeon-ethernet.h +++ b/drivers/staging/octeon/octeon-ethernet.h @@ -40,7 +40,6 @@ struct octeon_ethernet { struct sk_buff_head tx_free_list[16]; /* Device statistics */ struct net_device_stats stats; - struct phy_device *phydev; unsigned int last_speed; unsigned int last_link; /* Last negotiated link state */ diff --git a/drivers/target/iscsi/cxgbit/cxgbit_cm.c b/drivers/target/iscsi/cxgbit/cxgbit_cm.c index 0ae0b131abfc..2fb1bf1a26c5 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_cm.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_cm.c @@ -24,6 +24,7 @@ #include #include +#include #include "cxgbit.h" #include "clip_tbl.h" @@ -72,15 +73,6 @@ out: return wr_waitp->ret; } -/* Returns whether a CPL status conveys negative advice. - */ -static int cxgbit_is_neg_adv(unsigned int status) -{ - return status == CPL_ERR_RTX_NEG_ADVICE || - status == CPL_ERR_PERSIST_NEG_ADVICE || - status == CPL_ERR_KEEPALV_NEG_ADVICE; -} - static int cxgbit_np_hashfn(const struct cxgbit_np *cnp) { return ((unsigned long)cnp >> 10) & (NP_INFO_HASH_SIZE - 1); @@ -623,21 +615,14 @@ void cxgbit_free_np(struct iscsi_np *np) static void cxgbit_send_halfclose(struct cxgbit_sock *csk) { struct sk_buff *skb; - struct cpl_close_con_req *req; - unsigned int len = roundup(sizeof(struct cpl_close_con_req), 16); + u32 len = roundup(sizeof(struct cpl_close_con_req), 16); skb = alloc_skb(len, GFP_ATOMIC); if (!skb) return; - req = (struct cpl_close_con_req *)__skb_put(skb, len); - memset(req, 0, len); - - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, - csk->tid)); - req->rsvd = 0; + cxgb_mk_close_con_req(skb, len, csk->tid, csk->txq_idx, + NULL, NULL); cxgbit_skcb_flags(skb) |= SKCBF_TX_FLAG_COMPL; __skb_queue_tail(&csk->txq, skb); @@ -662,9 +647,8 @@ static void cxgbit_abort_arp_failure(void *handle, struct sk_buff *skb) static int cxgbit_send_abort_req(struct cxgbit_sock *csk) { - struct cpl_abort_req *req; - unsigned int len = roundup(sizeof(*req), 16); struct sk_buff *skb; + u32 len = roundup(sizeof(struct cpl_abort_req), 16); pr_debug("%s: csk %p tid %u; state %d\n", __func__, csk, csk->tid, csk->com.state); @@ -675,15 +659,9 @@ static int cxgbit_send_abort_req(struct cxgbit_sock *csk) cxgbit_send_tx_flowc_wr(csk); skb = __skb_dequeue(&csk->skbq); - req = (struct cpl_abort_req *)__skb_put(skb, len); - memset(req, 0, len); + cxgb_mk_abort_req(skb, len, csk->tid, csk->txq_idx, + csk->com.cdev, cxgbit_abort_arp_failure); - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - t4_set_arp_err_handler(skb, csk->com.cdev, cxgbit_abort_arp_failure); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_REQ, - csk->tid)); - req->cmd = CPL_ABORT_SEND_RST; return cxgbit_l2t_send(csk->com.cdev, skb, csk->l2t); } @@ -789,109 +767,6 @@ void _cxgbit_free_csk(struct kref *kref) kfree(csk); } -static void -cxgbit_get_tuple_info(struct cpl_pass_accept_req *req, int *iptype, - __u8 *local_ip, __u8 *peer_ip, __be16 *local_port, - __be16 *peer_port) -{ - u32 eth_len = ETH_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - u32 ip_len = IP_HDR_LEN_G(be32_to_cpu(req->hdr_len)); - struct iphdr *ip = (struct iphdr *)((u8 *)(req + 1) + eth_len); - struct ipv6hdr *ip6 = (struct ipv6hdr *)((u8 *)(req + 1) + eth_len); - struct tcphdr *tcp = (struct tcphdr *) - ((u8 *)(req + 1) + eth_len + ip_len); - - if (ip->version == 4) { - pr_debug("%s saddr 0x%x daddr 0x%x sport %u dport %u\n", - __func__, - ntohl(ip->saddr), ntohl(ip->daddr), - ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 4; - memcpy(peer_ip, &ip->saddr, 4); - memcpy(local_ip, &ip->daddr, 4); - } else { - pr_debug("%s saddr %pI6 daddr %pI6 sport %u dport %u\n", - __func__, - ip6->saddr.s6_addr, ip6->daddr.s6_addr, - ntohs(tcp->source), - ntohs(tcp->dest)); - *iptype = 6; - memcpy(peer_ip, ip6->saddr.s6_addr, 16); - memcpy(local_ip, ip6->daddr.s6_addr, 16); - } - - *peer_port = tcp->source; - *local_port = tcp->dest; -} - -static int -cxgbit_our_interface(struct cxgbit_device *cdev, struct net_device *egress_dev) -{ - u8 i; - - egress_dev = cxgbit_get_real_dev(egress_dev); - for (i = 0; i < cdev->lldi.nports; i++) - if (cdev->lldi.ports[i] == egress_dev) - return 1; - return 0; -} - -static struct dst_entry * -cxgbit_find_route6(struct cxgbit_device *cdev, __u8 *local_ip, __u8 *peer_ip, - __be16 local_port, __be16 peer_port, u8 tos, - __u32 sin6_scope_id) -{ - struct dst_entry *dst = NULL; - - if (IS_ENABLED(CONFIG_IPV6)) { - struct flowi6 fl6; - - memset(&fl6, 0, sizeof(fl6)); - memcpy(&fl6.daddr, peer_ip, 16); - memcpy(&fl6.saddr, local_ip, 16); - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) - fl6.flowi6_oif = sin6_scope_id; - dst = ip6_route_output(&init_net, NULL, &fl6); - if (!dst) - goto out; - if (!cxgbit_our_interface(cdev, ip6_dst_idev(dst)->dev) && - !(ip6_dst_idev(dst)->dev->flags & IFF_LOOPBACK)) { - dst_release(dst); - dst = NULL; - } - } -out: - return dst; -} - -static struct dst_entry * -cxgbit_find_route(struct cxgbit_device *cdev, __be32 local_ip, __be32 peer_ip, - __be16 local_port, __be16 peer_port, u8 tos) -{ - struct rtable *rt; - struct flowi4 fl4; - struct neighbour *n; - - rt = ip_route_output_ports(&init_net, &fl4, NULL, peer_ip, - local_ip, - peer_port, local_port, IPPROTO_TCP, - tos, 0); - if (IS_ERR(rt)) - return NULL; - n = dst_neigh_lookup(&rt->dst, &peer_ip); - if (!n) - return NULL; - if (!cxgbit_our_interface(cdev, n->dev) && - !(n->dev->flags & IFF_LOOPBACK)) { - neigh_release(n); - dst_release(&rt->dst); - return NULL; - } - neigh_release(n); - return &rt->dst; -} - static void cxgbit_set_tcp_window(struct cxgbit_sock *csk, struct port_info *pi) { unsigned int linkspeed; @@ -1072,21 +947,14 @@ int cxgbit_ofld_send(struct cxgbit_device *cdev, struct sk_buff *skb) static void cxgbit_release_tid(struct cxgbit_device *cdev, u32 tid) { - struct cpl_tid_release *req; - unsigned int len = roundup(sizeof(*req), 16); + u32 len = roundup(sizeof(struct cpl_tid_release), 16); struct sk_buff *skb; skb = alloc_skb(len, GFP_ATOMIC); if (!skb) return; - req = (struct cpl_tid_release *)__skb_put(skb, len); - memset(req, 0, len); - - INIT_TP_WR(req, tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID( - CPL_TID_RELEASE, tid)); - set_wr_txq(skb, CPL_PRIORITY_SETUP, 0); + cxgb_mk_tid_release(skb, len, tid, 0); cxgbit_ofld_send(cdev, skb); } @@ -1108,20 +976,6 @@ cxgbit_l2t_send(struct cxgbit_device *cdev, struct sk_buff *skb, return ret < 0 ? ret : 0; } -static void -cxgbit_best_mtu(const unsigned short *mtus, unsigned short mtu, - unsigned int *idx, int use_ts, int ipv6) -{ - unsigned short hdr_size = (ipv6 ? sizeof(struct ipv6hdr) : - sizeof(struct iphdr)) + - sizeof(struct tcphdr) + - (use_ts ? round_up(TCPOLEN_TIMESTAMP, - 4) : 0); - unsigned short data_size = mtu - hdr_size; - - cxgb4_best_aligned_mtu(mtus, hdr_size, data_size, 8, idx); -} - static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb) { if (csk->com.state != CSK_STATE_ESTABLISHED) { @@ -1140,22 +994,18 @@ static void cxgbit_send_rx_credits(struct cxgbit_sock *csk, struct sk_buff *skb) int cxgbit_rx_data_ack(struct cxgbit_sock *csk) { struct sk_buff *skb; - struct cpl_rx_data_ack *req; - unsigned int len = roundup(sizeof(*req), 16); + u32 len = roundup(sizeof(struct cpl_rx_data_ack), 16); + u32 credit_dack; skb = alloc_skb(len, GFP_KERNEL); if (!skb) return -1; - req = (struct cpl_rx_data_ack *)__skb_put(skb, len); - memset(req, 0, len); + credit_dack = RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) | + RX_CREDITS_V(csk->rx_credits); - set_wr_txq(skb, CPL_PRIORITY_ACK, csk->ctrlq_idx); - INIT_TP_WR(req, csk->tid); - OPCODE_TID(req) = cpu_to_be32(MK_OPCODE_TID(CPL_RX_DATA_ACK, - csk->tid)); - req->credit_dack = cpu_to_be32(RX_DACK_CHANGE_F | RX_DACK_MODE_V(1) | - RX_CREDITS_V(csk->rx_credits)); + cxgb_mk_rx_data_ack(skb, len, csk->tid, csk->ctrlq_idx, + credit_dack); csk->rx_credits = 0; @@ -1210,15 +1060,6 @@ out: return -ENOMEM; } -static u32 cxgbit_compute_wscale(u32 win) -{ - u32 wscale = 0; - - while (wscale < 14 && (65535 << wscale) < win) - wscale++; - return wscale; -} - static void cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) { @@ -1246,10 +1087,10 @@ cxgbit_pass_accept_rpl(struct cxgbit_sock *csk, struct cpl_pass_accept_req *req) INIT_TP_WR(rpl5, csk->tid); OPCODE_TID(rpl5) = cpu_to_be32(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, csk->tid)); - cxgbit_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx, - req->tcpopt.tstamp, - (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); - wscale = cxgbit_compute_wscale(csk->rcv_win); + cxgb_best_mtu(csk->com.cdev->lldi.mtus, csk->mtu, &mtu_idx, + req->tcpopt.tstamp, + (csk->com.remote_addr.ss_family == AF_INET) ? 0 : 1); + wscale = cxgb_compute_wscale(csk->rcv_win); /* * Specify the largest window that will fit in opt0. The * remainder will be specified in the rx_data_ack. @@ -1340,8 +1181,8 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) goto rel_skb; } - cxgbit_get_tuple_info(req, &iptype, local_ip, peer_ip, - &local_port, &peer_port); + cxgb_get_4tuple(req, cdev->lldi.adapter_type, &iptype, local_ip, + peer_ip, &local_port, &peer_port); /* Find output route */ if (iptype == 4) { @@ -1350,21 +1191,23 @@ cxgbit_pass_accept_req(struct cxgbit_device *cdev, struct sk_buff *skb) , __func__, cnp, tid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = cxgbit_find_route(cdev, *(__be32 *)local_ip, - *(__be32 *)peer_ip, - local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid))); + dst = cxgb_find_route(&cdev->lldi, cxgbit_get_real_dev, + *(__be32 *)local_ip, + *(__be32 *)peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid))); } else { pr_debug("%s parent sock %p tid %u laddr %pI6 raddr %pI6 " "lport %d rport %d peer_mss %d\n" , __func__, cnp, tid, local_ip, peer_ip, ntohs(local_port), ntohs(peer_port), peer_mss); - dst = cxgbit_find_route6(cdev, local_ip, peer_ip, - local_port, peer_port, - PASS_OPEN_TOS_G(ntohl(req->tos_stid)), - ((struct sockaddr_in6 *) - &cnp->com.local_addr)->sin6_scope_id); + dst = cxgb_find_route6(&cdev->lldi, cxgbit_get_real_dev, + local_ip, peer_ip, + local_port, peer_port, + PASS_OPEN_TOS_G(ntohl(req->tos_stid)), + ((struct sockaddr_in6 *) + &cnp->com.local_addr)->sin6_scope_id); } if (!dst) { pr_err("%s - failed to find dst entry!\n", @@ -1795,16 +1638,15 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) { struct cpl_abort_req_rss *hdr = cplhdr(skb); unsigned int tid = GET_TID(hdr); - struct cpl_abort_rpl *rpl; struct sk_buff *rpl_skb; bool release = false; bool wakeup_thread = false; - unsigned int len = roundup(sizeof(*rpl), 16); + u32 len = roundup(sizeof(struct cpl_abort_rpl), 16); pr_debug("%s: csk %p; tid %u; state %d\n", __func__, csk, tid, csk->com.state); - if (cxgbit_is_neg_adv(hdr->status)) { + if (cxgb_is_neg_adv(hdr->status)) { pr_err("%s: got neg advise %d on tid %u\n", __func__, hdr->status, tid); goto rel_skb; @@ -1839,14 +1681,8 @@ static void cxgbit_abort_req_rss(struct cxgbit_sock *csk, struct sk_buff *skb) cxgbit_send_tx_flowc_wr(csk); rpl_skb = __skb_dequeue(&csk->skbq); - set_wr_txq(skb, CPL_PRIORITY_DATA, csk->txq_idx); - rpl = (struct cpl_abort_rpl *)__skb_put(rpl_skb, len); - memset(rpl, 0, len); - - INIT_TP_WR(rpl, csk->tid); - OPCODE_TID(rpl) = cpu_to_be32(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); - rpl->cmd = CPL_ABORT_NO_RST; + cxgb_mk_abort_rpl(rpl_skb, len, csk->tid, csk->txq_idx); cxgbit_ofld_send(csk->com.cdev, rpl_skb); if (wakeup_thread) { diff --git a/drivers/target/iscsi/cxgbit/cxgbit_main.c b/drivers/target/iscsi/cxgbit/cxgbit_main.c index 27dd11aff934..ad26b9372f10 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_main.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_main.c @@ -652,6 +652,9 @@ static struct iscsit_transport cxgbit_transport = { static struct cxgb4_uld_info cxgbit_uld_info = { .name = DRV_NAME, + .nrxq = MAX_ULD_QSETS, + .rxq_size = 1024, + .lro = true, .add = cxgbit_uld_add, .state_change = cxgbit_uld_state_change, .lro_rx_handler = cxgbit_uld_lro_rx_handler, diff --git a/fs/afs/callback.c b/fs/afs/callback.c index 7ef637d7f3a5..1e9d2f84e5b5 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -461,8 +461,8 @@ static void afs_callback_updater(struct work_struct *work) */ int __init afs_callback_update_init(void) { - afs_callback_update_worker = - create_singlethread_workqueue("kafs_callbackd"); + afs_callback_update_worker = alloc_ordered_workqueue("kafs_callbackd", + WQ_MEM_RECLAIM); return afs_callback_update_worker ? 0 : -ENOMEM; } diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 85737e96ab8b..2037e7a77a37 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -17,19 +17,12 @@ #include "internal.h" #include "afs_cm.h" -#if 0 -struct workqueue_struct *afs_cm_workqueue; -#endif /* 0 */ - -static int afs_deliver_cb_init_call_back_state(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_init_call_back_state3(struct afs_call *, - struct sk_buff *, bool); -static int afs_deliver_cb_probe(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_callback(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_probe_uuid(struct afs_call *, struct sk_buff *, bool); -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *, - struct sk_buff *, bool); +static int afs_deliver_cb_init_call_back_state(struct afs_call *); +static int afs_deliver_cb_init_call_back_state3(struct afs_call *); +static int afs_deliver_cb_probe(struct afs_call *); +static int afs_deliver_cb_callback(struct afs_call *); +static int afs_deliver_cb_probe_uuid(struct afs_call *); +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *); static void afs_cm_destructor(struct afs_call *); /* @@ -134,7 +127,7 @@ static void afs_cm_destructor(struct afs_call *call) * received. The step number here must match the final number in * afs_deliver_cb_callback(). */ - if (call->unmarshall == 6) { + if (call->unmarshall == 5) { ASSERT(call->server && call->count && call->request); afs_break_callbacks(call->server, call->count, call->request); } @@ -168,27 +161,27 @@ static void SRXAFSCB_CallBack(struct work_struct *work) /* * deliver request data to a CB.CallBack call */ -static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cb_callback(struct afs_call *call) { + struct sockaddr_rxrpc srx; struct afs_callback *cb; struct afs_server *server; - struct in_addr addr; __be32 *bp; u32 tmp; int ret, loop; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); call->offset = 0; call->unmarshall++; /* extract the FID array and its count in two steps */ case 1: _debug("extract FID count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -205,8 +198,8 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, case 2: _debug("extract FID array"); - ret = afs_extract_data(call, skb, last, call->buffer, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, true); if (ret < 0) return ret; @@ -232,7 +225,7 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, /* extract the callback array and its count in two steps */ case 3: _debug("extract CB count"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -242,13 +235,11 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, return -EBADMSG; call->offset = 0; call->unmarshall++; - if (tmp == 0) - goto empty_cb_array; case 4: _debug("extract CB array"); - ret = afs_extract_data(call, skb, last, call->request, - call->count * 3 * 4); + ret = afs_extract_data(call, call->buffer, + call->count * 3 * 4, false); if (ret < 0) return ret; @@ -261,15 +252,9 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, cb->type = ntohl(*bp++); } - empty_cb_array: call->offset = 0; call->unmarshall++; - case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - /* Record that the message was unmarshalled successfully so * that the call destructor can know do the callback breaking * work, even if the final ACK isn't received. @@ -278,17 +263,15 @@ static int afs_deliver_cb_callback(struct afs_call *call, struct sk_buff *skb, * updated also. */ call->unmarshall++; - case 6: + case 5: break; } - call->state = AFS_CALL_REPLYING; /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -315,17 +298,17 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) /* * deliver request data to a CB.InitCallBackState call */ -static int afs_deliver_cb_init_call_back_state(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; @@ -334,8 +317,7 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); + server = afs_find_server(&srx); if (!server) return -ENOTCONN; call->server = server; @@ -348,110 +330,20 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call, /* * deliver request data to a CB.InitCallBackState3 call */ -static int afs_deliver_cb_init_call_back_state3(struct afs_call *call, - struct sk_buff *skb, - bool last) +static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { + struct sockaddr_rxrpc srx; struct afs_server *server; - struct in_addr addr; - - _enter(",{%u},%d", skb->len, last); - - /* There are some arguments that we ignore */ - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; - - /* we'll need the file server record as that tells us which set of - * vnodes to operate upon */ - memcpy(&addr, &ip_hdr(skb)->saddr, 4); - server = afs_find_server(&addr); - if (!server) - return -ENOTCONN; - call->server = server; - - INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); - queue_work(afs_wq, &call->work); - return 0; -} - -/* - * allow the fileserver to see if the cache manager is still alive - */ -static void SRXAFSCB_Probe(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, work); - - _enter(""); - afs_send_empty_reply(call); - _leave(""); -} - -/* - * deliver request data to a CB.Probe call - */ -static int afs_deliver_cb_probe(struct afs_call *call, struct sk_buff *skb, - bool last) -{ - int ret; - - _enter(",{%u},%d", skb->len, last); - - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; - - /* no unmarshalling required */ - call->state = AFS_CALL_REPLYING; - - INIT_WORK(&call->work, SRXAFSCB_Probe); - queue_work(afs_wq, &call->work); - return 0; -} - -/* - * allow the fileserver to quickly find out if the fileserver has been rebooted - */ -static void SRXAFSCB_ProbeUuid(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, work); - struct afs_uuid *r = call->request; - - struct { - __be32 match; - } reply; - - _enter(""); - - - if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) - reply.match = htonl(0); - else - reply.match = htonl(1); - - afs_send_simple_reply(call, &reply, sizeof(reply)); - _leave(""); -} - -/* - * deliver request data to a CB.ProbeUuid call - */ -static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, - bool last) -{ struct afs_uuid *r; unsigned loop; __be32 *b; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; + rxrpc_kernel_get_peer(afs_socket, call->rxcall, &srx); + + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -463,8 +355,8 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, case 1: _debug("extract UUID"); - ret = afs_extract_data(call, skb, last, call->buffer, - 11 * sizeof(__be32)); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); switch (ret) { case 0: break; case -EAGAIN: return 0; @@ -491,16 +383,133 @@ static int afs_deliver_cb_probe_uuid(struct afs_call *call, struct sk_buff *skb, call->unmarshall++; case 2: - _debug("trailer"); - if (skb->len != 0) - return -EBADMSG; break; } - ret = afs_data_complete(call, skb, last); + /* no unmarshalling required */ + call->state = AFS_CALL_REPLYING; + + /* we'll need the file server record as that tells us which set of + * vnodes to operate upon */ + server = afs_find_server(&srx); + if (!server) + return -ENOTCONN; + call->server = server; + + INIT_WORK(&call->work, SRXAFSCB_InitCallBackState); + queue_work(afs_wq, &call->work); + return 0; +} + +/* + * allow the fileserver to see if the cache manager is still alive + */ +static void SRXAFSCB_Probe(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + + _enter(""); + afs_send_empty_reply(call); + _leave(""); +} + +/* + * deliver request data to a CB.Probe call + */ +static int afs_deliver_cb_probe(struct afs_call *call) +{ + int ret; + + _enter(""); + + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; + /* no unmarshalling required */ + call->state = AFS_CALL_REPLYING; + + INIT_WORK(&call->work, SRXAFSCB_Probe); + queue_work(afs_wq, &call->work); + return 0; +} + +/* + * allow the fileserver to quickly find out if the fileserver has been rebooted + */ +static void SRXAFSCB_ProbeUuid(struct work_struct *work) +{ + struct afs_call *call = container_of(work, struct afs_call, work); + struct afs_uuid *r = call->request; + + struct { + __be32 match; + } reply; + + _enter(""); + + if (memcmp(r, &afs_uuid, sizeof(afs_uuid)) == 0) + reply.match = htonl(0); + else + reply.match = htonl(1); + + afs_send_simple_reply(call, &reply, sizeof(reply)); + _leave(""); +} + +/* + * deliver request data to a CB.ProbeUuid call + */ +static int afs_deliver_cb_probe_uuid(struct afs_call *call) +{ + struct afs_uuid *r; + unsigned loop; + __be32 *b; + int ret; + + _enter("{%u}", call->unmarshall); + + switch (call->unmarshall) { + case 0: + call->offset = 0; + call->buffer = kmalloc(11 * sizeof(__be32), GFP_KERNEL); + if (!call->buffer) + return -ENOMEM; + call->unmarshall++; + + case 1: + _debug("extract UUID"); + ret = afs_extract_data(call, call->buffer, + 11 * sizeof(__be32), false); + switch (ret) { + case 0: break; + case -EAGAIN: return 0; + default: return ret; + } + + _debug("unmarshall UUID"); + call->request = kmalloc(sizeof(struct afs_uuid), GFP_KERNEL); + if (!call->request) + return -ENOMEM; + + b = call->buffer; + r = call->request; + r->time_low = ntohl(b[0]); + r->time_mid = ntohl(b[1]); + r->time_hi_and_version = ntohl(b[2]); + r->clock_seq_hi_and_reserved = ntohl(b[3]); + r->clock_seq_low = ntohl(b[4]); + + for (loop = 0; loop < 6; loop++) + r->node[loop] = ntohl(b[loop + 5]); + + call->offset = 0; + call->unmarshall++; + + case 2: + break; + } + call->state = AFS_CALL_REPLYING; INIT_WORK(&call->work, SRXAFSCB_ProbeUuid); @@ -574,14 +583,13 @@ static void SRXAFSCB_TellMeAboutYourself(struct work_struct *work) /* * deliver request data to a CB.TellMeAboutYourself call */ -static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_cb_tell_me_about_yourself(struct afs_call *call) { int ret; - _enter(",{%u},%d", skb->len, last); + _enter(""); - ret = afs_data_complete(call, skb, last); + ret = afs_extract_data(call, NULL, 0, false); if (ret < 0) return ret; diff --git a/fs/afs/flock.c b/fs/afs/flock.c index d91a9c9cfbd0..3191dff2c156 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -36,8 +36,8 @@ static int afs_init_lock_manager(void) if (!afs_lock_manager) { mutex_lock(&afs_lock_manager_mutex); if (!afs_lock_manager) { - afs_lock_manager = - create_singlethread_workqueue("kafs_lockd"); + afs_lock_manager = alloc_workqueue("kafs_lockd", + WQ_MEM_RECLAIM, 0); if (!afs_lock_manager) ret = -ENOMEM; } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index 9312b92e54be..96f4d764d1a6 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -235,16 +235,15 @@ static void xdr_decode_AFSFetchVolumeStatus(const __be32 **_bp, /* * deliver reply data to an FS.FetchStatus */ -static int afs_deliver_fs_fetch_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_status(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -307,8 +306,7 @@ int afs_fs_fetch_file_status(struct afs_server *server, /* * deliver reply data to an FS.FetchData */ -static int afs_deliver_fs_fetch_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_fetch_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; @@ -316,7 +314,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, void *buffer; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -332,7 +330,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, * client) */ case 1: _debug("extract data length (MSW)"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -347,7 +345,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the returned data length */ case 2: _debug("extract data length"); - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -363,10 +361,10 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, _debug("extract data"); if (call->count > 0) { page = call->reply3; - buffer = kmap_atomic(page); - ret = afs_extract_data(call, skb, last, buffer, - call->count); - kunmap_atomic(buffer); + buffer = kmap(page); + ret = afs_extract_data(call, buffer, + call->count, true); + kunmap(buffer); if (ret < 0) return ret; } @@ -376,8 +374,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, /* extract the metadata */ case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - (21 + 3 + 6) * 4); + ret = afs_extract_data(call, call->buffer, + (21 + 3 + 6) * 4, false); if (ret < 0) return ret; @@ -391,18 +389,15 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call, call->unmarshall++; case 5: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } if (call->count < PAGE_SIZE) { _debug("clear"); page = call->reply3; - buffer = kmap_atomic(page); + buffer = kmap(page); memset(buffer + call->count, 0, PAGE_SIZE - call->count); - kunmap_atomic(buffer); + kunmap(buffer); } _leave(" = 0 [done]"); @@ -515,13 +510,12 @@ int afs_fs_fetch_data(struct afs_server *server, /* * deliver reply data to an FS.GiveUpCallBacks */ -static int afs_deliver_fs_give_up_callbacks(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_give_up_callbacks(struct afs_call *call) { - _enter(",{%u},%d", skb->len, last); + _enter(""); /* shouldn't be any reply data */ - return afs_data_complete(call, skb, last); + return afs_extract_data(call, NULL, 0, false); } /* @@ -599,16 +593,15 @@ int afs_fs_give_up_callbacks(struct afs_server *server, /* * deliver reply data to an FS.CreateFile or an FS.MakeDir */ -static int afs_deliver_fs_create_vnode(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_create_vnode(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -696,16 +689,15 @@ int afs_fs_create(struct afs_server *server, /* * deliver reply data to an FS.RemoveFile or FS.RemoveDir */ -static int afs_deliver_fs_remove(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_remove(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -777,16 +769,15 @@ int afs_fs_remove(struct afs_server *server, /* * deliver reply data to an FS.Link */ -static int afs_deliver_fs_link(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_link(struct afs_call *call) { struct afs_vnode *dvnode = call->reply, *vnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -863,16 +854,15 @@ int afs_fs_link(struct afs_server *server, /* * deliver reply data to an FS.Symlink */ -static int afs_deliver_fs_symlink(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_symlink(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -968,16 +958,15 @@ int afs_fs_symlink(struct afs_server *server, /* * deliver reply data to an FS.Rename */ -static int afs_deliver_fs_rename(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_rename(struct afs_call *call) { struct afs_vnode *orig_dvnode = call->reply, *new_dvnode = call->reply2; const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1072,16 +1061,15 @@ int afs_fs_rename(struct afs_server *server, /* * deliver reply data to an FS.StoreData */ -static int afs_deliver_fs_store_data(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_data(struct afs_call *call) { struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1251,17 +1239,16 @@ int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, /* * deliver reply data to an FS.StoreStatus */ -static int afs_deliver_fs_store_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_store_status(struct afs_call *call) { afs_dataversion_t *store_version; struct afs_vnode *vnode = call->reply; const __be32 *bp; int ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; @@ -1443,14 +1430,13 @@ int afs_fs_setattr(struct afs_server *server, struct key *key, /* * deliver reply data to an FS.GetVolumeStatus */ -static int afs_deliver_fs_get_volume_status(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_get_volume_status(struct afs_call *call) { const __be32 *bp; char *p; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); switch (call->unmarshall) { case 0: @@ -1460,8 +1446,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the returned status record */ case 1: _debug("extract status"); - ret = afs_extract_data(call, skb, last, call->buffer, - 12 * 4); + ret = afs_extract_data(call, call->buffer, + 12 * 4, true); if (ret < 0) return ret; @@ -1472,7 +1458,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the volume name length */ case 2: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1487,8 +1473,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 3: _debug("extract volname"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1508,8 +1494,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 4: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1519,7 +1505,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the offline message length */ case 5: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1534,8 +1520,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 6: _debug("extract offline"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1555,8 +1541,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->count = 4 - (call->count & 3); case 7: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, true); if (ret < 0) return ret; @@ -1566,7 +1552,7 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, /* extract the message of the day length */ case 8: - ret = afs_extract_data(call, skb, last, &call->tmp, 4); + ret = afs_extract_data(call, &call->tmp, 4, true); if (ret < 0) return ret; @@ -1581,8 +1567,8 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, case 9: _debug("extract motd"); if (call->count > 0) { - ret = afs_extract_data(call, skb, last, call->reply3, - call->count); + ret = afs_extract_data(call, call->reply3, + call->count, true); if (ret < 0) return ret; } @@ -1595,26 +1581,17 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call, call->unmarshall++; /* extract the message of the day padding */ - if ((call->count & 3) == 0) { - call->unmarshall++; - goto no_motd_padding; - } - call->count = 4 - (call->count & 3); + call->count = (4 - (call->count & 3)) & 3; case 10: - ret = afs_extract_data(call, skb, last, call->buffer, - call->count); + ret = afs_extract_data(call, call->buffer, + call->count, false); if (ret < 0) return ret; call->offset = 0; call->unmarshall++; - no_motd_padding: - case 11: - ret = afs_data_complete(call, skb, last); - if (ret < 0) - return ret; break; } @@ -1685,15 +1662,14 @@ int afs_fs_get_volume_status(struct afs_server *server, /* * deliver reply data to an FS.SetLock, FS.ExtendLock or FS.ReleaseLock */ -static int afs_deliver_fs_xxxx_lock(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_fs_xxxx_lock(struct afs_call *call) { const __be32 *bp; int ret; - _enter("{%u},{%u},%d", call->unmarshall, skb->len, last); + _enter("{%u}", call->unmarshall); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/fs/afs/internal.h b/fs/afs/internal.h index df976b2a7f40..5497c8496055 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -13,13 +13,13 @@ #include #include #include -#include #include #include #include #include #include #include +#include #include "afs.h" #include "afs_vl.h" @@ -56,7 +56,7 @@ struct afs_mount_params { */ struct afs_wait_mode { /* RxRPC received message notification */ - void (*rx_wakeup)(struct afs_call *call); + rxrpc_notify_rx_t notify_rx; /* synchronous call waiter and call dispatched notification */ int (*wait)(struct afs_call *call); @@ -75,10 +75,8 @@ struct afs_call { const struct afs_call_type *type; /* type of call */ const struct afs_wait_mode *wait_mode; /* completion wait mode */ wait_queue_head_t waitq; /* processes awaiting completion */ - void (*async_workfn)(struct afs_call *call); /* asynchronous work function */ struct work_struct async_work; /* asynchronous work processor */ struct work_struct work; /* actual work processor */ - struct sk_buff_head rx_queue; /* received packets */ struct rxrpc_call *rxcall; /* RxRPC call handle */ struct key *key; /* security for this call */ struct afs_server *server; /* server affected by incoming CM call */ @@ -92,6 +90,7 @@ struct afs_call { void *reply4; /* reply buffer (fourth part) */ pgoff_t first; /* first page in mapping to deal with */ pgoff_t last; /* last page in mapping to deal with */ + size_t offset; /* offset into received data store */ enum { /* call state */ AFS_CALL_REQUESTING, /* request is being sent for outgoing call */ AFS_CALL_AWAIT_REPLY, /* awaiting reply to outgoing call */ @@ -99,21 +98,18 @@ struct afs_call { AFS_CALL_AWAIT_REQUEST, /* awaiting request data on incoming call */ AFS_CALL_REPLYING, /* replying to incoming call */ AFS_CALL_AWAIT_ACK, /* awaiting final ACK of incoming call */ - AFS_CALL_COMPLETE, /* successfully completed */ - AFS_CALL_BUSY, /* server was busy */ - AFS_CALL_ABORTED, /* call was aborted */ - AFS_CALL_ERROR, /* call failed due to error */ + AFS_CALL_COMPLETE, /* Completed or failed */ } state; int error; /* error code */ + u32 abort_code; /* Remote abort ID or 0 */ unsigned request_size; /* size of request data */ unsigned reply_max; /* maximum size of reply */ - unsigned reply_size; /* current size of reply */ unsigned first_offset; /* offset into mapping[first] */ unsigned last_to; /* amount of mapping[last] */ - unsigned offset; /* offset into received data store */ unsigned char unmarshall; /* unmarshalling phase */ bool incoming; /* T if incoming call */ bool send_pages; /* T if data from mapping should be sent */ + bool need_attention; /* T if RxRPC poked us */ u16 service_id; /* RxRPC service ID to call */ __be16 port; /* target UDP port */ __be32 operation_ID; /* operation ID for an incoming call */ @@ -128,8 +124,7 @@ struct afs_call_type { /* deliver request or reply data to an call * - returning an error will cause the call to be aborted */ - int (*deliver)(struct afs_call *call, struct sk_buff *skb, - bool last); + int (*deliver)(struct afs_call *call); /* map an abort code to an error number */ int (*abort_to_error)(u32 abort_code); @@ -607,29 +602,22 @@ extern void afs_proc_cell_remove(struct afs_cell *); /* * rxrpc.c */ +extern struct socket *afs_socket; + extern int afs_open_socket(void); extern void afs_close_socket(void); -extern void afs_data_consumed(struct afs_call *, struct sk_buff *); extern int afs_make_call(struct in_addr *, struct afs_call *, gfp_t, const struct afs_wait_mode *); extern struct afs_call *afs_alloc_flat_call(const struct afs_call_type *, size_t, size_t); extern void afs_flat_call_destructor(struct afs_call *); -extern int afs_transfer_reply(struct afs_call *, struct sk_buff *, bool); extern void afs_send_empty_reply(struct afs_call *); extern void afs_send_simple_reply(struct afs_call *, const void *, size_t); -extern int afs_extract_data(struct afs_call *, struct sk_buff *, bool, void *, - size_t); +extern int afs_extract_data(struct afs_call *, void *, size_t, bool); -static inline int afs_data_complete(struct afs_call *call, struct sk_buff *skb, - bool last) +static inline int afs_transfer_reply(struct afs_call *call) { - if (skb->len > 0) - return -EBADMSG; - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - return 0; + return afs_extract_data(call, call->buffer, call->reply_max, false); } /* @@ -654,7 +642,7 @@ do { \ extern struct afs_server *afs_lookup_server(struct afs_cell *, const struct in_addr *); -extern struct afs_server *afs_find_server(const struct in_addr *); +extern struct afs_server *afs_find_server(const struct sockaddr_rxrpc *); extern void afs_put_server(struct afs_server *); extern void __exit afs_purge_servers(void); diff --git a/fs/afs/main.c b/fs/afs/main.c index 35de0c04729f..0b187ef3b5b7 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" MODULE_DESCRIPTION("AFS Client File System"); diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 14d04c848465..59bdaa7527b6 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -16,34 +16,36 @@ #include "internal.h" #include "afs_cm.h" -static struct socket *afs_socket; /* my RxRPC socket */ +struct socket *afs_socket; /* my RxRPC socket */ static struct workqueue_struct *afs_async_calls; +static struct afs_call *afs_spare_incoming_call; static atomic_t afs_outstanding_calls; -static atomic_t afs_outstanding_skbs; -static void afs_wake_up_call_waiter(struct afs_call *); +static void afs_free_call(struct afs_call *); +static void afs_wake_up_call_waiter(struct sock *, struct rxrpc_call *, unsigned long); static int afs_wait_for_call_to_complete(struct afs_call *); -static void afs_wake_up_async_call(struct afs_call *); +static void afs_wake_up_async_call(struct sock *, struct rxrpc_call *, unsigned long); static int afs_dont_wait_for_call_to_complete(struct afs_call *); -static void afs_process_async_call(struct afs_call *); -static void afs_rx_interceptor(struct sock *, unsigned long, struct sk_buff *); -static int afs_deliver_cm_op_id(struct afs_call *, struct sk_buff *, bool); +static void afs_process_async_call(struct work_struct *); +static void afs_rx_new_call(struct sock *, struct rxrpc_call *, unsigned long); +static void afs_rx_discard_new_call(struct rxrpc_call *, unsigned long); +static int afs_deliver_cm_op_id(struct afs_call *); /* synchronous call management */ const struct afs_wait_mode afs_sync_call = { - .rx_wakeup = afs_wake_up_call_waiter, + .notify_rx = afs_wake_up_call_waiter, .wait = afs_wait_for_call_to_complete, }; /* asynchronous call management */ const struct afs_wait_mode afs_async_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, .wait = afs_dont_wait_for_call_to_complete, }; /* asynchronous incoming call management */ static const struct afs_wait_mode afs_async_incoming_call = { - .rx_wakeup = afs_wake_up_async_call, + .notify_rx = afs_wake_up_async_call, }; /* asynchronous incoming call initial processing */ @@ -53,17 +55,9 @@ static const struct afs_call_type afs_RXCMxxxx = { .abort_to_error = afs_abort_to_error, }; -static void afs_collect_incoming_call(struct work_struct *); +static void afs_charge_preallocation(struct work_struct *); -static struct sk_buff_head afs_incoming_calls; -static DECLARE_WORK(afs_collect_incoming_call_work, afs_collect_incoming_call); - -static void afs_async_workfn(struct work_struct *work) -{ - struct afs_call *call = container_of(work, struct afs_call, async_work); - - call->async_workfn(call); -} +static DECLARE_WORK(afs_charge_preallocation_work, afs_charge_preallocation); static int afs_wait_atomic_t(atomic_t *p) { @@ -83,10 +77,8 @@ int afs_open_socket(void) _enter(""); - skb_queue_head_init(&afs_incoming_calls); - ret = -ENOMEM; - afs_async_calls = create_singlethread_workqueue("kafsd"); + afs_async_calls = alloc_workqueue("kafsd", WQ_MEM_RECLAIM, 0); if (!afs_async_calls) goto error_0; @@ -110,13 +102,15 @@ int afs_open_socket(void) if (ret < 0) goto error_2; + rxrpc_kernel_new_call_notification(socket, afs_rx_new_call, + afs_rx_discard_new_call); + ret = kernel_listen(socket, INT_MAX); if (ret < 0) goto error_2; - rxrpc_kernel_intercept_rx_messages(socket, afs_rx_interceptor); - afs_socket = socket; + afs_charge_preallocation(NULL); _leave(" = 0"); return 0; @@ -136,51 +130,27 @@ void afs_close_socket(void) { _enter(""); + if (afs_spare_incoming_call) { + atomic_inc(&afs_outstanding_calls); + afs_free_call(afs_spare_incoming_call); + afs_spare_incoming_call = NULL; + } + + _debug("outstanding %u", atomic_read(&afs_outstanding_calls)); wait_on_atomic_t(&afs_outstanding_calls, afs_wait_atomic_t, TASK_UNINTERRUPTIBLE); _debug("no outstanding calls"); + flush_workqueue(afs_async_calls); + kernel_sock_shutdown(afs_socket, SHUT_RDWR); + flush_workqueue(afs_async_calls); sock_release(afs_socket); _debug("dework"); destroy_workqueue(afs_async_calls); - - ASSERTCMP(atomic_read(&afs_outstanding_skbs), ==, 0); _leave(""); } -/* - * Note that the data in a socket buffer is now consumed. - */ -void afs_data_consumed(struct afs_call *call, struct sk_buff *skb) -{ - if (!skb) { - _debug("DLVR NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("DLVR %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - rxrpc_kernel_data_consumed(call->rxcall, skb); - } -} - -/* - * free a socket buffer - */ -static void afs_free_skb(struct sk_buff *skb) -{ - if (!skb) { - _debug("FREE NULL [%d]", atomic_read(&afs_outstanding_skbs)); - dump_stack(); - } else { - _debug("FREE %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - if (atomic_dec_return(&afs_outstanding_skbs) == -1) - BUG(); - rxrpc_kernel_free_skb(skb); - } -} - /* * free a call */ @@ -191,7 +161,6 @@ static void afs_free_call(struct afs_call *call) ASSERTCMP(call->rxcall, ==, NULL); ASSERT(!work_pending(&call->async_work)); - ASSERT(skb_queue_empty(&call->rx_queue)); ASSERT(call->type->name != NULL); kfree(call->request); @@ -207,7 +176,7 @@ static void afs_free_call(struct afs_call *call) static void afs_end_call_nofree(struct afs_call *call) { if (call->rxcall) { - rxrpc_kernel_end_call(call->rxcall); + rxrpc_kernel_end_call(afs_socket, call->rxcall); call->rxcall = NULL; } if (call->type->destructor) @@ -227,7 +196,7 @@ static void afs_end_call(struct afs_call *call) * allocate a call with flat request and reply buffers */ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, - size_t request_size, size_t reply_size) + size_t request_size, size_t reply_max) { struct afs_call *call; @@ -241,7 +210,7 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, call->type = type; call->request_size = request_size; - call->reply_max = reply_size; + call->reply_max = reply_max; if (request_size) { call->request = kmalloc(request_size, GFP_NOFS); @@ -249,14 +218,13 @@ struct afs_call *afs_alloc_flat_call(const struct afs_call_type *type, goto nomem_free; } - if (reply_size) { - call->buffer = kmalloc(reply_size, GFP_NOFS); + if (reply_max) { + call->buffer = kmalloc(reply_max, GFP_NOFS); if (!call->buffer) goto nomem_free; } init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); return call; nomem_free: @@ -325,8 +293,8 @@ static int afs_send_pages(struct afs_call *call, struct msghdr *msg, * returns from sending the request */ if (first + loop >= last) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(call->rxcall, msg, - to - offset); + ret = rxrpc_kernel_send_data(afs_socket, call->rxcall, + msg, to - offset); kunmap(pages[loop]); if (ret < 0) break; @@ -354,7 +322,6 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, struct msghdr msg; struct kvec iov[1]; int ret; - struct sk_buff *skb; _enter("%x,{%d},", addr->s_addr, ntohs(call->port)); @@ -366,8 +333,7 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, atomic_read(&afs_outstanding_calls)); call->wait_mode = wait_mode; - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); memset(&srx, 0, sizeof(srx)); srx.srx_family = AF_RXRPC; @@ -380,7 +346,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, /* create a call */ rxcall = rxrpc_kernel_begin_call(afs_socket, &srx, call->key, - (unsigned long) call, gfp); + (unsigned long) call, gfp, + wait_mode->notify_rx); call->key = NULL; if (IS_ERR(rxcall)) { ret = PTR_ERR(rxcall); @@ -406,7 +373,8 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, * request */ if (!call->send_pages) call->state = AFS_CALL_AWAIT_REPLY; - ret = rxrpc_kernel_send_data(rxcall, &msg, call->request_size); + ret = rxrpc_kernel_send_data(afs_socket, rxcall, + &msg, call->request_size); if (ret < 0) goto error_do_abort; @@ -421,150 +389,85 @@ int afs_make_call(struct in_addr *addr, struct afs_call *call, gfp_t gfp, return wait_mode->wait(call); error_do_abort: - rxrpc_kernel_abort_call(rxcall, RX_USER_ABORT); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); + rxrpc_kernel_abort_call(afs_socket, rxcall, RX_USER_ABORT, -ret, "KSD"); error_kill_call: afs_end_call(call); _leave(" = %d", ret); return ret; } -/* - * Handles intercepted messages that were arriving in the socket's Rx queue. - * - * Called from the AF_RXRPC call processor in waitqueue process context. For - * each call, it is guaranteed this will be called in order of packet to be - * delivered. - */ -static void afs_rx_interceptor(struct sock *sk, unsigned long user_call_ID, - struct sk_buff *skb) -{ - struct afs_call *call = (struct afs_call *) user_call_ID; - - _enter("%p,,%u", call, skb->mark); - - _debug("ICPT %p{%u} [%d]", - skb, skb->mark, atomic_read(&afs_outstanding_skbs)); - - ASSERTCMP(sk, ==, afs_socket->sk); - atomic_inc(&afs_outstanding_skbs); - - if (!call) { - /* its an incoming call for our callback service */ - skb_queue_tail(&afs_incoming_calls, skb); - queue_work(afs_wq, &afs_collect_incoming_call_work); - } else { - /* route the messages directly to the appropriate call */ - skb_queue_tail(&call->rx_queue, skb); - call->wait_mode->rx_wakeup(call); - } - - _leave(""); -} - /* * deliver messages to a call */ static void afs_deliver_to_call(struct afs_call *call) { - struct sk_buff *skb; - bool last; u32 abort_code; int ret; - _enter(""); + _enter("%s", call->type->name); - while ((call->state == AFS_CALL_AWAIT_REPLY || - call->state == AFS_CALL_AWAIT_OP_ID || - call->state == AFS_CALL_AWAIT_REQUEST || - call->state == AFS_CALL_AWAIT_ACK) && - (skb = skb_dequeue(&call->rx_queue))) { - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: - _debug("Rcv DATA"); - last = rxrpc_kernel_is_data_last(skb); - ret = call->type->deliver(call, skb, last); - switch (ret) { - case -EAGAIN: - if (last) { - _debug("short data"); - goto unmarshal_error; - } - break; - case 0: - ASSERT(last); - if (call->state == AFS_CALL_AWAIT_REPLY) - call->state = AFS_CALL_COMPLETE; - break; - case -ENOTCONN: - abort_code = RX_CALL_DEAD; - goto do_abort; - case -ENOTSUPP: - abort_code = RX_INVALID_OPERATION; - goto do_abort; - default: - unmarshal_error: - abort_code = RXGEN_CC_UNMARSHAL; - if (call->state != AFS_CALL_AWAIT_REPLY) - abort_code = RXGEN_SS_UNMARSHAL; - do_abort: - rxrpc_kernel_abort_call(call->rxcall, - abort_code); - call->error = ret; - call->state = AFS_CALL_ERROR; - break; + while (call->state == AFS_CALL_AWAIT_REPLY || + call->state == AFS_CALL_AWAIT_OP_ID || + call->state == AFS_CALL_AWAIT_REQUEST || + call->state == AFS_CALL_AWAIT_ACK + ) { + if (call->state == AFS_CALL_AWAIT_ACK) { + size_t offset = 0; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + NULL, 0, &offset, false, + &call->abort_code); + if (ret == -EINPROGRESS || ret == -EAGAIN) + return; + if (ret == 1) { + call->state = AFS_CALL_COMPLETE; + goto done; } - break; - case RXRPC_SKB_MARK_FINAL_ACK: - _debug("Rcv ACK"); - call->state = AFS_CALL_COMPLETE; - break; - case RXRPC_SKB_MARK_BUSY: - _debug("Rcv BUSY"); - call->error = -EBUSY; - call->state = AFS_CALL_BUSY; - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Rcv ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = rxrpc_kernel_get_abort_code(skb); - call->error = call->type->abort_to_error(abort_code); - call->state = AFS_CALL_ABORTED; - _debug("Loc ABORT %u -> %d", abort_code, call->error); - break; - case RXRPC_SKB_MARK_NET_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv NET ERROR %d", call->error); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - call->error = -rxrpc_kernel_get_error_number(skb); - call->state = AFS_CALL_ERROR; - _debug("Rcv LOCAL ERROR %d", call->error); - break; - default: - BUG(); - break; + return; } - afs_free_skb(skb); - } - - /* make sure the queue is empty if the call is done with (we might have - * aborted the call early because of an unmarshalling error) */ - if (call->state >= AFS_CALL_COMPLETE) { - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); - if (call->incoming) - afs_end_call(call); + ret = call->type->deliver(call); + switch (ret) { + case 0: + if (call->state == AFS_CALL_AWAIT_REPLY) + call->state = AFS_CALL_COMPLETE; + goto done; + case -EINPROGRESS: + case -EAGAIN: + goto out; + case -ENOTCONN: + abort_code = RX_CALL_DEAD; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code, -ret, "KNC"); + goto do_abort; + case -ENOTSUPP: + abort_code = RX_INVALID_OPERATION; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code, -ret, "KIV"); + goto do_abort; + case -ENODATA: + case -EBADMSG: + case -EMSGSIZE: + default: + abort_code = RXGEN_CC_UNMARSHAL; + if (call->state != AFS_CALL_AWAIT_REPLY) + abort_code = RXGEN_SS_UNMARSHAL; + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + abort_code, EBADMSG, "KUM"); + goto do_abort; + } } +done: + if (call->state == AFS_CALL_COMPLETE && call->incoming) + afs_end_call(call); +out: _leave(""); + return; + +do_abort: + call->error = ret; + call->state = AFS_CALL_COMPLETE; + goto done; } /* @@ -572,7 +475,7 @@ static void afs_deliver_to_call(struct afs_call *call) */ static int afs_wait_for_call_to_complete(struct afs_call *call) { - struct sk_buff *skb; + const char *abort_why; int ret; DECLARE_WAITQUEUE(myself, current); @@ -584,15 +487,18 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) set_current_state(TASK_INTERRUPTIBLE); /* deliver any messages that are in the queue */ - if (!skb_queue_empty(&call->rx_queue)) { + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; __set_current_state(TASK_RUNNING); afs_deliver_to_call(call); continue; } + abort_why = "KWC"; ret = call->error; - if (call->state >= AFS_CALL_COMPLETE) + if (call->state == AFS_CALL_COMPLETE) break; + abort_why = "KWI"; ret = -EINTR; if (signal_pending(current)) break; @@ -605,9 +511,8 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* kill the call */ if (call->state < AFS_CALL_COMPLETE) { _debug("call incomplete"); - rxrpc_kernel_abort_call(call->rxcall, RX_CALL_DEAD); - while ((skb = skb_dequeue(&call->rx_queue))) - afs_free_skb(skb); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_CALL_DEAD, -ret, abort_why); } _debug("call complete"); @@ -619,17 +524,24 @@ static int afs_wait_for_call_to_complete(struct afs_call *call) /* * wake up a waiting call */ -static void afs_wake_up_call_waiter(struct afs_call *call) +static void afs_wake_up_call_waiter(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; wake_up(&call->waitq); } /* * wake up an asynchronous call */ -static void afs_wake_up_async_call(struct afs_call *call) +static void afs_wake_up_async_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) { - _enter(""); + struct afs_call *call = (struct afs_call *)call_user_ID; + + call->need_attention = true; queue_work(afs_async_calls, &call->async_work); } @@ -647,8 +559,10 @@ static int afs_dont_wait_for_call_to_complete(struct afs_call *call) /* * delete an asynchronous call */ -static void afs_delete_async_call(struct afs_call *call) +static void afs_delete_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); afs_free_call(call); @@ -658,17 +572,19 @@ static void afs_delete_async_call(struct afs_call *call) /* * perform processing on an asynchronous call - * - on a multiple-thread workqueue this work item may try to run on several - * CPUs at the same time */ -static void afs_process_async_call(struct afs_call *call) +static void afs_process_async_call(struct work_struct *work) { + struct afs_call *call = container_of(work, struct afs_call, async_work); + _enter(""); - if (!skb_queue_empty(&call->rx_queue)) + if (call->state < AFS_CALL_COMPLETE && call->need_attention) { + call->need_attention = false; afs_deliver_to_call(call); + } - if (call->state >= AFS_CALL_COMPLETE && call->wait_mode) { + if (call->state == AFS_CALL_COMPLETE && call->wait_mode) { if (call->wait_mode->async_complete) call->wait_mode->async_complete(call->reply, call->error); @@ -679,122 +595,93 @@ static void afs_process_async_call(struct afs_call *call) /* we can't just delete the call because the work item may be * queued */ - call->async_workfn = afs_delete_async_call; + call->async_work.func = afs_delete_async_call; queue_work(afs_async_calls, &call->async_work); } _leave(""); } -/* - * Empty a socket buffer into a flat reply buffer. - */ -int afs_transfer_reply(struct afs_call *call, struct sk_buff *skb, bool last) +static void afs_rx_attach(struct rxrpc_call *rxcall, unsigned long user_call_ID) { - size_t len = skb->len; + struct afs_call *call = (struct afs_call *)user_call_ID; - if (len > call->reply_max - call->reply_size) { - _leave(" = -EBADMSG [%zu > %u]", - len, call->reply_max - call->reply_size); - return -EBADMSG; - } - - if (len > 0) { - if (skb_copy_bits(skb, 0, call->buffer + call->reply_size, - len) < 0) - BUG(); - call->reply_size += len; - } - - afs_data_consumed(call, skb); - if (!last) - return -EAGAIN; - - if (call->reply_size != call->reply_max) { - _leave(" = -EBADMSG [%u != %u]", - call->reply_size, call->reply_max); - return -EBADMSG; - } - return 0; + call->rxcall = rxcall; } /* - * accept the backlog of incoming calls + * Charge the incoming call preallocation. */ -static void afs_collect_incoming_call(struct work_struct *work) +static void afs_charge_preallocation(struct work_struct *work) { - struct rxrpc_call *rxcall; - struct afs_call *call = NULL; - struct sk_buff *skb; - - while ((skb = skb_dequeue(&afs_incoming_calls))) { - _debug("new call"); - - /* don't need the notification */ - afs_free_skb(skb); + struct afs_call *call = afs_spare_incoming_call; + for (;;) { if (!call) { call = kzalloc(sizeof(struct afs_call), GFP_KERNEL); - if (!call) { - rxrpc_kernel_reject_call(afs_socket); - return; - } + if (!call) + break; - call->async_workfn = afs_process_async_call; - INIT_WORK(&call->async_work, afs_async_workfn); + INIT_WORK(&call->async_work, afs_process_async_call); call->wait_mode = &afs_async_incoming_call; call->type = &afs_RXCMxxxx; init_waitqueue_head(&call->waitq); - skb_queue_head_init(&call->rx_queue); call->state = AFS_CALL_AWAIT_OP_ID; - - _debug("CALL %p{%s} [%d]", - call, call->type->name, - atomic_read(&afs_outstanding_calls)); - atomic_inc(&afs_outstanding_calls); } - rxcall = rxrpc_kernel_accept_call(afs_socket, - (unsigned long) call); - if (!IS_ERR(rxcall)) { - call->rxcall = rxcall; - call = NULL; - } + if (rxrpc_kernel_charge_accept(afs_socket, + afs_wake_up_async_call, + afs_rx_attach, + (unsigned long)call, + GFP_KERNEL) < 0) + break; + call = NULL; } + afs_spare_incoming_call = call; +} - if (call) - afs_free_call(call); +/* + * Discard a preallocated call when a socket is shut down. + */ +static void afs_rx_discard_new_call(struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + struct afs_call *call = (struct afs_call *)user_call_ID; + + atomic_inc(&afs_outstanding_calls); + call->rxcall = NULL; + afs_free_call(call); +} + +/* + * Notification of an incoming call. + */ +static void afs_rx_new_call(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long user_call_ID) +{ + atomic_inc(&afs_outstanding_calls); + queue_work(afs_wq, &afs_charge_preallocation_work); } /* * Grab the operation ID from an incoming cache manager call. The socket * buffer is discarded on error or if we don't yet have sufficient data. */ -static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, - bool last) +static int afs_deliver_cm_op_id(struct afs_call *call) { - size_t len = skb->len; - void *oibuf = (void *) &call->operation_ID; + int ret; - _enter("{%u},{%zu},%d", call->offset, len, last); + _enter("{%zu}", call->offset); ASSERTCMP(call->offset, <, 4); /* the operation ID forms the first four bytes of the request data */ - len = min_t(size_t, len, 4 - call->offset); - if (skb_copy_bits(skb, 0, oibuf + call->offset, len) < 0) - BUG(); - if (!pskb_pull(skb, len)) - BUG(); - call->offset += len; - - if (call->offset < 4) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; - } + ret = afs_extract_data(call, &call->operation_ID, 4, true); + if (ret < 0) + return ret; call->state = AFS_CALL_AWAIT_REQUEST; + call->offset = 0; /* ask the cache manager to route the call (it'll change the call type * if successful) */ @@ -803,7 +690,7 @@ static int afs_deliver_cm_op_id(struct afs_call *call, struct sk_buff *skb, /* pass responsibility for the remainer of this message off to the * cache manager op */ - return call->type->deliver(call, skb, last); + return call->type->deliver(call); } /* @@ -823,14 +710,15 @@ void afs_send_empty_reply(struct afs_call *call) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - switch (rxrpc_kernel_send_data(call->rxcall, &msg, 0)) { + switch (rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, 0)) { case 0: _leave(" [replied]"); return; case -ENOMEM: _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT, ENOMEM, "KOO"); default: afs_end_call(call); _leave(" [error]"); @@ -859,7 +747,7 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) msg.msg_flags = 0; call->state = AFS_CALL_AWAIT_ACK; - n = rxrpc_kernel_send_data(call->rxcall, &msg, len); + n = rxrpc_kernel_send_data(afs_socket, call->rxcall, &msg, len); if (n >= 0) { /* Success */ _leave(" [replied]"); @@ -868,7 +756,8 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) if (n == -ENOMEM) { _debug("oom"); - rxrpc_kernel_abort_call(call->rxcall, RX_USER_ABORT); + rxrpc_kernel_abort_call(afs_socket, call->rxcall, + RX_USER_ABORT, ENOMEM, "KOO"); } afs_end_call(call); _leave(" [error]"); @@ -877,25 +766,40 @@ void afs_send_simple_reply(struct afs_call *call, const void *buf, size_t len) /* * Extract a piece of data from the received data socket buffers. */ -int afs_extract_data(struct afs_call *call, struct sk_buff *skb, - bool last, void *buf, size_t count) +int afs_extract_data(struct afs_call *call, void *buf, size_t count, + bool want_more) { - size_t len = skb->len; + int ret; - _enter("{%u},{%zu},%d,,%zu", call->offset, len, last, count); + _enter("{%s,%zu},,%zu,%d", + call->type->name, call->offset, count, want_more); - ASSERTCMP(call->offset, <, count); + ASSERTCMP(call->offset, <=, count); - len = min_t(size_t, len, count - call->offset); - if (skb_copy_bits(skb, 0, buf + call->offset, len) < 0 || - !pskb_pull(skb, len)) - BUG(); - call->offset += len; + ret = rxrpc_kernel_recv_data(afs_socket, call->rxcall, + buf, count, &call->offset, + want_more, &call->abort_code); + if (ret == 0 || ret == -EAGAIN) + return ret; - if (call->offset < count) { - afs_data_consumed(call, skb); - _leave(" = -EAGAIN"); - return -EAGAIN; + if (ret == 1) { + switch (call->state) { + case AFS_CALL_AWAIT_REPLY: + call->state = AFS_CALL_COMPLETE; + break; + case AFS_CALL_AWAIT_REQUEST: + call->state = AFS_CALL_REPLYING; + break; + default: + break; + } + return 0; } - return 0; + + if (ret == -ECONNABORTED) + call->error = call->type->abort_to_error(call->abort_code); + else + call->error = ret; + call->state = AFS_CALL_COMPLETE; + return ret; } diff --git a/fs/afs/server.c b/fs/afs/server.c index f342acf3547d..d4066ab7dd55 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -178,13 +178,18 @@ server_in_two_cells: /* * look up a server by its IP address */ -struct afs_server *afs_find_server(const struct in_addr *_addr) +struct afs_server *afs_find_server(const struct sockaddr_rxrpc *srx) { struct afs_server *server = NULL; struct rb_node *p; - struct in_addr addr = *_addr; + struct in_addr addr = srx->transport.sin.sin_addr; - _enter("%pI4", &addr.s_addr); + _enter("{%d,%pI4}", srx->transport.family, &addr.s_addr); + + if (srx->transport.family != AF_INET) { + WARN(true, "AFS does not yes support non-IPv4 addresses\n"); + return NULL; + } read_lock(&afs_servers_lock); diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c index f94d1abdc3eb..94bcd97d22b8 100644 --- a/fs/afs/vlclient.c +++ b/fs/afs/vlclient.c @@ -58,17 +58,16 @@ static int afs_vl_abort_to_error(u32 abort_code) /* * deliver reply data to a VL.GetEntryByXXX call */ -static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call, - struct sk_buff *skb, bool last) +static int afs_deliver_vl_get_entry_by_xxx(struct afs_call *call) { struct afs_cache_vlocation *entry; __be32 *bp; u32 tmp; int loop, ret; - _enter(",,%u", last); + _enter(""); - ret = afs_transfer_reply(call, skb, last); + ret = afs_transfer_reply(call); if (ret < 0) return ret; diff --git a/fs/afs/vlocation.c b/fs/afs/vlocation.c index 52976785a32c..45a86396fd2d 100644 --- a/fs/afs/vlocation.c +++ b/fs/afs/vlocation.c @@ -594,8 +594,8 @@ static void afs_vlocation_reaper(struct work_struct *work) */ int __init afs_vlocation_update_init(void) { - afs_vlocation_update_worker = - create_singlethread_workqueue("kafs_vlupdated"); + afs_vlocation_update_worker = alloc_workqueue("kafs_vlupdated", + WQ_MEM_RECLAIM, 0); return afs_vlocation_update_worker ? 0 : -ENOMEM; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index c633476616e0..bca66d83a765 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -390,6 +390,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, atomic_set(&ent->count, 1); spin_lock_init(&ent->pde_unload_lock); INIT_LIST_HEAD(&ent->pde_openers); + proc_set_user(ent, (*parent)->uid, (*parent)->gid); + out: return ent; } diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index c8bbc68cdb05..7ae6b1da7cab 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -185,6 +186,8 @@ const struct file_operations proc_net_operations = { static __net_init int proc_net_ns_init(struct net *net) { struct proc_dir_entry *netd, *net_statd; + kuid_t uid; + kgid_t gid; int err; err = -ENOMEM; @@ -199,6 +202,16 @@ static __net_init int proc_net_ns_init(struct net *net) netd->parent = &proc_root; memcpy(netd->name, "net", 4); + uid = make_kuid(net->user_ns, 0); + if (!uid_valid(uid)) + uid = netd->uid; + + gid = make_kgid(net->user_ns, 0); + if (!gid_valid(gid)) + gid = netd->gid; + + proc_set_user(netd, uid, gid); + err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); if (!net_statd) diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 1b93650dda2f..2ed3d71d4767 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -430,6 +430,7 @@ static int sysctl_perm(struct ctl_table_header *head, struct ctl_table *table, i static struct inode *proc_sys_make_inode(struct super_block *sb, struct ctl_table_header *head, struct ctl_table *table) { + struct ctl_table_root *root = head->root; struct inode *inode; struct proc_inode *ei; @@ -457,6 +458,10 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (is_empty_dir(head)) make_empty_dir_inode(inode); } + + if (root->set_ownership) + root->set_ownership(head, table, &inode->i_uid, &inode->i_gid); + out: return inode; } diff --git a/include/dt-bindings/net/mscc-phy-vsc8531.h b/include/dt-bindings/net/mscc-phy-vsc8531.h new file mode 100644 index 000000000000..2383dd20ff43 --- /dev/null +++ b/include/dt-bindings/net/mscc-phy-vsc8531.h @@ -0,0 +1,21 @@ +/* + * Device Tree constants for Microsemi VSC8531 PHY + * + * Author: Nagaraju Lakkaraju + * + * License: Dual MIT/GPL + * Copyright (c) 2016 Microsemi Corporation + */ + +#ifndef _DT_BINDINGS_MSCC_VSC8531_H +#define _DT_BINDINGS_MSCC_VSC8531_H + +/* MAC interface Edge rate control VDDMAC in milli Volts */ +#define MSCC_VDDMAC_3300 3300 +#define MSCC_VDDMAC_2500 2500 +#define MSCC_VDDMAC_1800 1800 +#define MSCC_VDDMAC_1500 1500 +#define MSCC_VDDMAC_MAX 4 +#define MSCC_SLOWDOWN_MAX 8 + +#endif diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 3db25df396cb..8eeedb2db924 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -205,6 +205,9 @@ struct bcma_host_ops { #define BCMA_PKG_ID_BCM4709 0 #define BCMA_CHIP_ID_BCM47094 53030 #define BCMA_CHIP_ID_BCM53018 53018 +#define BCMA_CHIP_ID_BCM53573 53573 +#define BCMA_PKG_ID_BCM53573 0 +#define BCMA_PKG_ID_BCM47189 1 /* Board types (on PCI usually equals to the subsystem dev id) */ /* BCM4313 */ diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index 4901fb358b07..9986f8288d01 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -24,6 +24,7 @@ #define BCMA_CLKCTLST_4328A0_HAVEALP 0x00020000 /* 4328a0 has reversed bits */ /* Agent registers (common for every core) */ +#define BCMA_OOB_SEL_OUT_A30 0x0100 #define BCMA_IOCTL 0x0408 /* IO control */ #define BCMA_IOCTL_CLK 0x0001 #define BCMA_IOCTL_FGC 0x0002 diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h new file mode 100644 index 000000000000..f6505d83069d --- /dev/null +++ b/include/linux/bitfield.h @@ -0,0 +1,93 @@ +/* + * Copyright (C) 2014 Felix Fietkau + * Copyright (C) 2004 - 2009 Ivo van Doorn + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef _LINUX_BITFIELD_H +#define _LINUX_BITFIELD_H + +#include + +/* + * Bitfield access macros + * + * FIELD_{GET,PREP} macros take as first parameter shifted mask + * from which they extract the base mask and shift amount. + * Mask must be a compilation time constant. + * + * Example: + * + * #define REG_FIELD_A GENMASK(6, 0) + * #define REG_FIELD_B BIT(7) + * #define REG_FIELD_C GENMASK(15, 8) + * #define REG_FIELD_D GENMASK(31, 16) + * + * Get: + * a = FIELD_GET(REG_FIELD_A, reg); + * b = FIELD_GET(REG_FIELD_B, reg); + * + * Set: + * reg = FIELD_PREP(REG_FIELD_A, 1) | + * FIELD_PREP(REG_FIELD_B, 0) | + * FIELD_PREP(REG_FIELD_C, c) | + * FIELD_PREP(REG_FIELD_D, 0x40); + * + * Modify: + * reg &= ~REG_FIELD_C; + * reg |= FIELD_PREP(REG_FIELD_C, c); + */ + +#define __bf_shf(x) (__builtin_ffsll(x) - 1) + +#define __BF_FIELD_CHECK(_mask, _reg, _val, _pfx) \ + ({ \ + BUILD_BUG_ON_MSG(!__builtin_constant_p(_mask), \ + _pfx "mask is not constant"); \ + BUILD_BUG_ON_MSG(!(_mask), _pfx "mask is zero"); \ + BUILD_BUG_ON_MSG(__builtin_constant_p(_val) ? \ + ~((_mask) >> __bf_shf(_mask)) & (_val) : 0, \ + _pfx "value too large for the field"); \ + BUILD_BUG_ON_MSG((_mask) > (typeof(_reg))~0ull, \ + _pfx "type of reg too small for mask"); \ + __BUILD_BUG_ON_NOT_POWER_OF_2((_mask) + \ + (1ULL << __bf_shf(_mask))); \ + }) + +/** + * FIELD_PREP() - prepare a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_val: value to put in the field + * + * FIELD_PREP() masks and shifts up the value. The result should + * be combined with other fields of the bitfield using logical OR. + */ +#define FIELD_PREP(_mask, _val) \ + ({ \ + __BF_FIELD_CHECK(_mask, 0ULL, _val, "FIELD_PREP: "); \ + ((typeof(_mask))(_val) << __bf_shf(_mask)) & (_mask); \ + }) + +/** + * FIELD_GET() - extract a bitfield element + * @_mask: shifted mask defining the field's length and position + * @_reg: 32bit value of entire bitfield + * + * FIELD_GET() extracts the field specified by @_mask from the + * bitfield passed in as @_reg by masking and shifting it down. + */ +#define FIELD_GET(_mask, _reg) \ + ({ \ + __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ + (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ + }) + +#endif diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 11134238417d..c201017b5730 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -96,6 +96,7 @@ enum bpf_return_type { struct bpf_func_proto { u64 (*func)(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); bool gpl_only; + bool pkt_access; enum bpf_return_type ret_type; enum bpf_arg_type arg1_type; enum bpf_arg_type arg2_type; @@ -138,6 +139,13 @@ enum bpf_reg_type { */ PTR_TO_PACKET, PTR_TO_PACKET_END, /* skb->data + headlen */ + + /* PTR_TO_MAP_VALUE_ADJ is used for doing pointer math inside of a map + * elem value. We only allow this if we can statically verify that + * access from this register are going to fall within the size of the + * map element. + */ + PTR_TO_MAP_VALUE_ADJ, }; struct bpf_prog; @@ -151,7 +159,8 @@ struct bpf_verifier_ops { */ bool (*is_valid_access)(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type); - + int (*gen_prologue)(struct bpf_insn *insn, bool direct_write, + const struct bpf_prog *prog); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn, struct bpf_prog *prog); @@ -297,6 +306,10 @@ static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) static inline void bpf_prog_put(struct bpf_prog *prog) { } +static inline struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h new file mode 100644 index 000000000000..7035b997aaa5 --- /dev/null +++ b/include/linux/bpf_verifier.h @@ -0,0 +1,102 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _LINUX_BPF_VERIFIER_H +#define _LINUX_BPF_VERIFIER_H 1 + +#include /* for enum bpf_reg_type */ +#include /* for MAX_BPF_STACK */ + + /* Just some arbitrary values so we can safely do math without overflowing and + * are obviously wrong for any sort of memory access. + */ +#define BPF_REGISTER_MAX_RANGE (1024 * 1024 * 1024) +#define BPF_REGISTER_MIN_RANGE -(1024 * 1024 * 1024) + +struct bpf_reg_state { + enum bpf_reg_type type; + /* + * Used to determine if any memory access using this register will + * result in a bad access. + */ + u64 min_value, max_value; + union { + /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ + s64 imm; + + /* valid when type == PTR_TO_PACKET* */ + struct { + u32 id; + u16 off; + u16 range; + }; + + /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | + * PTR_TO_MAP_VALUE_OR_NULL + */ + struct bpf_map *map_ptr; + }; +}; + +enum bpf_stack_slot_type { + STACK_INVALID, /* nothing was stored in this stack slot */ + STACK_SPILL, /* register spilled into stack */ + STACK_MISC /* BPF program wrote some data into this slot */ +}; + +#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ + +/* state of the program: + * type of all registers and stack info + */ +struct bpf_verifier_state { + struct bpf_reg_state regs[MAX_BPF_REG]; + u8 stack_slot_type[MAX_BPF_STACK]; + struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; +}; + +/* linked list of verifier states used to prune search */ +struct bpf_verifier_state_list { + struct bpf_verifier_state state; + struct bpf_verifier_state_list *next; +}; + +struct bpf_insn_aux_data { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ +}; + +#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ + +struct bpf_verifier_env; +struct bpf_ext_analyzer_ops { + int (*insn_hook)(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx); +}; + +/* single container for all structs + * one verifier_env per bpf_check() call + */ +struct bpf_verifier_env { + struct bpf_prog *prog; /* eBPF program being verified */ + struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ + int stack_size; /* number of states to be processed */ + struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ + const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + void *analyzer_priv; /* pointer to external analyzer's private data */ + struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ + u32 used_map_cnt; /* number of used maps */ + u32 id_gen; /* used to generate unique reg IDs */ + bool allow_ptr_leaks; + bool seen_direct_write; + bool varlen_map_value_access; + struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ +}; + +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv); + +#endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/include/linux/bug.h b/include/linux/bug.h index e51b0709e78d..292d6a10b0c2 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -13,6 +13,7 @@ enum bug_trap_type { struct pt_regs; #ifdef __CHECKER__ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) (0) #define BUILD_BUG_ON_ZERO(e) (0) #define BUILD_BUG_ON_NULL(e) ((void*)0) @@ -24,6 +25,8 @@ struct pt_regs; #else /* __CHECKER__ */ /* Force a compilation error if a constant expression is not a power of 2 */ +#define __BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON(((n) & ((n) - 1)) != 0) #define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 984f73b719a9..a4414a11eea7 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -497,6 +497,23 @@ static inline bool cgroup_is_descendant(struct cgroup *cgrp, return cgrp->ancestor_ids[ancestor->level] == ancestor->id; } +/** + * task_under_cgroup_hierarchy - test task's membership of cgroup ancestry + * @task: the task to be tested + * @ancestor: possible ancestor of @task's cgroup + * + * Tests whether @task's default cgroup hierarchy is a descendant of @ancestor. + * It follows all the same rules as cgroup_is_descendant, and only applies + * to the default hierarchy. + */ +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + struct css_set *cset = task_css_set(task); + + return cgroup_is_descendant(cset->dfl_cgrp, ancestor); +} + /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_is_populated(struct cgroup *cgrp) { @@ -557,6 +574,7 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; +struct cgroup; static inline void css_put(struct cgroup_subsys_state *css) {} static inline int cgroup_attach_task_all(struct task_struct *from, @@ -574,6 +592,11 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline bool task_under_cgroup_hierarchy(struct task_struct *task, + struct cgroup *ancestor) +{ + return true; +} #endif /* !CONFIG_CGROUPS */ /* diff --git a/include/linux/filter.h b/include/linux/filter.h index a16439b99fd9..1f09c521adfe 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -314,6 +314,70 @@ struct bpf_prog_aux; bpf_size; \ }) +#define BPF_SIZEOF(type) \ + ({ \ + const int __size = bytes_to_bpf_size(sizeof(type)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + +#define BPF_FIELD_SIZEOF(type, field) \ + ({ \ + const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \ + BUILD_BUG_ON(__size < 0); \ + __size; \ + }) + +#define __BPF_MAP_0(m, v, ...) v +#define __BPF_MAP_1(m, v, t, a, ...) m(t, a) +#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__) +#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__) +#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__) +#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__) + +#define __BPF_REG_0(...) __BPF_PAD(5) +#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4) +#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3) +#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2) +#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1) +#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__) + +#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__) +#define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__) + +#define __BPF_CAST(t, a) \ + (__force t) \ + (__force \ + typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long), \ + (unsigned long)0, (t)0))) a +#define __BPF_V void +#define __BPF_N + +#define __BPF_DECL_ARGS(t, a) t a +#define __BPF_DECL_REGS(t, a) u64 a + +#define __BPF_PAD(n) \ + __BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2, \ + u64, __ur_3, u64, __ur_4, u64, __ur_5) + +#define BPF_CALL_x(x, name, ...) \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)); \ + u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__)) \ + { \ + return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\ + } \ + static __always_inline \ + u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__)) + +#define BPF_CALL_0(name, ...) BPF_CALL_x(0, name, __VA_ARGS__) +#define BPF_CALL_1(name, ...) BPF_CALL_x(1, name, __VA_ARGS__) +#define BPF_CALL_2(name, ...) BPF_CALL_x(2, name, __VA_ARGS__) +#define BPF_CALL_3(name, ...) BPF_CALL_x(3, name, __VA_ARGS__) +#define BPF_CALL_4(name, ...) BPF_CALL_x(4, name, __VA_ARGS__) +#define BPF_CALL_5(name, ...) BPF_CALL_x(5, name, __VA_ARGS__) + #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index cd184bdca58f..6824556d37ed 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1169,6 +1169,13 @@ int __must_check __vmbus_driver_register(struct hv_driver *hv_driver, const char *mod_name); void vmbus_driver_unregister(struct hv_driver *hv_driver); +static inline const char *vmbus_dev_name(const struct hv_device *device_obj) +{ + const struct kobject *kobj = &device_obj->device.kobj; + + return kobj->name; +} + void vmbus_hvsock_device_unregister(struct vmbus_channel *channel); int vmbus_allocate_mmio(struct resource **new, struct hv_device *device_obj, diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dcb89e3515db..c6587c01d951 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -45,6 +45,7 @@ struct br_ip_list { #define BR_PROXYARP BIT(8) #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +#define BR_MCAST_FLOOD BIT(11) #define BR_DEFAULT_AGEING_TIME (300 * HZ) diff --git a/include/linux/if_link.h b/include/linux/if_link.h index f923d15b432c..0b17c585b5cd 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -25,5 +25,6 @@ struct ifla_vf_info { __u32 max_tx_rate; __u32 rss_query_en; __u32 trusted; + __be16 vlan_proto; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index a5f6ce6b578c..3319d97d789d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -81,6 +81,7 @@ static inline bool is_vlan_dev(const struct net_device *dev) #define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) #define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) #define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_get_prio(__skb) ((__skb)->vlan_tci & VLAN_PRIO_MASK) /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats @@ -271,6 +272,23 @@ static inline int vlan_get_encap_level(struct net_device *dev) } #endif +/** + * eth_type_vlan - check for valid vlan ether type. + * @ethertype: ether type to check + * + * Returns true if the ether type is a vlan ether type. + */ +static inline bool eth_type_vlan(__be16 ethertype) +{ + switch (ethertype) { + case htons(ETH_P_8021Q): + case htons(ETH_P_8021AD): + return true; + default: + return false; + } +} + static inline bool vlan_hw_offload_capable(netdev_features_t features, __be16 proto) { @@ -424,8 +442,7 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { struct vlan_ethhdr *veth = (struct vlan_ethhdr *)skb->data; - if (veth->h_vlan_proto != htons(ETH_P_8021Q) && - veth->h_vlan_proto != htons(ETH_P_8021AD)) + if (!eth_type_vlan(veth->h_vlan_proto)) return -EINVAL; *vlan_tci = ntohs(veth->h_vlan_TCI); @@ -487,7 +504,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, * present at mac_len - VLAN_HLEN (if mac_len > 0), or at * ETH_HLEN otherwise */ - if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) { + if (eth_type_vlan(type)) { if (vlan_depth) { if (WARN_ON(vlan_depth < VLAN_HLEN)) return 0; @@ -505,8 +522,7 @@ static inline __be16 __vlan_get_protocol(struct sk_buff *skb, __be16 type, vh = (struct vlan_hdr *)(skb->data + vlan_depth); type = vh->h_vlan_encapsulated_proto; vlan_depth += VLAN_HLEN; - } while (type == htons(ETH_P_8021Q) || - type == htons(ETH_P_8021AD)); + } while (eth_type_vlan(type)); } if (depth) @@ -571,8 +587,7 @@ static inline void vlan_set_encap_proto(struct sk_buff *skb, static inline bool skb_vlan_tagged(const struct sk_buff *skb) { if (!skb_vlan_tag_present(skb) && - likely(skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD))) + likely(!eth_type_vlan(skb->protocol))) return false; return true; @@ -592,15 +607,14 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) if (!skb_vlan_tag_present(skb)) { struct vlan_ethhdr *veh; - if (likely(protocol != htons(ETH_P_8021Q) && - protocol != htons(ETH_P_8021AD))) + if (likely(!eth_type_vlan(protocol))) return false; veh = (struct vlan_ethhdr *)skb->data; protocol = veh->h_vlan_encapsulated_proto; } - if (protocol != htons(ETH_P_8021Q) && protocol != htons(ETH_P_8021AD)) + if (!eth_type_vlan(protocol)) return false; return true; diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index feb04ea20f11..65da430e260f 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -37,7 +37,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 pid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh); + const struct nlmsghdr *unlh, bool net_admin); void inet_diag_dump_icsk(struct inet_hashinfo *h, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, @@ -56,7 +56,7 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); + struct user_namespace *user_ns, bool net_admin); extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c6dbcd84a2c7..7e9a789be5e0 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -18,6 +18,7 @@ struct ipv6_devconf { __s32 dad_transmits; __s32 rtr_solicits; __s32 rtr_solicit_interval; + __s32 rtr_solicit_max_interval; __s32 rtr_solicit_delay; __s32 force_mld_version; __s32 mldv1_unsolicited_report_interval; diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 3ffc69ebe967..0fb7ffb1775f 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -238,6 +238,11 @@ static inline ktime_t ktime_sub_us(const ktime_t kt, const u64 usec) return ktime_sub_ns(kt, usec * NSEC_PER_USEC); } +static inline ktime_t ktime_sub_ms(const ktime_t kt, const u64 msec) +{ + return ktime_sub_ns(kt, msec * NSEC_PER_MSEC); +} + extern ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs); /** diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 116b284bc4ce..1f3568694a57 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -309,7 +309,8 @@ int mlx4_get_vf_stats(struct mlx4_dev *dev, int port, int vf_idx, struct ifla_vf_stats *vf_stats); u32 mlx4_comm_get_version(void); int mlx4_set_vf_mac(struct mlx4_dev *dev, int port, int vf, u64 mac); -int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, u8 qos); +int mlx4_set_vf_vlan(struct mlx4_dev *dev, int port, int vf, u16 vlan, + u8 qos, __be16 proto); int mlx4_set_vf_rate(struct mlx4_dev *dev, int port, int vf, int min_tx_rate, int max_tx_rate); int mlx4_set_vf_spoofchk(struct mlx4_dev *dev, int port, int vf, bool setting); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 42da3552f7cb..59b50d3eedb4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -221,6 +221,7 @@ enum { MLX4_DEV_CAP_FLAG2_ROCE_V1_V2 = 1ULL << 33, MLX4_DEV_CAP_FLAG2_DMFS_UC_MC_SNIFFER = 1ULL << 34, MLX4_DEV_CAP_FLAG2_DIAG_PER_PORT = 1ULL << 35, + MLX4_DEV_CAP_FLAG2_SVLAN_BY_QP = 1ULL << 36, }; enum { @@ -1371,6 +1372,8 @@ int mlx4_SET_PORT_fcs_check(struct mlx4_dev *dev, u8 port, int mlx4_SET_PORT_VXLAN(struct mlx4_dev *dev, u8 port, u8 steering, int enable); int set_phv_bit(struct mlx4_dev *dev, u8 port, int new_val); int get_phv_bit(struct mlx4_dev *dev, u8 port, int *phv); +int mlx4_get_is_vlan_offload_disabled(struct mlx4_dev *dev, u8 port, + bool *vlan_offload_disabled); int mlx4_find_cached_mac(struct mlx4_dev *dev, u8 port, u64 mac, int *idx); int mlx4_find_cached_vlan(struct mlx4_dev *dev, u8 port, u16 vid, int *idx); int mlx4_register_vlan(struct mlx4_dev *dev, u8 port, u16 vlan, int *index); diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index deaa2217214d..b4ee8f62ce8d 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -160,6 +160,7 @@ struct mlx4_qp_path { enum { /* fl */ MLX4_FL_CV = 1 << 6, + MLX4_FL_SV = 1 << 5, MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, @@ -267,6 +268,7 @@ enum { MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, + MLX4_UPD_QP_PATH_MASK_SV = 22 + 32, }; enum { /* param3 */ diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 2566f6d6444f..7c3c0d3aca37 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -170,12 +170,12 @@ static inline void mlx5_cq_arm(struct mlx5_core_cq *cq, u32 cmd, int mlx5_init_cq_table(struct mlx5_core_dev *dev); void mlx5_cleanup_cq_table(struct mlx5_core_dev *dev); int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_create_cq_mbox_in *in, int inlen); + u32 *in, int inlen); int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq); int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_query_cq_mbox_out *out); + u32 *out, int outlen); int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, - struct mlx5_modify_cq_mbox_in *in, int in_sz); + u32 *in, int inlen); int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq, u16 cq_period, u16 cq_max_count); diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 0b6d15cddb2f..77c141797152 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -197,19 +197,6 @@ enum { MLX5_PCIE_CTRL_TPH_MASK = 3 << 4, }; -enum { - MLX5_ACCESS_MODE_PA = 0, - MLX5_ACCESS_MODE_MTT = 1, - MLX5_ACCESS_MODE_KLM = 2 -}; - -enum { - MLX5_MKEY_REMOTE_INVAL = 1 << 24, - MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, - MLX5_MKEY_BSF_EN = 1 << 30, - MLX5_MKEY_LEN64 = 1 << 31, -}; - enum { MLX5_EN_RD = (u64)1, MLX5_EN_WR = (u64)2 @@ -411,33 +398,6 @@ enum { MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 }; -struct mlx5_inbox_hdr { - __be16 opcode; - u8 rsvd[4]; - __be16 opmod; -}; - -struct mlx5_outbox_hdr { - u8 status; - u8 rsvd[3]; - __be32 syndrome; -}; - -struct mlx5_cmd_query_adapter_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_adapter_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[24]; - u8 intapin; - u8 rsvd1[13]; - __be16 vsd_vendor_id; - u8 vsd[208]; - u8 vsd_psid[16]; -}; - enum mlx5_odp_transport_cap_bits { MLX5_ODP_SUPPORT_SEND = 1 << 31, MLX5_ODP_SUPPORT_RECV = 1 << 30, @@ -455,30 +415,6 @@ struct mlx5_odp_caps { char reserved2[0xe4]; }; -struct mlx5_cmd_init_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 profile; - u8 rsvd1[4]; -}; - -struct mlx5_cmd_init_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_teardown_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 profile; - u8 rsvd1[4]; -}; - -struct mlx5_cmd_teardown_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; @@ -494,7 +430,6 @@ struct mlx5_cmd_layout { u8 status_own; }; - struct health_buffer { __be32 assert_var[5]; __be32 rsvd0[3]; @@ -856,245 +791,15 @@ struct mlx5_cqe128 { struct mlx5_cqe64 cqe64; }; -struct mlx5_srq_ctx { - u8 state_log_sz; - u8 rsvd0[3]; - __be32 flags_xrcd; - __be32 pgoff_cqn; - u8 rsvd1[4]; - u8 log_pg_sz; - u8 rsvd2[7]; - __be32 pd; - __be16 lwm; - __be16 wqe_cnt; - u8 rsvd3[8]; - __be64 db_record; -}; - -struct mlx5_create_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_srqn; - u8 rsvd0[4]; - struct mlx5_srq_ctx ctx; - u8 rsvd1[208]; - __be64 pas[0]; -}; - -struct mlx5_create_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 srqn; - u8 rsvd[4]; -}; - -struct mlx5_destroy_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - u8 rsvd[4]; -}; - -struct mlx5_destroy_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - u8 rsvd0[4]; -}; - -struct mlx5_query_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; - struct mlx5_srq_ctx ctx; - u8 rsvd1[32]; - __be64 pas[0]; -}; - -struct mlx5_arm_srq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 srqn; - __be16 rsvd; - __be16 lwm; -}; - -struct mlx5_arm_srq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cq_context { - u8 status; - u8 cqe_sz_flags; - u8 st; - u8 rsvd3; - u8 rsvd4[6]; - __be16 page_offset; - __be32 log_sz_usr_page; - __be16 cq_period; - __be16 cq_max_count; - __be16 rsvd20; - __be16 c_eqn; - u8 log_pg_sz; - u8 rsvd25[7]; - __be32 last_notified_index; - __be32 solicit_producer_index; - __be32 consumer_counter; - __be32 producer_counter; - u8 rsvd48[8]; - __be64 db_record_addr; -}; - -struct mlx5_create_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_cqn; - u8 rsvdx[4]; - struct mlx5_cq_context ctx; - u8 rsvd6[192]; - __be64 pas[0]; -}; - -struct mlx5_create_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - -struct mlx5_query_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - u8 rsvd0[4]; -}; - -struct mlx5_query_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; - struct mlx5_cq_context ctx; - u8 rsvd6[16]; - __be64 pas[0]; -}; - -struct mlx5_modify_cq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 cqn; - __be32 field_select; - struct mlx5_cq_context ctx; - u8 rsvd[192]; - __be64 pas[0]; -}; - -struct mlx5_modify_cq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_enable_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_enable_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_disable_hca_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_disable_hca_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_eq_context { - u8 status; - u8 ec_oi; - u8 st; - u8 rsvd2[7]; - __be16 page_pffset; - __be32 log_sz_usr_page; - u8 rsvd3[7]; - u8 intr; - u8 log_page_size; - u8 rsvd4[15]; - __be32 consumer_counter; - __be32 produser_counter; - u8 rsvd5[16]; -}; - -struct mlx5_create_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 input_eqn; - u8 rsvd1[4]; - struct mlx5_eq_context ctx; - u8 rsvd2[8]; - __be64 events_mask; - u8 rsvd3[176]; - __be64 pas[0]; -}; - -struct mlx5_create_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[3]; - u8 eq_number; - u8 rsvd1[4]; -}; - -struct mlx5_destroy_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 eqn; - u8 rsvd1[4]; -}; - -struct mlx5_destroy_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_map_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - __be64 mask; - u8 mu; - u8 rsvd0[2]; - u8 eqn; - u8 rsvd1[24]; -}; - -struct mlx5_map_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_eq_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[3]; - u8 eqn; - u8 rsvd1[4]; -}; - -struct mlx5_query_eq_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - struct mlx5_eq_context ctx; +enum { + MLX5_MKEY_STATUS_FREE = 1 << 6, }; enum { - MLX5_MKEY_STATUS_FREE = 1 << 6, + MLX5_MKEY_REMOTE_INVAL = 1 << 24, + MLX5_MKEY_FLAG_SYNC_UMR = 1 << 29, + MLX5_MKEY_BSF_EN = 1 << 30, + MLX5_MKEY_LEN64 = 1 << 31, }; struct mlx5_mkey_seg { @@ -1119,134 +824,12 @@ struct mlx5_mkey_seg { u8 rsvd4[4]; }; -struct mlx5_query_special_ctxs_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_special_ctxs_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 reserved_lkey; -}; - -struct mlx5_create_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_mkey_index; - __be32 flags; - struct mlx5_mkey_seg seg; - u8 rsvd1[16]; - __be32 xlat_oct_act_size; - __be32 rsvd2; - u8 rsvd3[168]; - __be64 pas[0]; -}; - -struct mlx5_create_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 mkey; - u8 rsvd[4]; -}; - -struct mlx5_destroy_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; - u8 rsvd[4]; -}; - -struct mlx5_destroy_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_query_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; -}; - -struct mlx5_query_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be64 pas[0]; -}; - -struct mlx5_modify_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 mkey; - __be64 pas[0]; -}; - -struct mlx5_modify_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_dump_mkey_mbox_in { - struct mlx5_inbox_hdr hdr; -}; - -struct mlx5_dump_mkey_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 mkey; -}; - -struct mlx5_mad_ifc_mbox_in { - struct mlx5_inbox_hdr hdr; - __be16 remote_lid; - u8 rsvd0; - u8 port; - u8 rsvd1[4]; - u8 data[256]; -}; - -struct mlx5_mad_ifc_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - u8 data[256]; -}; - -struct mlx5_access_reg_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd0[2]; - __be16 register_id; - __be32 arg; - __be32 data[0]; -}; - -struct mlx5_access_reg_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - __be32 data[0]; -}; - #define MLX5_ATTR_EXTENDED_PORT_INFO cpu_to_be16(0xff90) enum { MLX_EXT_PORT_CAP_FLAG_EXTENDED_PORT_INFO = 1 << 0 }; -struct mlx5_allocate_psv_in { - struct mlx5_inbox_hdr hdr; - __be32 npsv_pd; - __be32 rsvd_psv0; -}; - -struct mlx5_allocate_psv_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; - __be32 psv_idx[4]; -}; - -struct mlx5_destroy_psv_in { - struct mlx5_inbox_hdr hdr; - __be32 psv_number; - u8 rsvd[4]; -}; - -struct mlx5_destroy_psv_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - enum { VPORT_STATE_DOWN = 0x0, VPORT_STATE_UP = 0x1, @@ -1381,6 +964,18 @@ enum mlx5_cap_type { #define MLX5_CAP_FLOWTABLE_NIC_RX_MAX(mdev, cap) \ MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive.cap) +#define MLX5_CAP_FLOWTABLE_SNIFFER_RX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_receive_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_RX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_receive_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_TX(mdev, cap) \ + MLX5_CAP_FLOWTABLE(mdev, flow_table_properties_nic_transmit_sniffer.cap) + +#define MLX5_CAP_FLOWTABLE_SNIFFER_TX_MAX(mdev, cap) \ + MLX5_CAP_FLOWTABLE_MAX(mdev, flow_table_properties_nic_transmit_sniffer.cap) + #define MLX5_CAP_ESW_FLOWTABLE(mdev, cap) \ MLX5_GET(flow_table_eswitch_cap, \ mdev->hca_caps_cur[MLX5_CAP_ESWITCH_FLOW_TABLE], cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ccea6fb16482..85c4786427e4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -48,10 +48,6 @@ #include #include -enum { - MLX5_RQ_BITMASK_VSD = 1 << 1, -}; - enum { MLX5_BOARD_ID_LEN = 64, MLX5_MAX_NAME_LEN = 16, @@ -481,6 +477,7 @@ struct mlx5_fc_stats { }; struct mlx5_eswitch; +struct mlx5_lag; struct mlx5_rl_entry { u32 rate; @@ -554,6 +551,7 @@ struct mlx5_priv { struct mlx5_flow_steering *steering; struct mlx5_eswitch *eswitch; struct mlx5_core_sriov sriov; + struct mlx5_lag *lag; unsigned long pci_dev_data; struct mlx5_fc_stats fc_stats; struct mlx5_rl_table rl_table; @@ -771,14 +769,15 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev); void mlx5_cmd_cleanup(struct mlx5_core_dev *dev); void mlx5_cmd_use_events(struct mlx5_core_dev *dev); void mlx5_cmd_use_polling(struct mlx5_core_dev *dev); -int mlx5_cmd_status_to_err(struct mlx5_outbox_hdr *hdr); -int mlx5_cmd_status_to_err_v2(void *ptr); -int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); + int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size); int mlx5_cmd_exec_cb(struct mlx5_core_dev *dev, void *in, int in_size, void *out, int out_size, mlx5_cmd_cbk_t callback, void *context); +void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome); + +int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type); int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn); int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn); int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); @@ -807,15 +806,18 @@ int mlx5_core_arm_srq(struct mlx5_core_dev *dev, struct mlx5_core_srq *srq, u16 lwm, int is_srq); void mlx5_init_mkey_table(struct mlx5_core_dev *dev); void mlx5_cleanup_mkey_table(struct mlx5_core_dev *dev); +int mlx5_core_create_mkey_cb(struct mlx5_core_dev *dev, + struct mlx5_core_mkey *mkey, + u32 *in, int inlen, + u32 *out, int outlen, + mlx5_cmd_cbk_t callback, void *context); int mlx5_core_create_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_create_mkey_mbox_in *in, int inlen, - mlx5_cmd_cbk_t callback, void *context, - struct mlx5_create_mkey_mbox_out *out); + u32 *in, int inlen); int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey); int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey, - struct mlx5_query_mkey_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_core_dump_fill_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *_mkey, u32 *mkey); int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn); @@ -826,8 +828,6 @@ void mlx5_pagealloc_init(struct mlx5_core_dev *dev); void mlx5_pagealloc_cleanup(struct mlx5_core_dev *dev); int mlx5_pagealloc_start(struct mlx5_core_dev *dev); void mlx5_pagealloc_stop(struct mlx5_core_dev *dev); -int mlx5_sriov_init(struct mlx5_core_dev *dev); -int mlx5_sriov_cleanup(struct mlx5_core_dev *dev); void mlx5_core_req_pages_handler(struct mlx5_core_dev *dev, u16 func_id, s32 npages); int mlx5_satisfy_startup_pages(struct mlx5_core_dev *dev, int boot); @@ -865,7 +865,7 @@ int mlx5_core_access_reg(struct mlx5_core_dev *dev, void *data_in, int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq); void mlx5_debug_eq_remove(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq, - struct mlx5_query_eq_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_eq_debugfs_init(struct mlx5_core_dev *dev); void mlx5_eq_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_cq_debugfs_init(struct mlx5_core_dev *dev); @@ -930,6 +930,8 @@ enum { struct mlx5_interface { void * (*add)(struct mlx5_core_dev *dev); void (*remove)(struct mlx5_core_dev *dev, void *context); + int (*attach)(struct mlx5_core_dev *dev, void *context); + void (*detach)(struct mlx5_core_dev *dev, void *context); void (*event)(struct mlx5_core_dev *dev, void *context, enum mlx5_dev_event event, unsigned long param); void * (*get_dev)(void *context); @@ -942,6 +944,11 @@ int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); +bool mlx5_lag_is_active(struct mlx5_core_dev *dev); +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); + struct mlx5_profile { u64 mask; u8 log_max_qp; diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index e036d6030867..93ebc5e21334 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -54,6 +54,7 @@ static inline void build_leftovers_ft_param(int *priority, enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_BYPASS, + MLX5_FLOW_NAMESPACE_LAG, MLX5_FLOW_NAMESPACE_OFFLOADS, MLX5_FLOW_NAMESPACE_ETHTOOL, MLX5_FLOW_NAMESPACE_KERNEL, @@ -62,6 +63,8 @@ enum mlx5_flow_namespace_type { MLX5_FLOW_NAMESPACE_FDB, MLX5_FLOW_NAMESPACE_ESW_EGRESS, MLX5_FLOW_NAMESPACE_ESW_INGRESS, + MLX5_FLOW_NAMESPACE_SNIFFER_RX, + MLX5_FLOW_NAMESPACE_SNIFFER_TX, }; struct mlx5_flow_table; @@ -106,6 +109,9 @@ mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, int num_flow_table_entries, u32 level, u16 vport); +struct mlx5_flow_table *mlx5_create_lag_demux_flow_table( + struct mlx5_flow_namespace *ns, + int prio, u32 level); int mlx5_destroy_flow_table(struct mlx5_flow_table *ft); /* inbox should be set with the following values: diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d1f9a581aca8..6045d4d58065 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -152,7 +152,7 @@ enum { MLX5_CMD_OP_CONFIG_INT_MODERATION = 0x804, MLX5_CMD_OP_ACCESS_REG = 0x805, MLX5_CMD_OP_ATTACH_TO_MCG = 0x806, - MLX5_CMD_OP_DETTACH_FROM_MCG = 0x807, + MLX5_CMD_OP_DETACH_FROM_MCG = 0x807, MLX5_CMD_OP_GET_DROPPED_PACKET_LOG = 0x80a, MLX5_CMD_OP_MAD_IFC = 0x50d, MLX5_CMD_OP_QUERY_MAD_DEMUX = 0x80b, @@ -174,6 +174,12 @@ enum { MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY = 0x82b, MLX5_CMD_OP_SET_WOL_ROL = 0x830, MLX5_CMD_OP_QUERY_WOL_ROL = 0x831, + MLX5_CMD_OP_CREATE_LAG = 0x840, + MLX5_CMD_OP_MODIFY_LAG = 0x841, + MLX5_CMD_OP_QUERY_LAG = 0x842, + MLX5_CMD_OP_DESTROY_LAG = 0x843, + MLX5_CMD_OP_CREATE_VPORT_LAG = 0x844, + MLX5_CMD_OP_DESTROY_VPORT_LAG = 0x845, MLX5_CMD_OP_CREATE_TIR = 0x900, MLX5_CMD_OP_MODIFY_TIR = 0x901, MLX5_CMD_OP_DESTROY_TIR = 0x902, @@ -212,6 +218,8 @@ enum { MLX5_CMD_OP_DEALLOC_FLOW_COUNTER = 0x93a, MLX5_CMD_OP_QUERY_FLOW_COUNTER = 0x93b, MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, + MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, + MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, MLX5_CMD_OP_MAX }; @@ -281,7 +289,9 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 modify_root[0x1]; u8 identified_miss_table_mode[0x1]; u8 flow_table_modify[0x1]; - u8 reserved_at_7[0x19]; + u8 encap[0x1]; + u8 decap[0x1]; + u8 reserved_at_9[0x17]; u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; @@ -473,7 +483,9 @@ struct mlx5_ifc_ads_bits { struct mlx5_ifc_flow_table_nic_cap_bits { u8 nic_rx_multi_path_tirs[0x1]; - u8 reserved_at_1[0x1ff]; + u8 nic_rx_multi_path_tirs_fts[0x1]; + u8 allow_sniffer_and_nic_rx_shared_tir[0x1]; + u8 reserved_at_3[0x1fd]; struct mlx5_ifc_flow_table_prop_layout_bits flow_table_properties_nic_receive; @@ -512,7 +524,15 @@ struct mlx5_ifc_e_switch_cap_bits { u8 nic_vport_node_guid_modify[0x1]; u8 nic_vport_port_guid_modify[0x1]; - u8 reserved_at_20[0x7e0]; + u8 vxlan_encap_decap[0x1]; + u8 nvgre_encap_decap[0x1]; + u8 reserved_at_22[0x9]; + u8 log_max_encap_headers[0x5]; + u8 reserved_2b[0x6]; + u8 max_encap_header_size[0xa]; + + u8 reserved_40[0x7c0]; + }; struct mlx5_ifc_qos_cap_bits { @@ -767,7 +787,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 out_of_seq_cnt[0x1]; u8 vport_counters[0x1]; u8 retransmission_q_counters[0x1]; - u8 reserved_at_183[0x3]; + u8 reserved_at_183[0x1]; + u8 modify_rq_counter_set_id[0x1]; + u8 reserved_at_185[0x1]; u8 max_qp_cnt[0xa]; u8 pkey_table_size[0x10]; @@ -870,7 +892,10 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 pad_tx_eth_packet[0x1]; u8 reserved_at_263[0x8]; u8 log_bf_reg_size[0x5]; - u8 reserved_at_270[0x10]; + + u8 reserved_at_270[0xb]; + u8 lag_master[0x1]; + u8 num_lag_ports[0x4]; u8 reserved_at_280[0x10]; u8 max_wqe_sz_sq[0x10]; @@ -1904,7 +1929,7 @@ enum { struct mlx5_ifc_qpc_bits { u8 state[0x4]; - u8 reserved_at_4[0x4]; + u8 lag_tx_port_affinity[0x4]; u8 st[0x8]; u8 reserved_at_10[0x3]; u8 pm_state[0x2]; @@ -1966,7 +1991,10 @@ struct mlx5_ifc_qpc_bits { u8 reserved_at_3e0[0x8]; u8 cqn_snd[0x18]; - u8 reserved_at_400[0x40]; + u8 reserved_at_400[0x8]; + u8 deth_sqpn[0x18]; + + u8 reserved_at_420[0x20]; u8 reserved_at_440[0x8]; u8 last_acked_psn[0x18]; @@ -2064,6 +2092,8 @@ enum { MLX5_FLOW_CONTEXT_ACTION_DROP = 0x2, MLX5_FLOW_CONTEXT_ACTION_FWD_DEST = 0x4, MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, + MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, + MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, }; struct mlx5_ifc_flow_context_bits { @@ -2083,7 +2113,9 @@ struct mlx5_ifc_flow_context_bits { u8 reserved_at_a0[0x8]; u8 flow_counter_list_size[0x18]; - u8 reserved_at_c0[0x140]; + u8 encap_id[0x20]; + + u8 reserved_at_e0[0x120]; struct mlx5_ifc_fte_match_param_bits match_value; @@ -2146,7 +2178,11 @@ struct mlx5_ifc_traffic_counter_bits { }; struct mlx5_ifc_tisc_bits { - u8 reserved_at_0[0xc]; + u8 strict_lag_tx_port_affinity[0x1]; + u8 reserved_at_1[0x3]; + u8 lag_tx_port_affinity[0x04]; + + u8 reserved_at_8[0x4]; u8 prio[0x4]; u8 reserved_at_10[0x10]; @@ -2808,7 +2844,7 @@ struct mlx5_ifc_xrqc_bits { struct mlx5_ifc_tag_matching_topology_context_bits tag_matching_topology_context; - u8 reserved_at_180[0x180]; + u8 reserved_at_180[0x200]; struct mlx5_ifc_wq_bits wq; }; @@ -3489,7 +3525,7 @@ struct mlx5_ifc_query_special_contexts_out_bits { u8 syndrome[0x20]; - u8 reserved_at_40[0x20]; + u8 dump_fill_mkey[0x20]; u8 resd_lkey[0x20]; }; @@ -4213,6 +4249,85 @@ struct mlx5_ifc_query_eq_in_bits { u8 reserved_at_60[0x20]; }; +struct mlx5_ifc_encap_header_in_bits { + u8 reserved_at_0[0x5]; + u8 header_type[0x3]; + u8 reserved_at_8[0xe]; + u8 encap_header_size[0xa]; + + u8 reserved_at_20[0x10]; + u8 encap_header[2][0x8]; + + u8 more_encap_header[0][0x8]; +}; + +struct mlx5_ifc_query_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_encap_header_in_bits encap_header[0]; +}; + +struct mlx5_ifc_query_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 encap_id[0x20]; + + u8 reserved_at_60[0xa0]; +}; + +struct mlx5_ifc_alloc_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 encap_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_alloc_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0xa0]; + + struct mlx5_ifc_encap_header_in_bits encap_header; +}; + +struct mlx5_ifc_dealloc_encap_header_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_dealloc_encap_header_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_20[0x10]; + u8 op_mod[0x10]; + + u8 encap_id[0x20]; + + u8 reserved_60[0x20]; +}; + struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -4517,7 +4632,9 @@ struct mlx5_ifc_modify_tis_out_bits { struct mlx5_ifc_modify_tis_bitmask_bits { u8 reserved_at_0[0x20]; - u8 reserved_at_20[0x1f]; + u8 reserved_at_20[0x1d]; + u8 lag_tx_port_affinity[0x1]; + u8 strict_lag_tx_port_affinity[0x1]; u8 prio[0x1]; }; @@ -4652,6 +4769,11 @@ struct mlx5_ifc_modify_rq_out_bits { u8 reserved_at_40[0x40]; }; +enum { + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3, +}; + struct mlx5_ifc_modify_rq_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -4721,7 +4843,7 @@ struct mlx5_ifc_modify_nic_vport_field_select_bits { u8 reserved_at_0[0x16]; u8 node_guid[0x1]; u8 port_guid[0x1]; - u8 reserved_at_18[0x1]; + u8 min_inline[0x1]; u8 mtu[0x1]; u8 change_event[0x1]; u8 promisc[0x1]; @@ -6099,7 +6221,9 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_a0[0x20]; - u8 reserved_at_c0[0x4]; + u8 encap_en[0x1]; + u8 decap_en[0x1]; + u8 reserved_at_c2[0x2]; u8 table_miss_mode[0x4]; u8 level[0x8]; u8 reserved_at_d0[0x8]; @@ -6108,7 +6232,10 @@ struct mlx5_ifc_create_flow_table_in_bits { u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_at_100[0x100]; + u8 reserved_at_100[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_120[0x80]; }; struct mlx5_ifc_create_flow_group_out_bits { @@ -7563,7 +7690,8 @@ struct mlx5_ifc_set_flow_table_root_in_bits { }; enum { - MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = 0x1, + MLX5_MODIFY_FLOW_TABLE_MISS_TABLE_ID = (1UL << 0), + MLX5_MODIFY_FLOW_TABLE_LAG_NEXT_TABLE_ID = (1UL << 15), }; struct mlx5_ifc_modify_flow_table_out_bits { @@ -7602,7 +7730,10 @@ struct mlx5_ifc_modify_flow_table_in_bits { u8 reserved_at_e0[0x8]; u8 table_miss_id[0x18]; - u8 reserved_at_100[0x100]; + u8 reserved_at_100[0x8]; + u8 lag_master_next_table_id[0x18]; + + u8 reserved_at_120[0x80]; }; struct mlx5_ifc_ets_tcn_config_reg_bits { @@ -7710,4 +7841,134 @@ struct mlx5_ifc_dcbx_param_bits { u8 error[0x8]; u8 reserved_at_a0[0x160]; }; + +struct mlx5_ifc_lagc_bits { + u8 reserved_at_0[0x1d]; + u8 lag_state[0x3]; + + u8 reserved_at_20[0x14]; + u8 tx_remap_affinity_2[0x4]; + u8 reserved_at_38[0x4]; + u8 tx_remap_affinity_1[0x4]; +}; + +struct mlx5_ifc_create_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_modify_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_modify_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + u8 field_select[0x20]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_query_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; + + struct mlx5_ifc_lagc_bits ctx; +}; + +struct mlx5_ifc_query_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_vport_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_create_vport_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_vport_lag_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_destroy_vport_lag_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x40]; +}; + #endif /* MLX5_IFC_H */ diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index e3012cc64b8a..b3065acd20b4 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -61,6 +61,39 @@ enum mlx5_an_status { #define MLX5_I2C_ADDR_HIGH 0x51 #define MLX5_EEPROM_PAGE_LENGTH 256 +enum mlx5e_link_mode { + MLX5E_1000BASE_CX_SGMII = 0, + MLX5E_1000BASE_KX = 1, + MLX5E_10GBASE_CX4 = 2, + MLX5E_10GBASE_KX4 = 3, + MLX5E_10GBASE_KR = 4, + MLX5E_20GBASE_KR2 = 5, + MLX5E_40GBASE_CR4 = 6, + MLX5E_40GBASE_KR4 = 7, + MLX5E_56GBASE_R4 = 8, + MLX5E_10GBASE_CR = 12, + MLX5E_10GBASE_SR = 13, + MLX5E_10GBASE_ER = 14, + MLX5E_40GBASE_SR4 = 15, + MLX5E_40GBASE_LR4 = 16, + MLX5E_50GBASE_SR2 = 18, + MLX5E_100GBASE_CR4 = 20, + MLX5E_100GBASE_SR4 = 21, + MLX5E_100GBASE_KR4 = 22, + MLX5E_100GBASE_LR4 = 23, + MLX5E_100BASE_TX = 24, + MLX5E_1000BASE_T = 25, + MLX5E_10GBASE_T = 26, + MLX5E_25GBASE_CR = 27, + MLX5E_25GBASE_KR = 28, + MLX5E_25GBASE_SR = 29, + MLX5E_50GBASE_CR2 = 30, + MLX5E_50GBASE_KR2 = 31, + MLX5E_LINK_MODES_NUMBER, +}; + +#define MLX5E_PROT_MASK(link_mode) (1 << link_mode) + int mlx5_set_port_caps(struct mlx5_core_dev *dev, u8 port_num, u32 caps); int mlx5_query_port_ptys(struct mlx5_core_dev *dev, u32 *ptys, int ptys_size, int proto_mask, u8 local_port); @@ -70,9 +103,10 @@ int mlx5_query_port_proto_admin(struct mlx5_core_dev *dev, u32 *proto_admin, int proto_mask); int mlx5_query_port_link_width_oper(struct mlx5_core_dev *dev, u8 *link_width_oper, u8 local_port); -int mlx5_query_port_proto_oper(struct mlx5_core_dev *dev, - u8 *proto_oper, int proto_mask, - u8 local_port); +int mlx5_query_port_ib_proto_oper(struct mlx5_core_dev *dev, + u8 *proto_oper, u8 local_port); +int mlx5_query_port_eth_proto_oper(struct mlx5_core_dev *dev, + u32 *proto_oper, u8 local_port); int mlx5_set_port_ptys(struct mlx5_core_dev *dev, bool an_disable, u32 proto_admin, int proto_mask); void mlx5_toggle_port_link(struct mlx5_core_dev *dev); diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 7879bf411891..0aacb2a7480d 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -123,12 +123,13 @@ enum { }; enum { - MLX5_NON_ZERO_RQ = 0 << 24, - MLX5_SRQ_RQ = 1 << 24, - MLX5_CRQ_RQ = 2 << 24, - MLX5_ZERO_LEN_RQ = 3 << 24 + MLX5_NON_ZERO_RQ = 0x0, + MLX5_SRQ_RQ = 0x1, + MLX5_CRQ_RQ = 0x2, + MLX5_ZERO_LEN_RQ = 0x3 }; +/* TODO REM */ enum { /* params1 */ MLX5_QP_BIT_SRE = 1 << 15, @@ -177,12 +178,6 @@ enum { MLX5_FENCE_MODE_SMALL_AND_FENCE = 4 << 5, }; -enum { - MLX5_QP_LAT_SENSITIVE = 1 << 28, - MLX5_QP_BLOCK_MCAST = 1 << 30, - MLX5_QP_ENABLE_SIG = 1 << 31, -}; - enum { MLX5_RCV_DBR = 0, MLX5_SND_DBR = 1, @@ -484,6 +479,7 @@ struct mlx5_qp_path { u8 rmac[6]; }; +/* FIXME: use mlx5_ifc.h qpc */ struct mlx5_qp_context { __be32 flags; __be32 flags_pd; @@ -525,99 +521,6 @@ struct mlx5_qp_context { u8 rsvd1[24]; }; -struct mlx5_create_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 input_qpn; - u8 rsvd0[4]; - __be32 opt_param_mask; - u8 rsvd1[4]; - struct mlx5_qp_context ctx; - u8 rsvd3[16]; - __be64 pas[0]; -}; - -struct mlx5_create_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; -}; - -struct mlx5_destroy_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - -struct mlx5_modify_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd0[4]; - __be32 optparam; - u8 rsvd1[4]; - struct mlx5_qp_context ctx; - u8 rsvd2[16]; -}; - -struct mlx5_modify_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd0[8]; -}; - -struct mlx5_query_qp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd[4]; -}; - -struct mlx5_query_qp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd1[8]; - __be32 optparam; - u8 rsvd0[4]; - struct mlx5_qp_context ctx; - u8 rsvd2[16]; - __be64 pas[0]; -}; - -struct mlx5_conf_sqp_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 qpn; - u8 rsvd[3]; - u8 type; -}; - -struct mlx5_conf_sqp_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_xrcd_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_alloc_xrcd_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 xrcdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_xrcd_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 xrcdn; - u8 rsvd[4]; -}; - -struct mlx5_dealloc_xrcd_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn) { return radix_tree_lookup(&dev->priv.qp_table.tree, qpn); @@ -628,28 +531,17 @@ static inline struct mlx5_core_mkey *__mlx5_mr_lookup(struct mlx5_core_dev *dev, return radix_tree_lookup(&dev->priv.mkey_table.tree, key); } -struct mlx5_page_fault_resume_mbox_in { - struct mlx5_inbox_hdr hdr; - __be32 flags_qpn; - u8 reserved[4]; -}; - -struct mlx5_page_fault_resume_mbox_out { - struct mlx5_outbox_hdr hdr; - u8 rsvd[8]; -}; - int mlx5_core_create_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_create_qp_mbox_in *in, + u32 *in, int inlen); -int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 operation, - struct mlx5_modify_qp_mbox_in *in, int sqd_event, +int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode, + u32 opt_param_mask, void *qpc, struct mlx5_core_qp *qp); int mlx5_core_destroy_qp(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp); int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp, - struct mlx5_query_qp_mbox_out *out, int outlen); + u32 *out, int outlen); int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn); int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn); diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index e087b7d047ac..451b0bde9083 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); void mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev, u8 *min_inline); +int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev, + u16 vport, u8 min_inline); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0d126aeb3ec0..d43ef96bf075 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -32,6 +32,7 @@ #define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c #define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 +#define SDIO_DEVICE_ID_BROADCOM_4339 0x4339 #define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_43430 0xa9a6 #define SDIO_DEVICE_ID_BROADCOM_4345 0x4345 diff --git a/include/linux/net.h b/include/linux/net.h index b9f0ff4d489c..cd0c8bd0a1de 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -128,6 +129,9 @@ struct page; struct sockaddr; struct msghdr; struct module; +struct sk_buff; +typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, + unsigned int, size_t); struct proto_ops { int family; @@ -186,6 +190,8 @@ struct proto_ops { struct pipe_inode_info *pipe, size_t len, unsigned int flags); int (*set_peek_off)(struct sock *sk, int val); int (*peek_len)(struct socket *sock); + int (*read_sock)(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor); }; #define DECLARE_SOCKADDR(type, dst, src) \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e8d79d4ebcfe..136ae6bbe81e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -52,6 +52,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -788,6 +789,7 @@ enum { TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_MATCHALL, + TC_SETUP_CLSBPF, }; struct tc_cls_u32_offload; @@ -799,6 +801,7 @@ struct tc_to_netdev { struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; + struct tc_cls_bpf_offload *cls_bpf; }; }; @@ -923,6 +926,14 @@ struct netdev_xdp { * 3. Update dev->stats asynchronously and atomically, and define * neither operation. * + * bool (*ndo_has_offload_stats)(int attr_id) + * Return true if this device supports offload stats of this attr_id. + * + * int (*ndo_get_offload_stats)(int attr_id, const struct net_device *dev, + * void *attr_data) + * Get statistics for offload operations by attr_id. Write it into the + * attr_data pointer. + * * int (*ndo_vlan_rx_add_vid)(struct net_device *dev, __be16 proto, u16 vid); * If device supports VLAN filtering this function is called when a * VLAN id is registered. @@ -935,7 +946,8 @@ struct netdev_xdp { * * SR-IOV management functions. * int (*ndo_set_vf_mac)(struct net_device *dev, int vf, u8* mac); - * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, u8 qos); + * int (*ndo_set_vf_vlan)(struct net_device *dev, int vf, u16 vlan, + * u8 qos, __be16 proto); * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); @@ -1030,7 +1042,7 @@ struct netdev_xdp { * Deletes the FDB entry from dev coresponding to addr. * int (*ndo_fdb_dump)(struct sk_buff *skb, struct netlink_callback *cb, * struct net_device *dev, struct net_device *filter_dev, - * int idx) + * int *idx) * Used to add FDB entries to dump requests. Implementers should add * entries to skb and update idx with the number of entries. * @@ -1154,6 +1166,10 @@ struct net_device_ops { struct rtnl_link_stats64* (*ndo_get_stats64)(struct net_device *dev, struct rtnl_link_stats64 *storage); + bool (*ndo_has_offload_stats)(int attr_id); + int (*ndo_get_offload_stats)(int attr_id, + const struct net_device *dev, + void *attr_data); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); int (*ndo_vlan_rx_add_vid)(struct net_device *dev, @@ -1172,7 +1188,8 @@ struct net_device_ops { int (*ndo_set_vf_mac)(struct net_device *dev, int queue, u8 *mac); int (*ndo_set_vf_vlan)(struct net_device *dev, - int queue, u16 vlan, u8 qos); + int queue, u16 vlan, + u8 qos, __be16 proto); int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, int max_tx_rate); @@ -1262,7 +1279,7 @@ struct net_device_ops { struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); int (*ndo_bridge_setlink)(struct net_device *dev, struct nlmsghdr *nlh, @@ -1561,8 +1578,6 @@ enum netdev_priv_flags { * * @xps_maps: XXX: need comments on this one * - * @offload_fwd_mark: Offload device fwding mark - * * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1784,7 +1799,7 @@ struct net_device { #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS - struct list_head nf_hooks_ingress; + struct nf_hook_entry __rcu *nf_hooks_ingress; #endif unsigned char broadcast[MAX_ADDR_LEN]; @@ -1800,6 +1815,9 @@ struct net_device { unsigned int num_tx_queues; unsigned int real_num_tx_queues; struct Qdisc *qdisc; +#ifdef CONFIG_NET_SCHED + DECLARE_HASHTABLE (qdisc_hash, 4); +#endif unsigned long tx_queue_len; spinlock_t tx_global_lock; int watchdog_timeo; @@ -1810,9 +1828,6 @@ struct net_device { #ifdef CONFIG_NET_CLS_ACT struct tcf_proto __rcu *egress_cl_list; #endif -#ifdef CONFIG_NET_SWITCHDEV - u32 offload_fwd_mark; -#endif /* These may be needed for future network-power-down code. */ struct timer_list watchdog_timer; diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 9230f9aee896..abc7fdcb9eb1 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -55,12 +55,34 @@ struct nf_hook_state { struct net_device *out; struct sock *sk; struct net *net; - struct list_head *hook_list; + struct nf_hook_entry __rcu *hook_entries; int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; +typedef unsigned int nf_hookfn(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state); +struct nf_hook_ops { + struct list_head list; + + /* User fills in from here down. */ + nf_hookfn *hook; + struct net_device *dev; + void *priv; + u_int8_t pf; + unsigned int hooknum; + /* Hooks are ordered in ascending priority. */ + int priority; +}; + +struct nf_hook_entry { + struct nf_hook_entry __rcu *next; + struct nf_hook_ops ops; + const struct nf_hook_ops *orig_ops; +}; + static inline void nf_hook_state_init(struct nf_hook_state *p, - struct list_head *hook_list, + struct nf_hook_entry *hook_entry, unsigned int hook, int thresh, u_int8_t pf, struct net_device *indev, @@ -76,26 +98,11 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->out = outdev; p->sk = sk; p->net = net; - p->hook_list = hook_list; + RCU_INIT_POINTER(p->hook_entries, hook_entry); p->okfn = okfn; } -typedef unsigned int nf_hookfn(void *priv, - struct sk_buff *skb, - const struct nf_hook_state *state); -struct nf_hook_ops { - struct list_head list; - - /* User fills in from here down. */ - nf_hookfn *hook; - struct net_device *dev; - void *priv; - u_int8_t pf; - unsigned int hooknum; - /* Hooks are ordered in ascending priority. */ - int priority; -}; struct nf_sockopt_ops { struct list_head list; @@ -133,6 +140,8 @@ int nf_register_hook(struct nf_hook_ops *reg); void nf_unregister_hook(struct nf_hook_ops *reg); int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n); +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n); /* Functions to register get/setsockopt ranges (non-inclusive). You need to check permissions yourself! */ @@ -161,7 +170,8 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { - struct list_head *hook_list; + struct nf_hook_entry *hook_head; + int ret = 1; #ifdef HAVE_JUMP_LABEL if (__builtin_constant_p(pf) && @@ -170,16 +180,19 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return 1; #endif - hook_list = &net->nf.hooks[pf][hook]; - - if (!list_empty(hook_list)) { + rcu_read_lock(); + hook_head = rcu_dereference(net->nf.hooks[pf][hook]); + if (hook_head) { struct nf_hook_state state; - nf_hook_state_init(&state, hook_list, hook, thresh, + nf_hook_state_init(&state, hook_head, hook, thresh, pf, indev, outdev, sk, net, okfn); - return nf_hook_slow(skb, &state); + + ret = nf_hook_slow(skb, &state); } - return 1; + rcu_read_unlock(); + + return ret; } static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 275505792664..1d1ef4e20512 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -4,13 +4,9 @@ #include struct ip_conntrack_stat { - unsigned int searched; unsigned int found; - unsigned int new; unsigned int invalid; unsigned int ignore; - unsigned int delete; - unsigned int delete_list; unsigned int insert; unsigned int insert_failed; unsigned int drop; diff --git a/include/linux/netfilter/nf_conntrack_proto_gre.h b/include/linux/netfilter/nf_conntrack_proto_gre.h index df78dc2b5524..dee0acd0dd31 100644 --- a/include/linux/netfilter/nf_conntrack_proto_gre.h +++ b/include/linux/netfilter/nf_conntrack_proto_gre.h @@ -1,68 +1,8 @@ #ifndef _CONNTRACK_PROTO_GRE_H #define _CONNTRACK_PROTO_GRE_H #include - -/* GRE PROTOCOL HEADER */ - -/* GRE Version field */ -#define GRE_VERSION_1701 0x0 -#define GRE_VERSION_PPTP 0x1 - -/* GRE Protocol field */ -#define GRE_PROTOCOL_PPTP 0x880B - -/* GRE Flags */ -#define GRE_FLAG_C 0x80 -#define GRE_FLAG_R 0x40 -#define GRE_FLAG_K 0x20 -#define GRE_FLAG_S 0x10 -#define GRE_FLAG_A 0x80 - -#define GRE_IS_C(f) ((f)&GRE_FLAG_C) -#define GRE_IS_R(f) ((f)&GRE_FLAG_R) -#define GRE_IS_K(f) ((f)&GRE_FLAG_K) -#define GRE_IS_S(f) ((f)&GRE_FLAG_S) -#define GRE_IS_A(f) ((f)&GRE_FLAG_A) - -/* GRE is a mess: Four different standards */ -struct gre_hdr { -#if defined(__LITTLE_ENDIAN_BITFIELD) - __u16 rec:3, - srr:1, - seq:1, - key:1, - routing:1, - csum:1, - version:3, - reserved:4, - ack:1; -#elif defined(__BIG_ENDIAN_BITFIELD) - __u16 csum:1, - routing:1, - key:1, - seq:1, - srr:1, - rec:3, - ack:1, - reserved:4, - version:3; -#else -#error "Adjust your defines" -#endif - __be16 protocol; -}; - -/* modified GRE header for PPTP */ -struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __be16 payload_len; /* size of ppp payload, not inc. gre header */ - __be16 call_id; /* peer's call_id for this session */ - __be32 seq; /* sequence number. Present if S==1 */ - __be32 ack; /* seq number of highest packet received by */ - /* sender in this session */ -}; +#include +#include struct nf_ct_gre { unsigned int stream_timeout; diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 5fcd375ef175..33e37fb41d5d 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -11,22 +11,30 @@ static inline bool nf_hook_ingress_active(const struct sk_buff *skb) if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) return false; #endif - return !list_empty(&skb->dev->nf_hooks_ingress); + return rcu_access_pointer(skb->dev->nf_hooks_ingress); } +/* caller must hold rcu_read_lock */ static inline int nf_hook_ingress(struct sk_buff *skb) { + struct nf_hook_entry *e = rcu_dereference(skb->dev->nf_hooks_ingress); struct nf_hook_state state; - nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, - skb->dev, NULL, NULL, dev_net(skb->dev), NULL); + /* Must recheck the ingress hook head, in the event it became NULL + * after the check in nf_hook_ingress_active evaluated to true. + */ + if (unlikely(!e)) + return 0; + + nf_hook_state_init(&state, e, NF_NETDEV_INGRESS, INT_MIN, + NFPROTO_NETDEV, skb->dev, NULL, NULL, + dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } static inline void nf_hook_ingress_init(struct net_device *dev) { - INIT_LIST_HEAD(&dev->nf_hooks_ingress); + RCU_INIT_POINTER(dev->nf_hooks_ingress, NULL); } #else /* CONFIG_NETFILTER_INGRESS */ static inline int nf_hook_ingress_active(struct sk_buff *skb) diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 5c5362584aba..060d0ede88df 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -690,6 +690,10 @@ struct perf_event { u64 (*clock)(void); perf_overflow_handler_t overflow_handler; void *overflow_handler_context; +#ifdef CONFIG_BPF_SYSCALL + perf_overflow_handler_t orig_overflow_handler; + struct bpf_prog *prog; +#endif #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; @@ -802,6 +806,11 @@ struct perf_output_handle { int page; }; +struct bpf_perf_event_data_kern { + struct pt_regs *regs; + struct perf_sample_data *data; +}; + #ifdef CONFIG_CGROUP_PERF /* diff --git a/include/linux/phy.h b/include/linux/phy.h index 2d24b283aa2d..e25f1830fbcf 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -80,6 +80,7 @@ typedef enum { PHY_INTERFACE_MODE_XGMII, PHY_INTERFACE_MODE_MOCA, PHY_INTERFACE_MODE_QSGMII, + PHY_INTERFACE_MODE_TRGMII, PHY_INTERFACE_MODE_MAX, } phy_interface_t; @@ -123,6 +124,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "moca"; case PHY_INTERFACE_MODE_QSGMII: return "qsgmii"; + case PHY_INTERFACE_MODE_TRGMII: + return "trgmii"; default: return "unknown"; } diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h index 6b15e168148a..5ad54fc66cf0 100644 --- a/include/linux/ptp_clock_kernel.h +++ b/include/linux/ptp_clock_kernel.h @@ -127,6 +127,11 @@ struct ptp_clock; * * @info: Structure describing the new clock. * @parent: Pointer to the parent device of the new clock. + * + * Returns a valid pointer on success or PTR_ERR on failure. If PHC + * support is missing at the configuration level, this function + * returns NULL, and drivers are expected to gracefully handle that + * case separately. */ extern struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 40c0ada01806..734deb094618 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -5,28 +5,77 @@ * (GPL) Version 2, available from the file COPYING in the main directory of * this source tree. */ +#ifndef _COMMON_HSI_H +#define _COMMON_HSI_H +#include +#include +#include +#include + +/* dma_addr_t manip */ +#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) +#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) +#define DMA_REGPAIR_LE(x, val) do { \ + (x).hi = DMA_HI_LE((val)); \ + (x).lo = DMA_LO_LE((val)); \ + } while (0) + +#define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) +#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) +#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) +#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) #ifndef __COMMON_HSI__ #define __COMMON_HSI__ -#define CORE_SPQE_PAGE_SIZE_BYTES 4096 #define X_FINAL_CLEANUP_AGG_INT 1 + +#define EVENT_RING_PAGE_SIZE_BYTES 4096 + #define NUM_OF_GLOBAL_QUEUES 128 +#define COMMON_QUEUE_ENTRY_MAX_BYTE_SIZE 64 + +#define ISCSI_CDU_TASK_SEG_TYPE 0 +#define RDMA_CDU_TASK_SEG_TYPE 1 + +#define FW_ASSERT_GENERAL_ATTN_IDX 32 + +#define MAX_PINNED_CCFC 32 /* Queue Zone sizes in bytes */ #define TSTORM_QZONE_SIZE 8 -#define MSTORM_QZONE_SIZE 0 +#define MSTORM_QZONE_SIZE 16 #define USTORM_QZONE_SIZE 8 #define XSTORM_QZONE_SIZE 8 #define YSTORM_QZONE_SIZE 0 #define PSTORM_QZONE_SIZE 0 -#define ETH_MAX_NUM_RX_QUEUES_PER_VF 16 +#define MSTORM_VF_ZONE_DEFAULT_SIZE_LOG 7 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DEFAULT 16 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_DOUBLE 48 +#define ETH_MAX_NUM_RX_QUEUES_PER_VF_QUAD 112 + +/********************************/ +/* CORE (LIGHT L2) FW CONSTANTS */ +/********************************/ + +#define CORE_LL2_MAX_RAMROD_PER_CON 8 +#define CORE_LL2_TX_BD_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_BD_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_CQE_PAGE_SIZE_BYTES 4096 +#define CORE_LL2_RX_NUM_NEXT_PAGE_BDS 1 + +#define CORE_LL2_TX_MAX_BDS_PER_PACKET 12 + +#define CORE_SPQE_PAGE_SIZE_BYTES 4096 + +#define MAX_NUM_LL2_RX_QUEUES 32 +#define MAX_NUM_LL2_TX_STATS_COUNTERS 32 #define FW_MAJOR_VERSION 8 #define FW_MINOR_VERSION 10 -#define FW_REVISION_VERSION 5 +#define FW_REVISION_VERSION 10 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -83,6 +132,20 @@ #define NUM_OF_LCIDS (320) #define NUM_OF_LTIDS (320) +/* Clock values */ +#define MASTER_CLK_FREQ_E4 (375e6) +#define STORM_CLK_FREQ_E4 (1000e6) +#define CLK25M_CLK_FREQ_E4 (25e6) + +/* Global PXP windows (GTT) */ +#define NUM_OF_GTT 19 +#define GTT_DWORD_SIZE_BITS 10 +#define GTT_BYTE_SIZE_BITS (GTT_DWORD_SIZE_BITS + 2) +#define GTT_DWORD_SIZE BIT(GTT_DWORD_SIZE_BITS) + +/* Tools Version */ +#define TOOLS_VERSION 10 + /*****************/ /* CDU CONSTANTS */ /*****************/ @@ -90,6 +153,8 @@ #define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (17) #define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0x1ffff) +#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (12) +#define CDU_VF_FL_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0xfff) /*****************/ /* DQ CONSTANTS */ /*****************/ @@ -115,6 +180,11 @@ #define DQ_XCM_ETH_TX_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 #define DQ_XCM_ETH_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 #define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 +#define DQ_XCM_ISCSI_SQ_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_ISCSI_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 +#define DQ_XCM_ISCSI_EXP_STAT_SN_CMD DQ_XCM_AGG_VAL_SEL_REG6 +#define DQ_XCM_ROCE_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 /* UCM agg val selection (HW) */ #define DQ_UCM_AGG_VAL_SEL_WORD0 0 @@ -159,13 +229,16 @@ #define DQ_XCM_AGG_FLG_SHIFT_CF23 7 /* XCM agg counter flag selection */ -#define DQ_XCM_CORE_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_CORE_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_DQ_CF_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF18) -#define DQ_XCM_ETH_TERMINATE_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF19) -#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF22) -#define DQ_XCM_ETH_TPH_EN_CMD (1 << DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_CORE_DQ_CF_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_CORE_TERMINATE_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_CORE_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_DQ_CF_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_ETH_TERMINATE_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ETH_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_TPH_EN_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_ISCSI_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ISCSI_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) /* UCM agg counter flag selection (HW) */ #define DQ_UCM_AGG_FLG_SHIFT_CF0 0 @@ -178,9 +251,45 @@ #define DQ_UCM_AGG_FLG_SHIFT_RULE1EN 7 /* UCM agg counter flag selection (FW) */ -#define DQ_UCM_ETH_PMD_TX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF4) -#define DQ_UCM_ETH_PMD_RX_ARM_CMD (1 << DQ_UCM_AGG_FLG_SHIFT_CF5) +#define DQ_UCM_ETH_PMD_TX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ETH_PMD_RX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) +#define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_ROCE_CQ_ARM_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) +/* TCM agg counter flag selection (HW) */ +#define DQ_TCM_AGG_FLG_SHIFT_CF0 0 +#define DQ_TCM_AGG_FLG_SHIFT_CF1 1 +#define DQ_TCM_AGG_FLG_SHIFT_CF2 2 +#define DQ_TCM_AGG_FLG_SHIFT_CF3 3 +#define DQ_TCM_AGG_FLG_SHIFT_CF4 4 +#define DQ_TCM_AGG_FLG_SHIFT_CF5 5 +#define DQ_TCM_AGG_FLG_SHIFT_CF6 6 +#define DQ_TCM_AGG_FLG_SHIFT_CF7 7 +/* TCM agg counter flag selection (FW) */ +#define DQ_TCM_ISCSI_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) +#define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) + +/* PWM address mapping */ +#define DQ_PWM_OFFSET_DPM_BASE 0x0 +#define DQ_PWM_OFFSET_DPM_END 0x27 +#define DQ_PWM_OFFSET_XCM16_BASE 0x40 +#define DQ_PWM_OFFSET_XCM32_BASE 0x44 +#define DQ_PWM_OFFSET_UCM16_BASE 0x48 +#define DQ_PWM_OFFSET_UCM32_BASE 0x4C +#define DQ_PWM_OFFSET_UCM16_4 0x50 +#define DQ_PWM_OFFSET_TCM16_BASE 0x58 +#define DQ_PWM_OFFSET_TCM32_BASE 0x5C +#define DQ_PWM_OFFSET_XCM_FLAGS 0x68 +#define DQ_PWM_OFFSET_UCM_FLAGS 0x69 +#define DQ_PWM_OFFSET_TCM_FLAGS 0x6B + +#define DQ_PWM_OFFSET_XCM_RDMA_SQ_PROD (DQ_PWM_OFFSET_XCM16_BASE + 2) +#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_32BIT (DQ_PWM_OFFSET_UCM32_BASE) +#define DQ_PWM_OFFSET_UCM_RDMA_CQ_CONS_16BIT (DQ_PWM_OFFSET_UCM16_4) +#define DQ_PWM_OFFSET_UCM_RDMA_INT_TIMEOUT (DQ_PWM_OFFSET_UCM16_BASE + 2) +#define DQ_PWM_OFFSET_UCM_RDMA_ARM_FLAGS (DQ_PWM_OFFSET_UCM_FLAGS) +#define DQ_PWM_OFFSET_TCM_ROCE_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 1) +#define DQ_PWM_OFFSET_TCM_IWARP_RQ_PROD (DQ_PWM_OFFSET_TCM16_BASE + 3) #define DQ_REGION_SHIFT (12) /* DPM */ @@ -214,15 +323,17 @@ */ #define CM_TX_PQ_BASE 0x200 +/* number of global Vport/QCN rate limiters */ +#define MAX_QM_GLOBAL_RLS 256 /* QM registers data */ #define QM_LINE_CRD_REG_WIDTH 16 -#define QM_LINE_CRD_REG_SIGN_BIT (1 << (QM_LINE_CRD_REG_WIDTH - 1)) +#define QM_LINE_CRD_REG_SIGN_BIT BIT((QM_LINE_CRD_REG_WIDTH - 1)) #define QM_BYTE_CRD_REG_WIDTH 24 -#define QM_BYTE_CRD_REG_SIGN_BIT (1 << (QM_BYTE_CRD_REG_WIDTH - 1)) +#define QM_BYTE_CRD_REG_SIGN_BIT BIT((QM_BYTE_CRD_REG_WIDTH - 1)) #define QM_WFQ_CRD_REG_WIDTH 32 -#define QM_WFQ_CRD_REG_SIGN_BIT (1 << (QM_WFQ_CRD_REG_WIDTH - 1)) +#define QM_WFQ_CRD_REG_SIGN_BIT BIT((QM_WFQ_CRD_REG_WIDTH - 1)) #define QM_RL_CRD_REG_WIDTH 32 -#define QM_RL_CRD_REG_SIGN_BIT (1 << (QM_RL_CRD_REG_WIDTH - 1)) +#define QM_RL_CRD_REG_SIGN_BIT BIT((QM_RL_CRD_REG_WIDTH - 1)) /*****************/ /* CAU CONSTANTS */ @@ -287,6 +398,17 @@ /* PXP CONSTANTS */ /*****************/ +/* Bars for Blocks */ +#define PXP_BAR_GRC 0 +#define PXP_BAR_TSDM 0 +#define PXP_BAR_USDM 0 +#define PXP_BAR_XSDM 0 +#define PXP_BAR_MSDM 0 +#define PXP_BAR_YSDM 0 +#define PXP_BAR_PSDM 0 +#define PXP_BAR_IGU 0 +#define PXP_BAR_DQ 1 + /* PTT and GTT */ #define PXP_NUM_PF_WINDOWS 12 #define PXP_PER_PF_ENTRY_SIZE 8 @@ -334,6 +456,52 @@ (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \ PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) +/* PF BAR */ +#define PXP_BAR0_START_GRC 0x0000 +#define PXP_BAR0_GRC_LENGTH 0x1C00000 +#define PXP_BAR0_END_GRC (PXP_BAR0_START_GRC + \ + PXP_BAR0_GRC_LENGTH - 1) + +#define PXP_BAR0_START_IGU 0x1C00000 +#define PXP_BAR0_IGU_LENGTH 0x10000 +#define PXP_BAR0_END_IGU (PXP_BAR0_START_IGU + \ + PXP_BAR0_IGU_LENGTH - 1) + +#define PXP_BAR0_START_TSDM 0x1C80000 +#define PXP_BAR0_SDM_LENGTH 0x40000 +#define PXP_BAR0_SDM_RESERVED_LENGTH 0x40000 +#define PXP_BAR0_END_TSDM (PXP_BAR0_START_TSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_MSDM 0x1D00000 +#define PXP_BAR0_END_MSDM (PXP_BAR0_START_MSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_USDM 0x1D80000 +#define PXP_BAR0_END_USDM (PXP_BAR0_START_USDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_XSDM 0x1E00000 +#define PXP_BAR0_END_XSDM (PXP_BAR0_START_XSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_YSDM 0x1E80000 +#define PXP_BAR0_END_YSDM (PXP_BAR0_START_YSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_START_PSDM 0x1F00000 +#define PXP_BAR0_END_PSDM (PXP_BAR0_START_PSDM + \ + PXP_BAR0_SDM_LENGTH - 1) + +#define PXP_BAR0_FIRST_INVALID_ADDRESS (PXP_BAR0_END_PSDM + 1) + +/* VF BAR */ +#define PXP_VF_BAR0 0 + +#define PXP_VF_BAR0_START_GRC 0x3E00 +#define PXP_VF_BAR0_GRC_LENGTH 0x200 +#define PXP_VF_BAR0_END_GRC (PXP_VF_BAR0_START_GRC + \ + PXP_VF_BAR0_GRC_LENGTH - 1) #define PXP_VF_BAR0_START_IGU 0 #define PXP_VF_BAR0_IGU_LENGTH 0x3000 @@ -399,6 +567,20 @@ #define PXP_NUM_ILT_RECORDS_BB 7600 #define PXP_NUM_ILT_RECORDS_K2 11000 #define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2) +#define PXP_QUEUES_ZONE_MAX_NUM 320 +/*****************/ +/* PRM CONSTANTS */ +/*****************/ +#define PRM_DMA_PAD_BYTES_NUM 2 +/******************/ +/* SDMs CONSTANTS */ +/******************/ +#define SDM_OP_GEN_TRIG_NONE 0 +#define SDM_OP_GEN_TRIG_WAKE_THREAD 1 +#define SDM_OP_GEN_TRIG_AGG_INT 2 +#define SDM_OP_GEN_TRIG_LOADER 4 +#define SDM_OP_GEN_TRIG_INDICATE_ERROR 6 +#define SDM_OP_GEN_TRIG_RELEASE_THREAD 7 #define SDM_COMP_TYPE_NONE 0 #define SDM_COMP_TYPE_WAKE_THREAD 1 @@ -424,6 +606,8 @@ /* PRS CONSTANTS */ /*****************/ +#define PRS_GFT_CAM_LINES_NO_MATCH 31 + /* Async data KCQ CQE */ struct async_data { __le32 cid; @@ -440,20 +624,6 @@ struct coalescing_timeset { #define COALESCING_TIMESET_VALID_SHIFT 7 }; -struct common_prs_pf_msg_info { - __le32 value; -#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_NPAR_DEFAULT_PF_SHIFT 0 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_1_SHIFT 1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_2_SHIFT 2 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_MASK 0x1 -#define COMMON_PRS_PF_MSG_INFO_FW_DEBUG_3_SHIFT 3 -#define COMMON_PRS_PF_MSG_INFO_RESERVED_MASK 0xFFFFFFF -#define COMMON_PRS_PF_MSG_INFO_RESERVED_SHIFT 4 -}; - struct common_queue_zone { __le16 ring_drv_data_consumer; __le16 reserved; @@ -473,6 +643,19 @@ struct vf_pf_channel_eqe_data { struct regpair msg_addr; }; +struct iscsi_eqe_data { + __le32 cid; + __le16 conn_id; + u8 error_code; + u8 error_pdu_opcode_reserved; +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_MASK 0x3F +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_SHIFT 0 +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_MASK 0x1 +#define ISCSI_EQE_DATA_ERROR_PDU_OPCODE_VALID_SHIFT 6 +#define ISCSI_EQE_DATA_RESERVED0_MASK 0x1 +#define ISCSI_EQE_DATA_RESERVED0_SHIFT 7 +}; + struct malicious_vf_eqe_data { u8 vf_id; u8 err_id; @@ -488,8 +671,10 @@ struct initial_cleanup_eqe_data { union event_ring_data { u8 bytes[8]; struct vf_pf_channel_eqe_data vf_pf_channel; + struct iscsi_eqe_data iscsi_info; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; + struct regpair roce_handle; }; /* Event Ring Entry */ @@ -616,6 +801,52 @@ enum db_dest { MAX_DB_DEST }; +/* Enum of doorbell DPM types */ +enum db_dpm_type { + DPM_LEGACY, + DPM_ROCE, + DPM_L2_INLINE, + DPM_L2_BD, + MAX_DB_DPM_TYPE +}; + +/* Structure for doorbell data, in L2 DPM mode, for 1st db in a DPM burst */ +struct db_l2_dpm_data { + __le16 icid; + __le16 bd_prod; + __le32 params; +#define DB_L2_DPM_DATA_SIZE_MASK 0x3F +#define DB_L2_DPM_DATA_SIZE_SHIFT 0 +#define DB_L2_DPM_DATA_DPM_TYPE_MASK 0x3 +#define DB_L2_DPM_DATA_DPM_TYPE_SHIFT 6 +#define DB_L2_DPM_DATA_NUM_BDS_MASK 0xFF +#define DB_L2_DPM_DATA_NUM_BDS_SHIFT 8 +#define DB_L2_DPM_DATA_PKT_SIZE_MASK 0x7FF +#define DB_L2_DPM_DATA_PKT_SIZE_SHIFT 16 +#define DB_L2_DPM_DATA_RESERVED0_MASK 0x1 +#define DB_L2_DPM_DATA_RESERVED0_SHIFT 27 +#define DB_L2_DPM_DATA_SGE_NUM_MASK 0x7 +#define DB_L2_DPM_DATA_SGE_NUM_SHIFT 28 +#define DB_L2_DPM_DATA_RESERVED1_MASK 0x1 +#define DB_L2_DPM_DATA_RESERVED1_SHIFT 31 +}; + +/* Structure for SGE in a DPM doorbell of type DPM_L2_BD */ +struct db_l2_dpm_sge { + struct regpair addr; + __le16 nbytes; + __le16 bitfields; +#define DB_L2_DPM_SGE_TPH_ST_INDEX_MASK 0x1FF +#define DB_L2_DPM_SGE_TPH_ST_INDEX_SHIFT 0 +#define DB_L2_DPM_SGE_RESERVED0_MASK 0x3 +#define DB_L2_DPM_SGE_RESERVED0_SHIFT 9 +#define DB_L2_DPM_SGE_ST_VALID_MASK 0x1 +#define DB_L2_DPM_SGE_ST_VALID_SHIFT 11 +#define DB_L2_DPM_SGE_RESERVED1_MASK 0xF +#define DB_L2_DPM_SGE_RESERVED1_SHIFT 12 + __le32 reserved2; +}; + /* Structure for doorbell address, in legacy mode */ struct db_legacy_addr { __le32 addr; @@ -627,6 +858,49 @@ struct db_legacy_addr { #define DB_LEGACY_ADDR_ICID_SHIFT 5 }; +/* Structure for doorbell address, in PWM mode */ +struct db_pwm_addr { + __le32 addr; +#define DB_PWM_ADDR_RESERVED0_MASK 0x7 +#define DB_PWM_ADDR_RESERVED0_SHIFT 0 +#define DB_PWM_ADDR_OFFSET_MASK 0x7F +#define DB_PWM_ADDR_OFFSET_SHIFT 3 +#define DB_PWM_ADDR_WID_MASK 0x3 +#define DB_PWM_ADDR_WID_SHIFT 10 +#define DB_PWM_ADDR_DPI_MASK 0xFFFF +#define DB_PWM_ADDR_DPI_SHIFT 12 +#define DB_PWM_ADDR_RESERVED1_MASK 0xF +#define DB_PWM_ADDR_RESERVED1_SHIFT 28 +}; + +/* Parameters to RoCE firmware, passed in EDPM doorbell */ +struct db_roce_dpm_params { + __le32 params; +#define DB_ROCE_DPM_PARAMS_SIZE_MASK 0x3F +#define DB_ROCE_DPM_PARAMS_SIZE_SHIFT 0 +#define DB_ROCE_DPM_PARAMS_DPM_TYPE_MASK 0x3 +#define DB_ROCE_DPM_PARAMS_DPM_TYPE_SHIFT 6 +#define DB_ROCE_DPM_PARAMS_OPCODE_MASK 0xFF +#define DB_ROCE_DPM_PARAMS_OPCODE_SHIFT 8 +#define DB_ROCE_DPM_PARAMS_WQE_SIZE_MASK 0x7FF +#define DB_ROCE_DPM_PARAMS_WQE_SIZE_SHIFT 16 +#define DB_ROCE_DPM_PARAMS_RESERVED0_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_RESERVED0_SHIFT 27 +#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_COMPLETION_FLG_SHIFT 28 +#define DB_ROCE_DPM_PARAMS_S_FLG_MASK 0x1 +#define DB_ROCE_DPM_PARAMS_S_FLG_SHIFT 29 +#define DB_ROCE_DPM_PARAMS_RESERVED1_MASK 0x3 +#define DB_ROCE_DPM_PARAMS_RESERVED1_SHIFT 30 +}; + +/* Structure for doorbell data, in ROCE DPM mode, for 1st db in a DPM burst */ +struct db_roce_dpm_data { + __le16 icid; + __le16 prod_val; + struct db_roce_dpm_params params; +}; + /* Igu interrupt command */ enum igu_int_cmd { IGU_INT_ENABLE = 0, @@ -764,6 +1038,19 @@ struct pxp_ptt_entry { struct pxp_pretend_cmd pretend; }; +/* VF Zone A Permission Register. */ +struct pxp_vf_zone_a_permission { + __le32 control; +#define PXP_VF_ZONE_A_PERMISSION_VFID_MASK 0xFF +#define PXP_VF_ZONE_A_PERMISSION_VFID_SHIFT 0 +#define PXP_VF_ZONE_A_PERMISSION_VALID_MASK 0x1 +#define PXP_VF_ZONE_A_PERMISSION_VALID_SHIFT 8 +#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_MASK 0x7F +#define PXP_VF_ZONE_A_PERMISSION_RESERVED0_SHIFT 9 +#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_MASK 0xFFFF +#define PXP_VF_ZONE_A_PERMISSION_RESERVED1_SHIFT 16 +}; + /* RSS hash type */ struct rdif_task_context { __le32 initial_ref_tag; @@ -831,6 +1118,7 @@ struct rdif_task_context { __le32 reserved2; }; +/* RSS hash type */ enum rss_hash_type { RSS_HASH_TYPE_DEFAULT = 0, RSS_HASH_TYPE_IPV4 = 1, @@ -942,7 +1230,7 @@ struct tdif_task_context { }; struct timers_context { - __le32 logical_client0; + __le32 logical_client_0; #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC0_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC0_MASK 0x1 @@ -951,7 +1239,7 @@ struct timers_context { #define TIMERS_CONTEXT_ACTIVELC0_SHIFT 29 #define TIMERS_CONTEXT_RESERVED0_MASK 0x3 #define TIMERS_CONTEXT_RESERVED0_SHIFT 30 - __le32 logical_client1; + __le32 logical_client_1; #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC1_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC1_MASK 0x1 @@ -960,7 +1248,7 @@ struct timers_context { #define TIMERS_CONTEXT_ACTIVELC1_SHIFT 29 #define TIMERS_CONTEXT_RESERVED1_MASK 0x3 #define TIMERS_CONTEXT_RESERVED1_SHIFT 30 - __le32 logical_client2; + __le32 logical_client_2; #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_MASK 0xFFFFFFF #define TIMERS_CONTEXT_EXPIRATIONTIMELC2_SHIFT 0 #define TIMERS_CONTEXT_VALIDLC2_MASK 0x1 @@ -978,3 +1266,4 @@ struct timers_context { #define TIMERS_CONTEXT_RESERVED3_SHIFT 29 }; #endif /* __COMMON_HSI__ */ +#endif diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index b5ebc697d05f..1aa0727c4136 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -13,9 +13,12 @@ /* ETH FW CONSTANTS */ /********************/ #define ETH_HSI_VER_MAJOR 3 -#define ETH_HSI_VER_MINOR 0 -#define ETH_CACHE_LINE_SIZE 64 +#define ETH_HSI_VER_MINOR 10 +#define ETH_HSI_VER_NO_PKT_LEN_TUNN 5 + +#define ETH_CACHE_LINE_SIZE 64 +#define ETH_RX_CQE_GAP 32 #define ETH_MAX_RAMROD_PER_CON 8 #define ETH_TX_BD_PAGE_SIZE_BYTES 4096 #define ETH_RX_BD_PAGE_SIZE_BYTES 4096 @@ -24,15 +27,25 @@ #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 +#define ETH_TX_MAX_BDS_PER_LSO_PACKET 255 #define ETH_TX_MAX_LSO_HDR_NBD 4 #define ETH_TX_MIN_BDS_PER_LSO_PKT 3 #define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT 3 #define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT 2 #define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE 2 -#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 12 + 8)) +#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 4 + 12 + 8)) #define ETH_TX_MAX_LSO_HDR_BYTES 510 +#define ETH_TX_LSO_WINDOW_BDS_NUM (18 - 1) +#define ETH_TX_LSO_WINDOW_MIN_LEN 9700 +#define ETH_TX_MAX_LSO_PAYLOAD_LEN 0xFE000 +#define ETH_TX_NUM_SAME_AS_LAST_ENTRIES 320 +#define ETH_TX_INACTIVE_SAME_AS_LAST 0xFFFF #define ETH_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS +#define ETH_NUM_STATISTIC_COUNTERS_DOUBLE_VF_ZONE \ + (ETH_NUM_STATISTIC_COUNTERS - MAX_NUM_VFS / 2) +#define ETH_NUM_STATISTIC_COUNTERS_QUAD_VF_ZONE \ + (ETH_NUM_STATISTIC_COUNTERS - 3 * MAX_NUM_VFS / 4) /* Maximum number of buffers, used for RX packet placement */ #define ETH_RX_MAX_BUFF_PER_PKT 5 @@ -59,6 +72,8 @@ #define ETH_TPA_CQE_CONT_LEN_LIST_SIZE 6 #define ETH_TPA_CQE_END_LEN_LIST_SIZE 4 +/* Control frame check constants */ +#define ETH_CTL_FRAME_ETH_TYPE_NUM 4 struct eth_tx_1st_bd_flags { u8 bitfields; @@ -82,10 +97,10 @@ struct eth_tx_1st_bd_flags { /* The parsing information data fo rthe first tx bd of a given packet. */ struct eth_tx_data_1st_bd { - __le16 vlan; - u8 nbds; - struct eth_tx_1st_bd_flags bd_flags; - __le16 bitfields; + __le16 vlan; + u8 nbds; + struct eth_tx_1st_bd_flags bd_flags; + __le16 bitfields; #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_MASK 0x1 #define ETH_TX_DATA_1ST_BD_TUNN_FLAG_SHIFT 0 #define ETH_TX_DATA_1ST_BD_RESERVED0_MASK 0x1 @@ -96,7 +111,7 @@ struct eth_tx_data_1st_bd { /* The parsing information data for the second tx bd of a given packet. */ struct eth_tx_data_2nd_bd { - __le16 tunn_ip_size; + __le16 tunn_ip_size; __le16 bitfields1; #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK 0xF #define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 @@ -125,9 +140,14 @@ struct eth_tx_data_2nd_bd { #define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 }; +/* Firmware data for L2-EDPM packet. */ +struct eth_edpm_fw_data { + struct eth_tx_data_1st_bd data_1st_bd; + struct eth_tx_data_2nd_bd data_2nd_bd; + __le32 reserved; +}; + struct eth_fast_path_cqe_fw_debug { - u8 reserved0; - u8 reserved1; __le16 reserved2; }; @@ -148,6 +168,17 @@ struct eth_tunnel_parsing_flags { #define ETH_TUNNEL_PARSING_FLAGS_IPV4_OPTIONS_SHIFT 7 }; +/* PMD flow control bits */ +struct eth_pmd_flow_flags { + u8 flags; +#define ETH_PMD_FLOW_FLAGS_VALID_MASK 0x1 +#define ETH_PMD_FLOW_FLAGS_VALID_SHIFT 0 +#define ETH_PMD_FLOW_FLAGS_TOGGLE_MASK 0x1 +#define ETH_PMD_FLOW_FLAGS_TOGGLE_SHIFT 1 +#define ETH_PMD_FLOW_FLAGS_RESERVED_MASK 0x3F +#define ETH_PMD_FLOW_FLAGS_RESERVED_SHIFT 2 +}; + /* Regular ETH Rx FP CQE. */ struct eth_fast_path_rx_reg_cqe { u8 type; @@ -166,64 +197,63 @@ struct eth_fast_path_rx_reg_cqe { u8 placement_offset; struct eth_tunnel_parsing_flags tunnel_pars_flags; u8 bd_num; - u8 reserved[7]; + u8 reserved[9]; struct eth_fast_path_cqe_fw_debug fw_debug; u8 reserved1[3]; - u8 flags; -#define ETH_FAST_PATH_RX_REG_CQE_VALID_MASK 0x1 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_SHIFT 0 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_MASK 0x1 -#define ETH_FAST_PATH_RX_REG_CQE_VALID_TOGGLE_SHIFT 1 -#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_MASK 0x3F -#define ETH_FAST_PATH_RX_REG_CQE_RESERVED2_SHIFT 2 + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-continue ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_cont_cqe { - u8 type; - u8 tpa_agg_index; - __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; - u8 reserved[5]; - u8 reserved1; - __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 type; + u8 tpa_agg_index; + __le16 len_list[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved; + u8 reserved1; + __le16 reserved2[ETH_TPA_CQE_CONT_LEN_LIST_SIZE]; + u8 reserved3[3]; + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-end ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_end_cqe { - u8 type; - u8 tpa_agg_index; - __le16 total_packet_len; - u8 num_of_bds; - u8 end_reason; - __le16 num_of_coalesced_segs; - __le32 ts_delta; - __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; - u8 reserved1[3]; - u8 reserved2; - __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + u8 type; + u8 tpa_agg_index; + __le16 total_packet_len; + u8 num_of_bds; + u8 end_reason; + __le16 num_of_coalesced_segs; + __le32 ts_delta; + __le16 len_list[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + __le16 reserved3[ETH_TPA_CQE_END_LEN_LIST_SIZE]; + __le16 reserved1; + u8 reserved2; + struct eth_pmd_flow_flags pmd_flags; }; /* TPA-start ETH Rx FP CQE. */ struct eth_fast_path_rx_tpa_start_cqe { - u8 type; - u8 bitfields; + u8 type; + u8 bitfields; #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_MASK 0x7 #define ETH_FAST_PATH_RX_TPA_START_CQE_RSS_HASH_TYPE_SHIFT 0 #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_MASK 0xF #define ETH_FAST_PATH_RX_TPA_START_CQE_TC_SHIFT 3 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_MASK 0x1 #define ETH_FAST_PATH_RX_TPA_START_CQE_RESERVED0_SHIFT 7 - __le16 seg_len; + __le16 seg_len; struct parsing_and_err_flags pars_flags; - __le16 vlan_tag; - __le32 rss_hash; - __le16 len_on_first_bd; - u8 placement_offset; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_first_bd; + u8 placement_offset; struct eth_tunnel_parsing_flags tunnel_pars_flags; - u8 tpa_agg_index; - u8 header_len; - __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; + u8 tpa_agg_index; + u8 header_len; + __le16 ext_bd_len_list[ETH_TPA_CQE_START_LEN_LIST_SIZE]; struct eth_fast_path_cqe_fw_debug fw_debug; + u8 reserved; + struct eth_pmd_flow_flags pmd_flags; }; /* The L4 pseudo checksum mode for Ethernet */ @@ -245,15 +275,7 @@ struct eth_slow_path_rx_cqe { u8 reserved[25]; __le16 echo; u8 reserved1; - u8 flags; -/* for PMD mode - valid indication */ -#define ETH_SLOW_PATH_RX_CQE_VALID_MASK 0x1 -#define ETH_SLOW_PATH_RX_CQE_VALID_SHIFT 0 -/* for PMD mode - valid toggle indication */ -#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_MASK 0x1 -#define ETH_SLOW_PATH_RX_CQE_VALID_TOGGLE_SHIFT 1 -#define ETH_SLOW_PATH_RX_CQE_RESERVED2_MASK 0x3F -#define ETH_SLOW_PATH_RX_CQE_RESERVED2_SHIFT 2 + struct eth_pmd_flow_flags pmd_flags; }; /* union for all ETH Rx CQE types */ @@ -276,6 +298,11 @@ enum eth_rx_cqe_type { MAX_ETH_RX_CQE_TYPE }; +struct eth_rx_pmd_cqe { + union eth_rx_cqe cqe; + u8 reserved[ETH_RX_CQE_GAP]; +}; + enum eth_rx_tunn_type { ETH_RX_NO_TUNN, ETH_RX_TUNN_GENEVE, @@ -313,8 +340,8 @@ struct eth_tx_2nd_bd { /* The parsing information data for the third tx bd of a given packet. */ struct eth_tx_data_3rd_bd { - __le16 lso_mss; - __le16 bitfields; + __le16 lso_mss; + __le16 bitfields; #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK 0xF #define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0 #define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK 0xF @@ -323,8 +350,8 @@ struct eth_tx_data_3rd_bd { #define ETH_TX_DATA_3RD_BD_START_BD_SHIFT 8 #define ETH_TX_DATA_3RD_BD_RESERVED0_MASK 0x7F #define ETH_TX_DATA_3RD_BD_RESERVED0_SHIFT 9 - u8 tunn_l4_hdr_start_offset_w; - u8 tunn_hdr_size_w; + u8 tunn_l4_hdr_start_offset_w; + u8 tunn_hdr_size_w; }; /* The third tx bd of a given packet */ @@ -355,10 +382,10 @@ struct eth_tx_bd { }; union eth_tx_bd_types { - struct eth_tx_1st_bd first_bd; - struct eth_tx_2nd_bd second_bd; - struct eth_tx_3rd_bd third_bd; - struct eth_tx_bd reg_bd; + struct eth_tx_1st_bd first_bd; + struct eth_tx_2nd_bd second_bd; + struct eth_tx_3rd_bd third_bd; + struct eth_tx_bd reg_bd; }; /* Mstorm Queue Zone */ @@ -389,8 +416,8 @@ struct eth_db_data { #define ETH_DB_DATA_RESERVED_SHIFT 5 #define ETH_DB_DATA_AGG_VAL_SEL_MASK 0x3 #define ETH_DB_DATA_AGG_VAL_SEL_SHIFT 6 - u8 agg_flags; - __le16 bd_prod; + u8 agg_flags; + __le16 bd_prod; }; #endif /* __ETH_COMMON__ */ diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index b3c0feb15ae9..8f64b1223c2f 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -311,7 +311,7 @@ struct iscsi_login_req_hdr { #define ISCSI_LOGIN_REQ_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_LOGIN_REQ_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 isid_TABC; + __le32 isid_tabc; __le16 tsih; __le16 isid_d; __le32 itt; @@ -464,7 +464,7 @@ struct iscsi_login_response_hdr { #define ISCSI_LOGIN_RESPONSE_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_LOGIN_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 isid_TABC; + __le32 isid_tabc; __le16 tsih; __le16 isid_d; __le32 itt; @@ -688,8 +688,7 @@ union iscsi_cqe { enum iscsi_cqes_type { ISCSI_CQE_TYPE_SOLICITED = 1, ISCSI_CQE_TYPE_UNSOLICITED, - ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE - , + ISCSI_CQE_TYPE_SOLICITED_WITH_SENSE, ISCSI_CQE_TYPE_TASK_CLEANUP, ISCSI_CQE_TYPE_DUMMY, MAX_ISCSI_CQES_TYPE @@ -769,9 +768,9 @@ enum iscsi_eqe_opcode { ISCSI_EVENT_TYPE_UPDATE_CONN, ISCSI_EVENT_TYPE_CLEAR_SQ, ISCSI_EVENT_TYPE_TERMINATE_CONN, + ISCSI_EVENT_TYPE_MAC_UPDATE_CONN, ISCSI_EVENT_TYPE_ASYN_CONNECT_COMPLETE, ISCSI_EVENT_TYPE_ASYN_TERMINATE_DONE, - RESERVED8, RESERVED9, ISCSI_EVENT_TYPE_START_OF_ERROR_TYPES = 10, ISCSI_EVENT_TYPE_ASYN_ABORT_RCVD, @@ -867,6 +866,7 @@ enum iscsi_ramrod_cmd_id { ISCSI_RAMROD_CMD_ID_UPDATE_CONN = 4, ISCSI_RAMROD_CMD_ID_TERMINATION_CONN = 5, ISCSI_RAMROD_CMD_ID_CLEAR_SQ = 6, + ISCSI_RAMROD_CMD_ID_MAC_UPDATE = 7, MAX_ISCSI_RAMROD_CMD_ID }; @@ -883,6 +883,16 @@ union iscsi_seq_num { __le16 r2t_sn; }; +struct iscsi_spe_conn_mac_update { + struct iscsi_slow_path_hdr hdr; + __le16 conn_id; + __le32 fw_cid; + __le16 remote_mac_addr_lo; + __le16 remote_mac_addr_mid; + __le16 remote_mac_addr_hi; + u8 reserved0[2]; +}; + struct iscsi_spe_conn_offload { struct iscsi_slow_path_hdr hdr; __le16 conn_id; @@ -1302,14 +1312,6 @@ struct mstorm_iscsi_stats_drv { struct regpair iscsi_rx_dropped_pdus_task_not_valid; }; -struct ooo_opaque { - __le32 cid; - u8 drop_isle; - u8 drop_size; - u8 ooo_opcode; - u8 ooo_isle; -}; - struct pstorm_iscsi_stats_drv { struct regpair iscsi_tx_bytes_cnt; struct regpair iscsi_tx_packet_cnt; diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h index 7e441bdeabdc..72d88cf3ca25 100644 --- a/include/linux/qed/qed_chain.h +++ b/include/linux/qed/qed_chain.h @@ -16,19 +16,6 @@ #include #include -/* dma_addr_t manip */ -#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) -#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) -#define DMA_REGPAIR_LE(x, val) do { \ - (x).hi = DMA_HI_LE((val)); \ - (x).lo = DMA_LO_LE((val)); \ - } while (0) - -#define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) -#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) -#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) -#define HILO_DMA_REGPAIR(regpair) ((dma_addr_t)HILO_64_REGPAIR(regpair)) - enum qed_chain_mode { /* Each Page contains a next pointer at its end */ QED_CHAIN_MODE_NEXT_PTR, diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 4475a9d8ae15..33c24ebc9b7f 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -23,6 +23,9 @@ struct qed_dev_eth_info { u8 port_mac[ETH_ALEN]; u8 num_vlan_filters; + + /* Legacy VF - this affects the datapath, so qede has to know */ + bool is_legacy; }; struct qed_update_vport_rss_params { diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d6c4177df7cb..f9ae903bbb84 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -34,6 +34,8 @@ enum dcbx_protocol_type { DCBX_MAX_PROTOCOL_TYPE }; +#define QED_ROCE_PROTOCOL_INDEX (3) + #ifdef CONFIG_DCB #define QED_LLDP_CHASSIS_ID_STAT_LEN 4 #define QED_LLDP_PORT_ID_STAT_LEN 4 @@ -260,15 +262,15 @@ struct qed_dev_info { /* MFW version */ u32 mfw_rev; - bool rdma_supported; - u32 flash_size; u8 mf_mode; bool tx_switching; + bool rdma_supported; }; enum qed_sb_type { QED_SB_TYPE_L2_QUEUE, + QED_SB_TYPE_CNQ, }; enum qed_protocol { @@ -276,6 +278,21 @@ enum qed_protocol { QED_PROTOCOL_ISCSI, }; +enum qed_link_mode_bits { + QED_LM_FIBRE_BIT = BIT(0), + QED_LM_Autoneg_BIT = BIT(1), + QED_LM_Asym_Pause_BIT = BIT(2), + QED_LM_Pause_BIT = BIT(3), + QED_LM_1000baseT_Half_BIT = BIT(4), + QED_LM_1000baseT_Full_BIT = BIT(5), + QED_LM_10000baseKR_Full_BIT = BIT(6), + QED_LM_25000baseKR_Full_BIT = BIT(7), + QED_LM_40000baseLR4_Full_BIT = BIT(8), + QED_LM_50000baseKR2_Full_BIT = BIT(9), + QED_LM_100000baseKR4_Full_BIT = BIT(10), + QED_LM_COUNT = 11 +}; + struct qed_link_params { bool link_up; @@ -303,9 +320,11 @@ struct qed_link_params { struct qed_link_output { bool link_up; - u32 supported_caps; /* In SUPPORTED defs */ - u32 advertised_caps; /* In ADVERTISED defs */ - u32 lp_caps; /* In ADVERTISED defs */ + /* In QED_LM_* defs */ + u32 supported_caps; + u32 advertised_caps; + u32 lp_caps; + u32 speed; /* In Mb/s */ u8 duplex; /* In DUPLEX defs */ u8 port; /* In PORT defs */ @@ -438,6 +457,10 @@ struct qed_common_ops { void (*simd_handler_clean)(struct qed_dev *cdev, int index); + int (*dbg_all_data) (struct qed_dev *cdev, void *buffer); + + int (*dbg_all_data_size) (struct qed_dev *cdev); + /** * @brief can_link_change - can the instance change the link or not * @@ -606,8 +629,9 @@ enum DP_MODULE { QED_MSG_SP = 0x100000, QED_MSG_STORAGE = 0x200000, QED_MSG_CXT = 0x800000, + QED_MSG_LL2 = 0x1000000, QED_MSG_ILT = 0x2000000, - QED_MSG_ROCE = 0x4000000, + QED_MSG_RDMA = 0x4000000, QED_MSG_DEBUG = 0x8000000, /* to be added...up to 0x8000000 */ }; diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h new file mode 100644 index 000000000000..fd75c265dba3 --- /dev/null +++ b/include/linux/qed/qed_ll2_if.h @@ -0,0 +1,139 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_LL2_IF_H +#define _QED_LL2_IF_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct qed_ll2_stats { + u64 gsi_invalid_hdr; + u64 gsi_invalid_pkt_length; + u64 gsi_unsupported_pkt_typ; + u64 gsi_crcchksm_error; + + u64 packet_too_big_discard; + u64 no_buff_discard; + + u64 rcv_ucast_bytes; + u64 rcv_mcast_bytes; + u64 rcv_bcast_bytes; + u64 rcv_ucast_pkts; + u64 rcv_mcast_pkts; + u64 rcv_bcast_pkts; + + u64 sent_ucast_bytes; + u64 sent_mcast_bytes; + u64 sent_bcast_bytes; + u64 sent_ucast_pkts; + u64 sent_mcast_pkts; + u64 sent_bcast_pkts; +}; + +#define QED_LL2_UNUSED_HANDLE (0xff) + +struct qed_ll2_cb_ops { + int (*rx_cb)(void *, struct sk_buff *, u32, u32); + int (*tx_cb)(void *, struct sk_buff *, bool); +}; + +struct qed_ll2_params { + u16 mtu; + bool drop_ttl0_packets; + bool rx_vlan_stripping; + u8 tx_tc; + bool frags_mapped; + u8 ll2_mac_address[ETH_ALEN]; +}; + +struct qed_ll2_ops { +/** + * @brief start - initializes ll2 + * + * @param cdev + * @param params - protocol driver configuration for the ll2. + * + * @return 0 on success, otherwise error value. + */ + int (*start)(struct qed_dev *cdev, struct qed_ll2_params *params); + +/** + * @brief stop - stops the ll2 + * + * @param cdev + * + * @return 0 on success, otherwise error value. + */ + int (*stop)(struct qed_dev *cdev); + +/** + * @brief start_xmit - transmits an skb over the ll2 interface + * + * @param cdev + * @param skb + * + * @return 0 on success, otherwise error value. + */ + int (*start_xmit)(struct qed_dev *cdev, struct sk_buff *skb); + +/** + * @brief register_cb_ops - protocol driver register the callback for Rx/Tx + * packets. Should be called before `start'. + * + * @param cdev + * @param cookie - to be passed to the callback functions. + * @param ops - the callback functions to register for Rx / Tx. + * + * @return 0 on success, otherwise error value. + */ + void (*register_cb_ops)(struct qed_dev *cdev, + const struct qed_ll2_cb_ops *ops, + void *cookie); + +/** + * @brief get LL2 related statistics + * + * @param cdev + * @param stats - pointer to struct that would be filled with stats + * + * @return 0 on success, error otherwise. + */ + int (*get_stats)(struct qed_dev *cdev, struct qed_ll2_stats *stats); +}; + +#ifdef CONFIG_QED_LL2 +int qed_ll2_alloc_if(struct qed_dev *); +void qed_ll2_dealloc_if(struct qed_dev *); +#else +static const struct qed_ll2_ops qed_ll2_ops_pass = { + .start = NULL, + .stop = NULL, + .start_xmit = NULL, + .register_cb_ops = NULL, + .get_stats = NULL, +}; + +static inline int qed_ll2_alloc_if(struct qed_dev *cdev) +{ + return 0; +} + +static inline void qed_ll2_dealloc_if(struct qed_dev *cdev) +{ +} +#endif +#endif diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h new file mode 100644 index 000000000000..53047d3fa678 --- /dev/null +++ b/include/linux/qed/qed_roce_if.h @@ -0,0 +1,604 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _QED_ROCE_IF_H +#define _QED_ROCE_IF_H +#include +#include +#include +#include +#include +#include +#include +#include +#include + +enum qed_roce_ll2_tx_dest { + /* Light L2 TX Destination to the Network */ + QED_ROCE_LL2_TX_DEST_NW, + + /* Light L2 TX Destination to the Loopback */ + QED_ROCE_LL2_TX_DEST_LB, + QED_ROCE_LL2_TX_DEST_MAX +}; + +#define QED_RDMA_MAX_CNQ_SIZE (0xFFFF) + +/* rdma interface */ + +enum qed_roce_qp_state { + QED_ROCE_QP_STATE_RESET, + QED_ROCE_QP_STATE_INIT, + QED_ROCE_QP_STATE_RTR, + QED_ROCE_QP_STATE_RTS, + QED_ROCE_QP_STATE_SQD, + QED_ROCE_QP_STATE_ERR, + QED_ROCE_QP_STATE_SQE +}; + +enum qed_rdma_tid_type { + QED_RDMA_TID_REGISTERED_MR, + QED_RDMA_TID_FMR, + QED_RDMA_TID_MW_TYPE1, + QED_RDMA_TID_MW_TYPE2A +}; + +struct qed_rdma_events { + void *context; + void (*affiliated_event)(void *context, u8 fw_event_code, + void *fw_handle); + void (*unaffiliated_event)(void *context, u8 event_code); +}; + +struct qed_rdma_device { + u32 vendor_id; + u32 vendor_part_id; + u32 hw_ver; + u64 fw_ver; + + u64 node_guid; + u64 sys_image_guid; + + u8 max_cnq; + u8 max_sge; + u8 max_srq_sge; + u16 max_inline; + u32 max_wqe; + u32 max_srq_wqe; + u8 max_qp_resp_rd_atomic_resc; + u8 max_qp_req_rd_atomic_resc; + u64 max_dev_resp_rd_atomic_resc; + u32 max_cq; + u32 max_qp; + u32 max_srq; + u32 max_mr; + u64 max_mr_size; + u32 max_cqe; + u32 max_mw; + u32 max_fmr; + u32 max_mr_mw_fmr_pbl; + u64 max_mr_mw_fmr_size; + u32 max_pd; + u32 max_ah; + u8 max_pkey; + u16 max_srq_wr; + u8 max_stats_queues; + u32 dev_caps; + + /* Abilty to support RNR-NAK generation */ + +#define QED_RDMA_DEV_CAP_RNR_NAK_MASK 0x1 +#define QED_RDMA_DEV_CAP_RNR_NAK_SHIFT 0 + /* Abilty to support shutdown port */ +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_MASK 0x1 +#define QED_RDMA_DEV_CAP_SHUTDOWN_PORT_SHIFT 1 + /* Abilty to support port active event */ +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_ACTIVE_EVENT_SHIFT 2 + /* Abilty to support port change event */ +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_MASK 0x1 +#define QED_RDMA_DEV_CAP_PORT_CHANGE_EVENT_SHIFT 3 + /* Abilty to support system image GUID */ +#define QED_RDMA_DEV_CAP_SYS_IMAGE_MASK 0x1 +#define QED_RDMA_DEV_CAP_SYS_IMAGE_SHIFT 4 + /* Abilty to support bad P_Key counter support */ +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BAD_PKEY_CNT_SHIFT 5 + /* Abilty to support atomic operations */ +#define QED_RDMA_DEV_CAP_ATOMIC_OP_MASK 0x1 +#define QED_RDMA_DEV_CAP_ATOMIC_OP_SHIFT 6 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_CQ_SHIFT 7 + /* Abilty to support modifying the maximum number of + * outstanding work requests per QP + */ +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_MASK 0x1 +#define QED_RDMA_DEV_CAP_RESIZE_MAX_WR_SHIFT 8 + /* Abilty to support automatic path migration */ +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_MASK 0x1 +#define QED_RDMA_DEV_CAP_AUTO_PATH_MIG_SHIFT 9 + /* Abilty to support the base memory management extensions */ +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_MEMORY_EXT_SHIFT 10 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_BASE_QUEUE_EXT_SHIFT 11 + /* Abilty to support multipile page sizes per memory region */ +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_MASK 0x1 +#define QED_RDMA_DEV_CAP_MULTI_PAGE_PER_MR_EXT_SHIFT 12 + /* Abilty to support block list physical buffer list */ +#define QED_RDMA_DEV_CAP_BLOCK_MODE_MASK 0x1 +#define QED_RDMA_DEV_CAP_BLOCK_MODE_SHIFT 13 + /* Abilty to support zero based virtual addresses */ +#define QED_RDMA_DEV_CAP_ZBVA_MASK 0x1 +#define QED_RDMA_DEV_CAP_ZBVA_SHIFT 14 + /* Abilty to support local invalidate fencing */ +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_MASK 0x1 +#define QED_RDMA_DEV_CAP_LOCAL_INV_FENCE_SHIFT 15 + /* Abilty to support Loopback on QP */ +#define QED_RDMA_DEV_CAP_LB_INDICATOR_MASK 0x1 +#define QED_RDMA_DEV_CAP_LB_INDICATOR_SHIFT 16 + u64 page_size_caps; + u8 dev_ack_delay; + u32 reserved_lkey; + u32 bad_pkey_counter; + struct qed_rdma_events events; +}; + +enum qed_port_state { + QED_RDMA_PORT_UP, + QED_RDMA_PORT_DOWN, +}; + +enum qed_roce_capability { + QED_ROCE_V1 = 1 << 0, + QED_ROCE_V2 = 1 << 1, +}; + +struct qed_rdma_port { + enum qed_port_state port_state; + int link_speed; + u64 max_msg_size; + u8 source_gid_table_len; + void *source_gid_table_ptr; + u8 pkey_table_len; + void *pkey_table_ptr; + u32 pkey_bad_counter; + enum qed_roce_capability capability; +}; + +struct qed_rdma_cnq_params { + u8 num_pbl_pages; + u64 pbl_ptr; +}; + +/* The CQ Mode affects the CQ doorbell transaction size. + * 64/32 bit machines should configure to 32/16 bits respectively. + */ +enum qed_rdma_cq_mode { + QED_RDMA_CQ_MODE_16_BITS, + QED_RDMA_CQ_MODE_32_BITS, +}; + +struct qed_roce_dcqcn_params { + u8 notification_point; + u8 reaction_point; + + /* fields for notification point */ + u32 cnp_send_timeout; + + /* fields for reaction point */ + u32 rl_bc_rate; + u16 rl_max_rate; + u16 rl_r_ai; + u16 rl_r_hai; + u16 dcqcn_g; + u32 dcqcn_k_us; + u32 dcqcn_timeout_us; +}; + +struct qed_rdma_start_in_params { + struct qed_rdma_events *events; + struct qed_rdma_cnq_params cnq_pbl_list[128]; + u8 desired_cnq; + enum qed_rdma_cq_mode cq_mode; + struct qed_roce_dcqcn_params dcqcn_params; + u16 max_mtu; + u8 mac_addr[ETH_ALEN]; + u8 iwarp_flags; +}; + +struct qed_rdma_add_user_out_params { + u16 dpi; + u64 dpi_addr; + u64 dpi_phys_addr; + u32 dpi_size; +}; + +enum roce_mode { + ROCE_V1, + ROCE_V2_IPV4, + ROCE_V2_IPV6, + MAX_ROCE_MODE +}; + +union qed_gid { + u8 bytes[16]; + u16 words[8]; + u32 dwords[4]; + u64 qwords[2]; + u32 ipv4_addr; +}; + +struct qed_rdma_register_tid_in_params { + u32 itid; + enum qed_rdma_tid_type tid_type; + u8 key; + u16 pd; + bool local_read; + bool local_write; + bool remote_read; + bool remote_write; + bool remote_atomic; + bool mw_bind; + u64 pbl_ptr; + bool pbl_two_level; + u8 pbl_page_size_log; + u8 page_size_log; + u32 fbo; + u64 length; + u64 vaddr; + bool zbva; + bool phy_mr; + bool dma_mr; + + bool dif_enabled; + u64 dif_error_addr; + u64 dif_runt_addr; +}; + +struct qed_rdma_create_cq_in_params { + u32 cq_handle_lo; + u32 cq_handle_hi; + u32 cq_size; + u16 dpi; + bool pbl_two_level; + u64 pbl_ptr; + u16 pbl_num_pages; + u8 pbl_page_size_log; + u8 cnq_id; + u16 int_timeout; +}; + +struct qed_rdma_create_srq_in_params { + u64 pbl_base_addr; + u64 prod_pair_addr; + u16 num_pages; + u16 pd_id; + u16 page_size; +}; + +struct qed_rdma_destroy_cq_in_params { + u16 icid; +}; + +struct qed_rdma_destroy_cq_out_params { + u16 num_cq_notif; +}; + +struct qed_rdma_create_qp_in_params { + u32 qp_handle_lo; + u32 qp_handle_hi; + u32 qp_handle_async_lo; + u32 qp_handle_async_hi; + bool use_srq; + bool signal_all; + bool fmr_and_reserved_lkey; + u16 pd; + u16 dpi; + u16 sq_cq_id; + u16 sq_num_pages; + u64 sq_pbl_ptr; + u8 max_sq_sges; + u16 rq_cq_id; + u16 rq_num_pages; + u64 rq_pbl_ptr; + u16 srq_id; + u8 stats_queue; +}; + +struct qed_rdma_create_qp_out_params { + u32 qp_id; + u16 icid; + void *rq_pbl_virt; + dma_addr_t rq_pbl_phys; + void *sq_pbl_virt; + dma_addr_t sq_pbl_phys; +}; + +struct qed_rdma_modify_qp_in_params { + u32 modify_flags; +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_NEW_STATE_SHIFT 0 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_PKEY_SHIFT 1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_RDMA_OPS_EN_SHIFT 2 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_DEST_QP_SHIFT 3 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ADDRESS_VECTOR_SHIFT 4 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RQ_PSN_SHIFT 5 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_SQ_PSN_SHIFT 6 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_REQ_SHIFT 7 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_MASK 0x1 +#define QED_RDMA_MODIFY_QP_VALID_MAX_RD_ATOMIC_RESP_SHIFT 8 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ACK_TIMEOUT_SHIFT 9 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RETRY_CNT_SHIFT 10 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_RNR_RETRY_CNT_SHIFT 11 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_MIN_RNR_NAK_TIMER_SHIFT 12 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_E2E_FLOW_CONTROL_EN_SHIFT 13 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_MASK 0x1 +#define QED_ROCE_MODIFY_QP_VALID_ROCE_MODE_SHIFT 14 + + enum qed_roce_qp_state new_state; + u16 pkey; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + u32 dest_qp; + bool lb_indication; + u16 mtu; + u8 traffic_class_tos; + u8 hop_limit_ttl; + u32 flow_label; + union qed_gid sgid; + union qed_gid dgid; + u16 udp_src_port; + + u16 vlan_id; + + u32 rq_psn; + u32 sq_psn; + u8 max_rd_atomic_resp; + u8 max_rd_atomic_req; + u32 ack_timeout; + u8 retry_cnt; + u8 rnr_retry_cnt; + u8 min_rnr_nak_timer; + bool sqd_async; + u8 remote_mac_addr[6]; + u8 local_mac_addr[6]; + bool use_local_mac; + enum roce_mode roce_mode; +}; + +struct qed_rdma_query_qp_out_params { + enum qed_roce_qp_state state; + u32 rq_psn; + u32 sq_psn; + bool draining; + u16 mtu; + u32 dest_qp; + bool incoming_rdma_read_en; + bool incoming_rdma_write_en; + bool incoming_atomic_en; + bool e2e_flow_control_en; + union qed_gid sgid; + union qed_gid dgid; + u32 flow_label; + u8 hop_limit_ttl; + u8 traffic_class_tos; + u32 timeout; + u8 rnr_retry; + u8 retry_cnt; + u8 min_rnr_nak_timer; + u16 pkey_index; + u8 max_rd_atomic; + u8 max_dest_rd_atomic; + bool sqd_async; +}; + +struct qed_rdma_create_srq_out_params { + u16 srq_id; +}; + +struct qed_rdma_destroy_srq_in_params { + u16 srq_id; +}; + +struct qed_rdma_modify_srq_in_params { + u32 wqe_limit; + u16 srq_id; +}; + +struct qed_rdma_stats_out_params { + u64 sent_bytes; + u64 sent_pkts; + u64 rcv_bytes; + u64 rcv_pkts; +}; + +struct qed_rdma_counters_out_params { + u64 pd_count; + u64 max_pd; + u64 dpi_count; + u64 max_dpi; + u64 cq_count; + u64 max_cq; + u64 qp_count; + u64 max_qp; + u64 tid_count; + u64 max_tid; +}; + +#define QED_ROCE_TX_HEAD_FAILURE (1) +#define QED_ROCE_TX_FRAG_FAILURE (2) + +struct qed_roce_ll2_header { + void *vaddr; + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_buffer { + dma_addr_t baddr; + size_t len; +}; + +struct qed_roce_ll2_packet { + struct qed_roce_ll2_header header; + int n_seg; + struct qed_roce_ll2_buffer payload[RDMA_MAX_SGE_PER_SQ_WQE]; + int roce_mode; + enum qed_roce_ll2_tx_dest tx_dest; +}; + +struct qed_roce_ll2_tx_params { + int reserved; +}; + +struct qed_roce_ll2_rx_params { + u16 vlan_id; + u8 smac[ETH_ALEN]; + int rc; +}; + +struct qed_roce_ll2_cbs { + void (*tx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt); + + void (*rx_cb)(void *pdev, struct qed_roce_ll2_packet *pkt, + struct qed_roce_ll2_rx_params *params); +}; + +struct qed_roce_ll2_params { + u16 max_rx_buffers; + u16 max_tx_buffers; + u16 mtu; + u8 mac_address[ETH_ALEN]; + struct qed_roce_ll2_cbs cbs; + void *cb_cookie; +}; + +struct qed_roce_ll2_info { + u8 handle; + struct qed_roce_ll2_cbs cbs; + u8 mac_address[ETH_ALEN]; + void *cb_cookie; + + /* Lock to protect ll2 */ + struct mutex lock; +}; + +enum qed_rdma_type { + QED_RDMA_TYPE_ROCE, +}; + +struct qed_dev_rdma_info { + struct qed_dev_info common; + enum qed_rdma_type rdma_type; +}; + +struct qed_rdma_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_rdma_info *info); + void *(*rdma_get_rdma_ctx)(struct qed_dev *cdev); + + int (*rdma_init)(struct qed_dev *dev, + struct qed_rdma_start_in_params *iparams); + + int (*rdma_add_user)(void *rdma_cxt, + struct qed_rdma_add_user_out_params *oparams); + + void (*rdma_remove_user)(void *rdma_cxt, u16 dpi); + int (*rdma_stop)(void *rdma_cxt); + struct qed_rdma_device* (*rdma_query_device)(void *rdma_cxt); + struct qed_rdma_port* (*rdma_query_port)(void *rdma_cxt); + int (*rdma_get_start_sb)(struct qed_dev *cdev); + int (*rdma_get_min_cnq_msix)(struct qed_dev *cdev); + void (*rdma_cnq_prod_update)(void *rdma_cxt, u8 cnq_index, u16 prod); + int (*rdma_get_rdma_int)(struct qed_dev *cdev, + struct qed_int_info *info); + int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt); + int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd); + void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd); + int (*rdma_create_cq)(void *rdma_cxt, + struct qed_rdma_create_cq_in_params *params, + u16 *icid); + int (*rdma_destroy_cq)(void *rdma_cxt, + struct qed_rdma_destroy_cq_in_params *iparams, + struct qed_rdma_destroy_cq_out_params *oparams); + struct qed_rdma_qp * + (*rdma_create_qp)(void *rdma_cxt, + struct qed_rdma_create_qp_in_params *iparams, + struct qed_rdma_create_qp_out_params *oparams); + + int (*rdma_modify_qp)(void *roce_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_modify_qp_in_params *iparams); + + int (*rdma_query_qp)(void *rdma_cxt, struct qed_rdma_qp *qp, + struct qed_rdma_query_qp_out_params *oparams); + int (*rdma_destroy_qp)(void *rdma_cxt, struct qed_rdma_qp *qp); + int + (*rdma_register_tid)(void *rdma_cxt, + struct qed_rdma_register_tid_in_params *iparams); + int (*rdma_deregister_tid)(void *rdma_cxt, u32 itid); + int (*rdma_alloc_tid)(void *rdma_cxt, u32 *itid); + void (*rdma_free_tid)(void *rdma_cxt, u32 itid); + int (*roce_ll2_start)(struct qed_dev *cdev, + struct qed_roce_ll2_params *params); + int (*roce_ll2_stop)(struct qed_dev *cdev); + int (*roce_ll2_tx)(struct qed_dev *cdev, + struct qed_roce_ll2_packet *packet, + struct qed_roce_ll2_tx_params *params); + int (*roce_ll2_post_rx_buffer)(struct qed_dev *cdev, + struct qed_roce_ll2_buffer *buf, + u64 cookie, u8 notify_fw); + int (*roce_ll2_set_mac_filter)(struct qed_dev *cdev, + u8 *old_mac_address, + u8 *new_mac_address); + int (*roce_ll2_stats)(struct qed_dev *cdev, + struct qed_ll2_stats *stats); +}; + +const struct qed_rdma_ops *qed_get_rdma_ops(void); + +#endif diff --git a/include/linux/qed/qede_roce.h b/include/linux/qed/qede_roce.h new file mode 100644 index 000000000000..99fbe6d55acb --- /dev/null +++ b/include/linux/qed/qede_roce.h @@ -0,0 +1,88 @@ +/* QLogic qedr NIC Driver + * Copyright (c) 2015-2016 QLogic Corporation + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and /or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef QEDE_ROCE_H +#define QEDE_ROCE_H + +struct qedr_dev; +struct qed_dev; +struct qede_dev; + +enum qede_roce_event { + QEDE_UP, + QEDE_DOWN, + QEDE_CHANGE_ADDR, + QEDE_CLOSE +}; + +struct qede_roce_event_work { + struct list_head list; + struct work_struct work; + void *ptr; + enum qede_roce_event event; +}; + +struct qedr_driver { + unsigned char name[32]; + + struct qedr_dev* (*add)(struct qed_dev *, struct pci_dev *, + struct net_device *); + + void (*remove)(struct qedr_dev *); + void (*notify)(struct qedr_dev *, enum qede_roce_event); +}; + +/* APIs for RoCE driver to register callback handlers, + * which will be invoked when device is added, removed, ifup, ifdown + */ +int qede_roce_register_driver(struct qedr_driver *drv); +void qede_roce_unregister_driver(struct qedr_driver *drv); + +bool qede_roce_supported(struct qede_dev *dev); + +#if IS_ENABLED(CONFIG_INFINIBAND_QEDR) +int qede_roce_dev_add(struct qede_dev *dev); +void qede_roce_dev_event_open(struct qede_dev *dev); +void qede_roce_dev_event_close(struct qede_dev *dev); +void qede_roce_dev_remove(struct qede_dev *dev); +void qede_roce_event_changeaddr(struct qede_dev *qedr); +#else +static inline int qede_roce_dev_add(struct qede_dev *dev) +{ + return 0; +} + +static inline void qede_roce_dev_event_open(struct qede_dev *dev) {} +static inline void qede_roce_dev_event_close(struct qede_dev *dev) {} +static inline void qede_roce_dev_remove(struct qede_dev *dev) {} +static inline void qede_roce_event_changeaddr(struct qede_dev *qedr) {} +#endif +#endif diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index 187991c1f439..7663725faa94 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -28,6 +28,7 @@ #define RDMA_MAX_PDS (64 * 1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS +#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB #define RDMA_TASK_TYPE (PROTOCOLID_ROCE) diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index accba0e6b704..dc3889d1bbe6 100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -11,6 +11,14 @@ #define TCP_INVALID_TIMEOUT_VAL -1 +struct ooo_opaque { + __le32 cid; + u8 drop_isle; + u8 drop_size; + u8 ooo_opcode; + u8 ooo_isle; +}; + enum tcp_connect_mode { TCP_CONNECT_ACTIVE, TCP_CONNECT_PASSIVE, @@ -18,14 +26,10 @@ enum tcp_connect_mode { }; struct tcp_init_params { - __le32 max_cwnd; - __le16 dup_ack_threshold; + __le32 two_msl_timer; __le16 tx_sws_timer; - __le16 min_rto; - __le16 min_rto_rt; - __le16 max_rto; u8 maxfinrt; - u8 reserved[1]; + u8 reserved[9]; }; enum tcp_ip_version { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3eef0802a0cd..5c132d3188be 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,7 +1,7 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2015 Herbert Xu + * Copyright (c) 2015-2016 Herbert Xu * Copyright (c) 2014-2015 Thomas Graf * Copyright (c) 2008-2014 Patrick McHardy * @@ -53,6 +53,11 @@ struct rhash_head { struct rhash_head __rcu *next; }; +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -137,6 +142,7 @@ struct rhashtable_params { * @key_len: Key length for hashfn * @elasticity: Maximum chain length before rehash * @p: Configuration parameters + * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping * @lock: Spin lock to protect walker list @@ -147,11 +153,20 @@ struct rhashtable { unsigned int key_len; unsigned int elasticity; struct rhashtable_params p; + bool rhlist; struct work_struct run_work; struct mutex mutex; spinlock_t lock; }; +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + /** * struct rhashtable_walker - Hash table walker * @list: List entry on list of walkers @@ -163,9 +178,10 @@ struct rhashtable_walker { }; /** - * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * struct rhashtable_iter - Hash table iterator * @ht: Table to iterate through * @p: Current pointer + * @list: Current hash list pointer * @walker: Associated rhashtable walker * @slot: Current slot * @skip: Number of entries to skip in slot @@ -173,7 +189,8 @@ struct rhashtable_walker { struct rhashtable_iter { struct rhashtable *ht; struct rhash_head *p; - struct rhashtable_walker *walker; + struct rhlist_head *list; + struct rhashtable_walker walker; unsigned int slot; unsigned int skip; }; @@ -339,15 +356,14 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, int rhashtable_init(struct rhashtable *ht, const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *old_tbl); -int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj); -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, - gfp_t gfp); +void rhashtable_walk_enter(struct rhashtable *ht, + struct rhashtable_iter *iter); void rhashtable_walk_exit(struct rhashtable_iter *iter); int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); void *rhashtable_walk_next(struct rhashtable_iter *iter); @@ -506,6 +522,31 @@ void rhashtable_destroy(struct rhashtable *ht); rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ tbl, hash, member) +/** + * rhl_for_each_rcu - iterate over rcu hash table list + * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_rcu(pos, list) \ + for (pos = list; pos; pos = rcu_dereference_raw(pos->next)) + +/** + * rhl_for_each_entry_rcu - iterate over rcu hash table list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rlist_head to use as a loop cursor. + * @list: the head of the list + * @member: name of the &struct rlist_head within the hashable struct. + * + * This hash chain list-traversal primitive should be used on the + * list returned by rhltable_lookup. + */ +#define rhl_for_each_entry_rcu(tpos, pos, list, member) \ + for (pos = list; pos && rht_entry(tpos, pos, member); \ + pos = rcu_dereference_raw(pos->next)) + static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, const void *obj) { @@ -515,18 +556,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); } -/** - * rhashtable_lookup_fast - search hash table, inlined version - * @ht: hash table - * @key: the pointer to the key - * @params: hash table parameters - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * Returns the first entry on which the compare function returned true. - */ -static inline void *rhashtable_lookup_fast( +/* Internal function, do not use. */ +static inline struct rhash_head *__rhashtable_lookup( struct rhashtable *ht, const void *key, const struct rhashtable_params params) { @@ -538,8 +569,6 @@ static inline void *rhashtable_lookup_fast( struct rhash_head *he; unsigned int hash; - rcu_read_lock(); - tbl = rht_dereference_rcu(ht->tbl, ht); restart: hash = rht_key_hashfn(ht, tbl, key, params); @@ -548,8 +577,7 @@ restart: params.obj_cmpfn(&arg, rht_obj(ht, he)) : rhashtable_compare(&arg, rht_obj(ht, he))) continue; - rcu_read_unlock(); - return rht_obj(ht, he); + return he; } /* Ensure we see any new tables. */ @@ -558,89 +586,165 @@ restart: tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (unlikely(tbl)) goto restart; - rcu_read_unlock(); return NULL; } -/* Internal function, please use rhashtable_insert_fast() instead */ -static inline int __rhashtable_insert_fast( - struct rhashtable *ht, const void *key, struct rhash_head *obj, +/** + * rhashtable_lookup - search hash table + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * This must only be called under the RCU read lock. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup( + struct rhashtable *ht, const void *key, const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(ht, key, params); + + return he ? rht_obj(ht, he) : NULL; +} + +/** + * rhashtable_lookup_fast - search hash table, without RCU read lock + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Only use this function when you have other mechanisms guaranteeing + * that the object won't go away after the RCU read lock is released. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + void *obj; + + rcu_read_lock(); + obj = rhashtable_lookup(ht, key, params); + rcu_read_unlock(); + + return obj; +} + +/** + * rhltable_lookup - search hash list table + * @hlt: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. All matching entries are returned + * in a list. + * + * This must only be called under the RCU read lock. + * + * Returns the list of entries that match the given key. + */ +static inline struct rhlist_head *rhltable_lookup( + struct rhltable *hlt, const void *key, + const struct rhashtable_params params) +{ + struct rhash_head *he = __rhashtable_lookup(&hlt->ht, key, params); + + return he ? container_of(he, struct rhlist_head, rhead) : NULL; +} + +/* Internal function, please use rhashtable_insert_fast() instead. This + * function returns the existing element already in hashes in there is a clash, + * otherwise it returns an error via ERR_PTR(). + */ +static inline void *__rhashtable_insert_fast( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params, bool rhlist) { struct rhashtable_compare_arg arg = { .ht = ht, .key = key, }; - struct bucket_table *tbl, *new_tbl; + struct rhash_head __rcu **pprev; + struct bucket_table *tbl; struct rhash_head *head; spinlock_t *lock; - unsigned int elasticity; unsigned int hash; - int err; + int elasticity; + void *data; -restart: rcu_read_lock(); tbl = rht_dereference_rcu(ht->tbl, ht); + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); - /* All insertions must grab the oldest table containing - * the hashed bucket that is yet to be rehashed. - */ - for (;;) { - hash = rht_head_hashfn(ht, tbl, obj, params); - lock = rht_bucket_lock(tbl, hash); - spin_lock_bh(lock); - - if (tbl->rehash <= hash) - break; - + if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { +slow_path: spin_unlock_bh(lock); - tbl = rht_dereference_rcu(tbl->future_tbl, ht); + rcu_read_unlock(); + return rhashtable_insert_slow(ht, key, obj); } - new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (unlikely(new_tbl)) { - tbl = rhashtable_insert_slow(ht, key, obj, new_tbl); - if (!IS_ERR_OR_NULL(tbl)) - goto slow_path; + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *plist; + struct rhlist_head *list; - err = PTR_ERR(tbl); - goto out; + elasticity--; + if (!key || + (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; + + data = rht_obj(ht, head); + + if (!rhlist) + goto out; + + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); + + goto good; } - err = -E2BIG; + if (elasticity <= 0) + goto slow_path; + + data = ERR_PTR(-E2BIG); if (unlikely(rht_grow_above_max(ht, tbl))) goto out; - if (unlikely(rht_grow_above_100(ht, tbl))) { -slow_path: - spin_unlock_bh(lock); - err = rhashtable_insert_rehash(ht, tbl); - rcu_read_unlock(); - if (err) - return err; - - goto restart; - } - - err = -EEXIST; - elasticity = ht->elasticity; - rht_for_each(head, tbl, hash) { - if (key && - unlikely(!(params.obj_cmpfn ? - params.obj_cmpfn(&arg, rht_obj(ht, head)) : - rhashtable_compare(&arg, rht_obj(ht, head))))) - goto out; - if (!--elasticity) - goto slow_path; - } - - err = 0; + if (unlikely(rht_grow_above_100(ht, tbl))) + goto slow_path; head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); @@ -648,11 +752,14 @@ slow_path: if (rht_grow_above_75(ht, tbl)) schedule_work(&ht->run_work); +good: + data = NULL; + out: spin_unlock_bh(lock); rcu_read_unlock(); - return err; + return data; } /** @@ -675,7 +782,65 @@ static inline int rhashtable_insert_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - return __rhashtable_insert_fast(ht, NULL, obj, params); + void *ret; + + ret = __rhashtable_insert_fast(ht, NULL, obj, params, false); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhltable_insert_key - insert object into hash list table + * @hlt: hash list table + * @key: the pointer to the key + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhltable_insert_key( + struct rhltable *hlt, const void *key, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return PTR_ERR(__rhashtable_insert_fast(&hlt->ht, key, &list->rhead, + params, true)); +} + +/** + * rhltable_insert - insert object into hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhltable_insert( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + const char *key = rht_obj(&hlt->ht, &list->rhead); + + key += params.key_offset; + + return rhltable_insert_key(hlt, key, list, params); } /** @@ -704,11 +869,16 @@ static inline int rhashtable_lookup_insert_fast( const struct rhashtable_params params) { const char *key = rht_obj(ht, obj); + void *ret; BUG_ON(ht->p.obj_hashfn); - return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, - params); + ret = __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; } /** @@ -737,15 +907,42 @@ static inline int rhashtable_lookup_insert_key( struct rhashtable *ht, const void *key, struct rhash_head *obj, const struct rhashtable_params params) { + void *ret; + BUG_ON(!ht->p.obj_hashfn || !key); - return __rhashtable_insert_fast(ht, key, obj, params); + ret = __rhashtable_insert_fast(ht, key, obj, params, false); + if (IS_ERR(ret)) + return PTR_ERR(ret); + + return ret == NULL ? 0 : -EEXIST; +} + +/** + * rhashtable_lookup_get_insert_key - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * @data: pointer to element data already in hashes + * + * Just like rhashtable_lookup_insert_key(), but this function returns the + * object if it exists, NULL if it does not and the insertion was successful, + * and an ERR_PTR otherwise. + */ +static inline void *rhashtable_lookup_get_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + BUG_ON(!ht->p.obj_hashfn || !key); + + return __rhashtable_insert_fast(ht, key, obj, params, false); } /* Internal function, please use rhashtable_remove_fast() instead */ -static inline int __rhashtable_remove_fast( +static inline int __rhashtable_remove_fast_one( struct rhashtable *ht, struct bucket_table *tbl, - struct rhash_head *obj, const struct rhashtable_params params) + struct rhash_head *obj, const struct rhashtable_params params, + bool rhlist) { struct rhash_head __rcu **pprev; struct rhash_head *he; @@ -760,18 +957,86 @@ static inline int __rhashtable_remove_fast( pprev = &tbl->buckets[hash]; rht_for_each(he, tbl, hash) { + struct rhlist_head *list; + + list = container_of(he, struct rhlist_head, rhead); + if (he != obj) { + struct rhlist_head __rcu **lpprev; + pprev = &he->next; - continue; + + if (!rhlist) + continue; + + do { + lpprev = &list->next; + list = rht_dereference_bucket(list->next, + tbl, hash); + } while (list && obj != &list->rhead); + + if (!list) + continue; + + list = rht_dereference_bucket(list->next, tbl, hash); + RCU_INIT_POINTER(*lpprev, list); + err = 0; + break; } - rcu_assign_pointer(*pprev, obj->next); - err = 0; + obj = rht_dereference_bucket(obj->next, tbl, hash); + err = 1; + + if (rhlist) { + list = rht_dereference_bucket(list->next, tbl, hash); + if (list) { + RCU_INIT_POINTER(list->rhead.next, obj); + obj = &list->rhead; + err = 0; + } + } + + rcu_assign_pointer(*pprev, obj); break; } spin_unlock_bh(lock); + if (err > 0) { + atomic_dec(&ht->nelems); + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) + schedule_work(&ht->run_work); + err = 0; + } + + return err; +} + +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline int __rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params, bool rhlist) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_remove_fast_one(ht, tbl, obj, params, + rhlist)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + return err; } @@ -794,34 +1059,29 @@ static inline int rhashtable_remove_fast( struct rhashtable *ht, struct rhash_head *obj, const struct rhashtable_params params) { - struct bucket_table *tbl; - int err; + return __rhashtable_remove_fast(ht, obj, params, false); +} - rcu_read_lock(); - - tbl = rht_dereference_rcu(ht->tbl, ht); - - /* Because we have already taken (and released) the bucket - * lock in old_tbl, if we find that future_tbl is not yet - * visible then that guarantees the entry to still be in - * the old tbl if it exists. - */ - while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && - (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) - ; - - if (err) - goto out; - - atomic_dec(&ht->nelems); - if (unlikely(ht->p.automatic_shrinking && - rht_shrink_below_30(ht, tbl))) - schedule_work(&ht->run_work); - -out: - rcu_read_unlock(); - - return err; +/** + * rhltable_remove - remove object from hash list table + * @hlt: hash list table + * @list: pointer to hash list head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhltable_remove( + struct rhltable *hlt, struct rhlist_head *list, + const struct rhashtable_params params) +{ + return __rhashtable_remove_fast(&hlt->ht, &list->rhead, params, true); } /* Internal function, please use rhashtable_replace_fast() instead */ @@ -906,4 +1166,59 @@ static inline int rhashtable_replace_fast( return err; } +/* Obsolete function, do not use in new code. */ +static inline int rhashtable_walk_init(struct rhashtable *ht, + struct rhashtable_iter *iter, gfp_t gfp) +{ + rhashtable_walk_enter(ht, iter); + return 0; +} + +/** + * rhltable_walk_enter - Initialise an iterator + * @hlt: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit after this function returns. + */ +static inline void rhltable_walk_enter(struct rhltable *hlt, + struct rhashtable_iter *iter) +{ + return rhashtable_walk_enter(&hlt->ht, iter); +} + +/** + * rhltable_free_and_destroy - free elements and destroy hash list table + * @hlt: the hash list table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * See documentation for rhashtable_free_and_destroy. + */ +static inline void rhltable_free_and_destroy(struct rhltable *hlt, + void (*free_fn)(void *ptr, + void *arg), + void *arg) +{ + return rhashtable_free_and_destroy(&hlt->ht, free_fn, arg); +} + +static inline void rhltable_destroy(struct rhltable *hlt) +{ + return rhltable_free_and_destroy(hlt, NULL, NULL); +} + #endif /* _LINUX_RHASHTABLE_H */ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 2daece8979f7..57e54847b0b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -105,7 +105,7 @@ extern int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx); + int *idx); extern int ndo_dflt_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0f665cb26b50..9bf60b556bd2 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -612,7 +612,6 @@ static inline bool skb_mstamp_after(const struct skb_mstamp *t1, * @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS * @napi_id: id of the NAPI struct this skb came from * @secmark: security marking - * @offload_fwd_mark: fwding offload mark * @mark: Generic packet mark * @vlan_proto: vlan encapsulation protocol * @vlan_tci: vlan tag control information @@ -677,13 +676,23 @@ struct sk_buff { */ kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; + +/* if you move cloned around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define CLONED_MASK (1 << 7) +#else +#define CLONED_MASK 1 +#endif +#define CLONED_OFFSET() offsetof(struct sk_buff, __cloned_offset) + + __u8 __cloned_offset[0]; __u8 cloned:1, nohdr:1, fclone:2, peeked:1, head_frag:1, - xmit_more:1; - /* one bit hole */ + xmit_more:1, + __unused:1; /* one bit hole */ kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied @@ -730,7 +739,10 @@ struct sk_buff { __u8 ipvs_property:1; __u8 inner_protocol_type:1; __u8 remcsum_offload:1; - /* 3 or 5 bit hole */ +#ifdef CONFIG_NET_SWITCHDEV + __u8 offload_fwd_mark:1; +#endif + /* 2, 4 or 5 bit hole */ #ifdef CONFIG_NET_SCHED __u16 tc_index; /* traffic control index */ @@ -757,14 +769,9 @@ struct sk_buff { unsigned int sender_cpu; }; #endif - union { #ifdef CONFIG_NETWORK_SECMARK - __u32 secmark; + __u32 secmark; #endif -#ifdef CONFIG_NET_SWITCHDEV - __u32 offload_fwd_mark; -#endif - }; union { __u32 mark; @@ -2295,7 +2302,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) int ___pskb_trim(struct sk_buff *skb, unsigned int len); -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +static inline void __skb_set_length(struct sk_buff *skb, unsigned int len) { if (unlikely(skb_is_nonlinear(skb))) { WARN_ON(1); @@ -2305,6 +2312,11 @@ static inline void __skb_trim(struct sk_buff *skb, unsigned int len) skb_set_tail_pointer(skb, len); } +static inline void __skb_trim(struct sk_buff *skb, unsigned int len) +{ + __skb_set_length(skb, len); +} + void skb_trim(struct sk_buff *skb, unsigned int len); static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) @@ -2335,6 +2347,20 @@ static inline void pskb_trim_unique(struct sk_buff *skb, unsigned int len) BUG_ON(err); } +static inline int __skb_grow(struct sk_buff *skb, unsigned int len) +{ + unsigned int diff = len - skb->len; + + if (skb_tailroom(skb) < diff) { + int ret = pskb_expand_head(skb, 0, diff - skb_tailroom(skb), + GFP_ATOMIC); + if (ret) + return ret; + } + __skb_set_length(skb, len); + return 0; +} + /** * skb_orphan - orphan a buffer * @skb: buffer to orphan @@ -2386,6 +2412,8 @@ static inline void __skb_queue_purge(struct sk_buff_head *list) kfree_skb(skb); } +void skb_rbtree_purge(struct rb_root *root); + void *netdev_alloc_frag(unsigned int fragsz); struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int length, @@ -2938,6 +2966,21 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) return __pskb_trim(skb, len); } +static inline int __skb_trim_rcsum(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + __skb_trim(skb, len); + return 0; +} + +static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) +{ + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_NONE; + return __skb_grow(skb, len); +} + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ @@ -3042,6 +3085,7 @@ bool skb_gso_validate_mtu(const struct sk_buff *skb, unsigned int mtu); struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features); struct sk_buff *skb_vlan_untag(struct sk_buff *skb); int skb_ensure_writable(struct sk_buff *skb, int write_len); +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci); int skb_vlan_pop(struct sk_buff *skb); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, @@ -3726,6 +3770,13 @@ static inline bool skb_is_gso_v6(const struct sk_buff *skb) return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; } +static inline void skb_gso_reset(struct sk_buff *skb) +{ + skb_shinfo(skb)->gso_size = 0; + skb_shinfo(skb)->gso_segs = 0; + skb_shinfo(skb)->gso_type = 0; +} + void __skb_warn_lro_forwarding(const struct sk_buff *skb); static inline bool skb_warn_if_lro(const struct sk_buff *skb) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index a4f7203a9017..ecc3e07c6e63 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -25,6 +25,7 @@ #include #include #include +#include #include /* For the /proc/sys support */ @@ -159,6 +160,9 @@ struct ctl_table_root { struct ctl_table_set default_set; struct ctl_table_set *(*lookup)(struct ctl_table_root *root, struct nsproxy *namespaces); + void (*set_ownership)(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid); int (*permissions)(struct ctl_table_header *head, struct ctl_table *table); }; diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7be9b1242354..a17ae7b85218 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -19,6 +19,7 @@ #include +#include #include #include #include @@ -212,7 +213,8 @@ struct tcp_sock { u8 reord; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ - u8 unused; + u8 rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ + unused:7; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ thin_dupack : 1,/* Fast retransmit on first dupack */ @@ -234,9 +236,7 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ - struct rtt_meas { - u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ - } rtt_min[3]; + struct minmax rtt_min; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ @@ -268,6 +268,12 @@ struct tcp_sock { * receiver in Recovery. */ u32 prr_out; /* Total number of pkts sent during Recovery. */ u32 delivered; /* Total data packets delivered incl. rexmits */ + u32 lost; /* Total data packets lost incl. rexmits */ + u32 app_limited; /* limited until "delivered" reaches this val */ + struct skb_mstamp first_tx_mstamp; /* start of window send phase */ + struct skb_mstamp delivered_mstamp; /* time we reached "delivered" */ + u32 rate_delivered; /* saved rate sample: packets delivered */ + u32 rate_interval_us; /* saved rate sample: time elapsed */ u32 rcv_wnd; /* Current receiver window */ u32 write_seq; /* Tail(+1) of data held in tcp send buffer */ @@ -281,10 +287,9 @@ struct tcp_sock { struct sk_buff* lost_skb_hint; struct sk_buff *retransmit_skb_hint; - /* OOO segments go in this list. Note that socket lock must be held, - * as we do not use sk_buff_head lock. - */ - struct sk_buff_head out_of_order_queue; + /* OOO segments go in this rbtree. Socket lock must be held. */ + struct rb_root out_of_order_queue; + struct sk_buff *ooo_last_skb; /* cache rb_last(out_of_order_queue) */ /* SACKs data, these 2 need to be together (see tcp_options_write) */ struct tcp_sack_block duplicate_sack[1]; /* D-SACK block */ diff --git a/include/linux/win_minmax.h b/include/linux/win_minmax.h new file mode 100644 index 000000000000..56569604278f --- /dev/null +++ b/include/linux/win_minmax.h @@ -0,0 +1,37 @@ +/** + * lib/minmax.c: windowed min/max tracker by Kathleen Nichols. + * + */ +#ifndef MINMAX_H +#define MINMAX_H + +#include + +/* A single data point for our parameterized min-max tracker */ +struct minmax_sample { + u32 t; /* time measurement was taken */ + u32 v; /* value measured */ +}; + +/* State for the parameterized min-max tracker */ +struct minmax { + struct minmax_sample s[3]; +}; + +static inline u32 minmax_get(const struct minmax *m) +{ + return m->s[0].v; +} + +static inline u32 minmax_reset(struct minmax *m, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + m->s[2] = m->s[1] = m->s[0] = val; + return m->s[0].v; +} + +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas); +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas); + +#endif diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 9826d3a9464c..f2d072787947 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -1,8 +1,9 @@ #ifndef _ADDRCONF_H #define _ADDRCONF_H -#define MAX_RTR_SOLICITATIONS 3 +#define MAX_RTR_SOLICITATIONS -1 /* unlimited */ #define RTR_SOLICITATION_INTERVAL (4*HZ) +#define RTR_SOLICITATION_MAX_INTERVAL (3600*HZ) /* 1 hour */ #define MIN_VALID_LIFETIME (2*3600) /* 2 hours */ diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h index 7b0f88699b25..1061a472a3e3 100644 --- a/include/net/af_rxrpc.h +++ b/include/net/af_rxrpc.h @@ -12,42 +12,39 @@ #ifndef _NET_RXRPC_H #define _NET_RXRPC_H -#include #include +struct key; +struct sock; +struct socket; struct rxrpc_call; -/* - * the mark applied to socket buffers that may be intercepted - */ -enum rxrpc_skb_mark { - RXRPC_SKB_MARK_DATA, /* data message */ - RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ - RXRPC_SKB_MARK_BUSY, /* server busy message */ - RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ - RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ - RXRPC_SKB_MARK_NET_ERROR, /* network error message */ - RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ - RXRPC_SKB_MARK_NEW_CALL, /* local error message */ -}; +typedef void (*rxrpc_notify_rx_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_notify_new_call_t)(struct sock *, struct rxrpc_call *, + unsigned long); +typedef void (*rxrpc_discard_new_call_t)(struct rxrpc_call *, unsigned long); +typedef void (*rxrpc_user_attach_call_t)(struct rxrpc_call *, unsigned long); -typedef void (*rxrpc_interceptor_t)(struct sock *, unsigned long, - struct sk_buff *); -void rxrpc_kernel_intercept_rx_messages(struct socket *, rxrpc_interceptor_t); +void rxrpc_kernel_new_call_notification(struct socket *, + rxrpc_notify_new_call_t, + rxrpc_discard_new_call_t); struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *, struct sockaddr_rxrpc *, struct key *, unsigned long, - gfp_t); -int rxrpc_kernel_send_data(struct rxrpc_call *, struct msghdr *, size_t); -void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); -void rxrpc_kernel_abort_call(struct rxrpc_call *, u32); -void rxrpc_kernel_end_call(struct rxrpc_call *); -bool rxrpc_kernel_is_data_last(struct sk_buff *); -u32 rxrpc_kernel_get_abort_code(struct sk_buff *); -int rxrpc_kernel_get_error_number(struct sk_buff *); -void rxrpc_kernel_free_skb(struct sk_buff *); -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *, unsigned long); -int rxrpc_kernel_reject_call(struct socket *); + gfp_t, + rxrpc_notify_rx_t); +int rxrpc_kernel_send_data(struct socket *, struct rxrpc_call *, + struct msghdr *, size_t); +int rxrpc_kernel_recv_data(struct socket *, struct rxrpc_call *, + void *, size_t, size_t *, bool, u32 *); +void rxrpc_kernel_abort_call(struct socket *, struct rxrpc_call *, + u32, int, const char *); +void rxrpc_kernel_end_call(struct socket *, struct rxrpc_call *); +void rxrpc_kernel_get_peer(struct socket *, struct rxrpc_call *, + struct sockaddr_rxrpc *); +int rxrpc_kernel_charge_accept(struct socket *, rxrpc_notify_rx_t, + rxrpc_user_attach_call_t, unsigned long, gfp_t); #endif /* _NET_RXRPC_H */ diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index bfd1590821d6..0a1e21d7bce1 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -29,7 +29,8 @@ #include #include -#define BT_SUBSYS_VERSION "2.21" +#define BT_SUBSYS_VERSION 2 +#define BT_SUBSYS_REVISION 22 #ifndef AF_BLUETOOTH #define AF_BLUETOOTH 31 @@ -371,6 +372,7 @@ void hci_sock_set_flag(struct sock *sk, int nr); void hci_sock_clear_flag(struct sock *sk, int nr); int hci_sock_test_flag(struct sock *sk, int nr); unsigned short hci_sock_get_channel(struct sock *sk); +u32 hci_sock_get_cookie(struct sock *sk); int hci_sock_init(void); void hci_sock_cleanup(void); diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 003b25283407..99aa5e5e3100 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -63,6 +63,7 @@ #define HCI_SDIO 6 #define HCI_SPI 7 #define HCI_I2C 8 +#define HCI_SMD 9 /* HCI controller types */ #define HCI_PRIMARY 0x00 @@ -207,7 +208,11 @@ enum { HCI_MGMT_INDEX_EVENTS, HCI_MGMT_UNCONF_INDEX_EVENTS, HCI_MGMT_EXT_INDEX_EVENTS, - HCI_MGMT_GENERIC_EVENTS, + HCI_MGMT_EXT_INFO_EVENTS, + HCI_MGMT_OPTION_EVENTS, + HCI_MGMT_SETTING_EVENTS, + HCI_MGMT_DEV_CLASS_EVENTS, + HCI_MGMT_LOCAL_NAME_EVENTS, HCI_MGMT_OOB_DATA_EVENTS, }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index ee7fc47680a1..f00bf667ec33 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -211,6 +211,7 @@ struct hci_dev { __u8 dev_name[HCI_MAX_NAME_LENGTH]; __u8 short_name[HCI_MAX_SHORT_NAME_LENGTH]; __u8 eir[HCI_MAX_EIR_LENGTH]; + __u16 appearance; __u8 dev_class[3]; __u8 major_class; __u8 minor_class; @@ -399,7 +400,9 @@ struct hci_dev { struct delayed_work rpa_expired; bdaddr_t rpa; +#if IS_ENABLED(CONFIG_BT_LEDS) struct led_trigger *power_led; +#endif int (*open)(struct hci_dev *hdev); int (*close)(struct hci_dev *hdev); @@ -1026,8 +1029,8 @@ int hci_resume_dev(struct hci_dev *hdev); int hci_reset_dev(struct hci_dev *hdev); int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb); int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb); -void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); -void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...); +__printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...); int hci_dev_open(__u16 dev); int hci_dev_close(__u16 dev); int hci_dev_do_close(struct hci_dev *hdev); @@ -1404,6 +1407,9 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb); void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, int flag, struct sock *skip_sk); void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb); +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk); void hci_sock_dev_event(struct hci_dev *hdev, int event); @@ -1449,6 +1455,7 @@ void hci_mgmt_chan_unregister(struct hci_mgmt_chan *c); #define DISCOV_BREDR_INQUIRY_LEN 0x08 #define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ +void mgmt_fill_version_info(void *ver); int mgmt_new_settings(struct hci_dev *hdev); void mgmt_index_added(struct hci_dev *hdev); void mgmt_index_removed(struct hci_dev *hdev); diff --git a/include/net/bluetooth/hci_mon.h b/include/net/bluetooth/hci_mon.h index 587d0131b349..240786b04a46 100644 --- a/include/net/bluetooth/hci_mon.h +++ b/include/net/bluetooth/hci_mon.h @@ -45,6 +45,10 @@ struct hci_mon_hdr { #define HCI_MON_VENDOR_DIAG 11 #define HCI_MON_SYSTEM_NOTE 12 #define HCI_MON_USER_LOGGING 13 +#define HCI_MON_CTRL_OPEN 14 +#define HCI_MON_CTRL_CLOSE 15 +#define HCI_MON_CTRL_COMMAND 16 +#define HCI_MON_CTRL_EVENT 17 struct hci_mon_new_index { __u8 type; diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 7647964b1efa..72a456bbbcd5 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -586,6 +586,24 @@ struct mgmt_rp_get_adv_size_info { #define MGMT_OP_START_LIMITED_DISCOVERY 0x0041 +#define MGMT_OP_READ_EXT_INFO 0x0042 +#define MGMT_READ_EXT_INFO_SIZE 0 +struct mgmt_rp_read_ext_info { + bdaddr_t bdaddr; + __u8 version; + __le16 manufacturer; + __le32 supported_settings; + __le32 current_settings; + __le16 eir_len; + __u8 eir[0]; +} __packed; + +#define MGMT_OP_SET_APPEARANCE 0x0043 +struct mgmt_cp_set_appearance { + __u16 appearance; +} __packed; +#define MGMT_SET_APPEARANCE_SIZE 2 + #define MGMT_EV_CMD_COMPLETE 0x0001 struct mgmt_ev_cmd_complete { __le16 opcode; @@ -800,3 +818,9 @@ struct mgmt_ev_advertising_added { struct mgmt_ev_advertising_removed { __u8 instance; } __packed; + +#define MGMT_EV_EXT_INFO_CHANGED 0x0025 +struct mgmt_ev_ext_info_changed { + __le16 eir_len; + __u8 eir[0]; +} __packed; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index beb7610d64e9..fe78f02a242e 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5,7 +5,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2016 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -593,6 +593,8 @@ struct survey_info { s8 noise; }; +#define CFG80211_MAX_WEP_KEYS 4 + /** * struct cfg80211_crypto_settings - Crypto settings * @wpa_versions: indicates which, if any, WPA versions are enabled @@ -610,6 +612,9 @@ struct survey_info { * allowed through even on unauthorized ports * @control_port_no_encrypt: TRUE to prevent encryption of control port * protocol frames. + * @wep_keys: static WEP keys, if not NULL points to an array of + * CFG80211_MAX_WEP_KEYS WEP keys + * @wep_tx_key: key index (0..3) of the default TX static WEP key */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -621,6 +626,8 @@ struct cfg80211_crypto_settings { bool control_port; __be16 control_port_ethertype; bool control_port_no_encrypt; + struct key_params *wep_keys; + int wep_tx_key; }; /** @@ -676,6 +683,18 @@ struct cfg80211_acl_data { struct mac_address mac_addrs[]; }; +/* + * cfg80211_bitrate_mask - masks for bitrate control + */ +struct cfg80211_bitrate_mask { + struct { + u32 legacy; + u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; + u16 vht_mcs[NL80211_VHT_NSS_MAX]; + enum nl80211_txrate_gi gi; + } control[NUM_NL80211_BANDS]; +}; + /** * struct cfg80211_ap_settings - AP configuration * @@ -700,6 +719,7 @@ struct cfg80211_acl_data { * MAC address based access control * @pbss: If set, start as a PCP instead of AP. Relevant for DMG * networks. + * @beacon_rate: bitrate to be used for beacons */ struct cfg80211_ap_settings { struct cfg80211_chan_def chandef; @@ -719,6 +739,7 @@ struct cfg80211_ap_settings { bool p2p_opp_ps; const struct cfg80211_acl_data *acl; bool pbss; + struct cfg80211_bitrate_mask beacon_rate; }; /** @@ -1351,6 +1372,7 @@ struct mesh_config { * @beacon_interval: beacon interval to use * @mcast_rate: multicat rate for Mesh Node [6Mbps is the default for 802.11a] * @basic_rates: basic rates to use when creating the mesh + * @beacon_rate: bitrate to be used for beacons * * These parameters are fixed when the mesh is created. */ @@ -1371,6 +1393,7 @@ struct mesh_setup { u16 beacon_interval; int mcast_rate[NUM_NL80211_BANDS]; u32 basic_rates; + struct cfg80211_bitrate_mask beacon_rate; }; /** @@ -2010,17 +2033,6 @@ enum wiphy_params_flags { WIPHY_PARAM_DYN_ACK = 1 << 5, }; -/* - * cfg80211_bitrate_mask - masks for bitrate control - */ -struct cfg80211_bitrate_mask { - struct { - u32 legacy; - u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN]; - u16 vht_mcs[NL80211_VHT_NSS_MAX]; - enum nl80211_txrate_gi gi; - } control[NUM_NL80211_BANDS]; -}; /** * struct cfg80211_pmksa - PMK Security Association * @@ -2301,6 +2313,98 @@ struct cfg80211_qos_map { struct cfg80211_dscp_range up[8]; }; +/** + * struct cfg80211_nan_conf - NAN configuration + * + * This struct defines NAN configuration parameters + * + * @master_pref: master preference (1 - 255) + * @dual: dual band operation mode, see &enum nl80211_nan_dual_band_conf + */ +struct cfg80211_nan_conf { + u8 master_pref; + u8 dual; +}; + +/** + * enum cfg80211_nan_conf_changes - indicates changed fields in NAN + * configuration + * + * @CFG80211_NAN_CONF_CHANGED_PREF: master preference + * @CFG80211_NAN_CONF_CHANGED_DUAL: dual band operation + */ +enum cfg80211_nan_conf_changes { + CFG80211_NAN_CONF_CHANGED_PREF = BIT(0), + CFG80211_NAN_CONF_CHANGED_DUAL = BIT(1), +}; + +/** + * struct cfg80211_nan_func_filter - a NAN function Rx / Tx filter + * + * @filter: the content of the filter + * @len: the length of the filter + */ +struct cfg80211_nan_func_filter { + const u8 *filter; + u8 len; +}; + +/** + * struct cfg80211_nan_func - a NAN function + * + * @type: &enum nl80211_nan_function_type + * @service_id: the service ID of the function + * @publish_type: &nl80211_nan_publish_type + * @close_range: if true, the range should be limited. Threshold is + * implementation specific. + * @publish_bcast: if true, the solicited publish should be broadcasted + * @subscribe_active: if true, the subscribe is active + * @followup_id: the instance ID for follow up + * @followup_reqid: the requestor instance ID for follow up + * @followup_dest: MAC address of the recipient of the follow up + * @ttl: time to live counter in DW. + * @serv_spec_info: Service Specific Info + * @serv_spec_info_len: Service Specific Info length + * @srf_include: if true, SRF is inclusive + * @srf_bf: Bloom Filter + * @srf_bf_len: Bloom Filter length + * @srf_bf_idx: Bloom Filter index + * @srf_macs: SRF MAC addresses + * @srf_num_macs: number of MAC addresses in SRF + * @rx_filters: rx filters that are matched with corresponding peer's tx_filter + * @tx_filters: filters that should be transmitted in the SDF. + * @num_rx_filters: length of &rx_filters. + * @num_tx_filters: length of &tx_filters. + * @instance_id: driver allocated id of the function. + * @cookie: unique NAN function identifier. + */ +struct cfg80211_nan_func { + enum nl80211_nan_function_type type; + u8 service_id[NL80211_NAN_FUNC_SERVICE_ID_LEN]; + u8 publish_type; + bool close_range; + bool publish_bcast; + bool subscribe_active; + u8 followup_id; + u8 followup_reqid; + struct mac_address followup_dest; + u32 ttl; + const u8 *serv_spec_info; + u8 serv_spec_info_len; + bool srf_include; + const u8 *srf_bf; + u8 srf_bf_len; + u8 srf_bf_idx; + struct mac_address *srf_macs; + int srf_num_macs; + struct cfg80211_nan_func_filter *rx_filters; + struct cfg80211_nan_func_filter *tx_filters; + u8 num_tx_filters; + u8 num_rx_filters; + u8 instance_id; + u64 cookie; +}; + /** * struct cfg80211_ops - backend description for wireless configuration * @@ -2432,7 +2536,8 @@ struct cfg80211_qos_map { * cases, the result of roaming is indicated with a call to * cfg80211_roamed() or cfg80211_roamed_bss(). * (invoked with the wireless_dev mutex held) - * @disconnect: Disconnect from the BSS/ESS. + * @disconnect: Disconnect from the BSS/ESS. Once done, call + * cfg80211_disconnected(). * (invoked with the wireless_dev mutex held) * * @join_ibss: Join the specified IBSS (or create if necessary). Once done, call @@ -2588,6 +2693,19 @@ struct cfg80211_qos_map { * and returning to the base channel for communication with the AP. * @tdls_cancel_channel_switch: Stop channel-switching with a TDLS peer. Both * peers must be on the base channel when the call completes. + * @start_nan: Start the NAN interface. + * @stop_nan: Stop the NAN interface. + * @add_nan_func: Add a NAN function. Returns negative value on failure. + * On success @nan_func ownership is transferred to the driver and + * it may access it outside of the scope of this function. The driver + * should free the @nan_func when no longer needed by calling + * cfg80211_free_nan_func(). + * On success the driver should assign an instance_id in the + * provided @nan_func. + * @del_nan_func: Delete a NAN function. + * @nan_change_conf: changes NAN configuration. The changed parameters must + * be specified in @changes (using &enum cfg80211_nan_conf_changes); + * All other parameters must be ignored. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2853,6 +2971,17 @@ struct cfg80211_ops { void (*tdls_cancel_channel_switch)(struct wiphy *wiphy, struct net_device *dev, const u8 *addr); + int (*start_nan)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf); + void (*stop_nan)(struct wiphy *wiphy, struct wireless_dev *wdev); + int (*add_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func); + void (*del_nan_func)(struct wiphy *wiphy, struct wireless_dev *wdev, + u64 cookie); + int (*nan_change_conf)(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, + u32 changes); }; /* @@ -2899,6 +3028,8 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. * @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). + * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation + * before connection. */ enum wiphy_flags { /* use hole at 0 */ @@ -2924,6 +3055,7 @@ enum wiphy_flags { WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL = BIT(21), WIPHY_FLAG_SUPPORTS_5_10_MHZ = BIT(22), WIPHY_FLAG_HAS_CHANNEL_SWITCH = BIT(23), + WIPHY_FLAG_HAS_STATIC_WEP = BIT(24), }; /** @@ -3301,6 +3433,8 @@ struct wiphy_iftype_ext_capab { * @bss_select_support: bitmask indicating the BSS selection criteria supported * by the driver in the .connect() callback. The bit position maps to the * attribute indices defined in &enum nl80211_bss_select_attr. + * + * @cookie_counter: unique generic cookie counter, used to identify objects. */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -3430,6 +3564,8 @@ struct wiphy { u32 bss_select_support; + u64 cookie_counter; + char priv[0] __aligned(NETDEV_ALIGN); }; @@ -3610,6 +3746,7 @@ struct cfg80211_cached_keys; * beacons, 0 when not valid * @address: The address for this device, valid only if @netdev is %NULL * @p2p_started: true if this is a P2P Device that has been started + * @nan_started: true if this is a NAN interface that has been started * @cac_started: true if DFS channel availability check has been started * @cac_start_time: timestamp (jiffies) when the dfs state was entered. * @cac_time_ms: CAC time in ms @@ -3641,7 +3778,7 @@ struct wireless_dev { struct mutex mtx; - bool use_4addr, p2p_started; + bool use_4addr, p2p_started, nan_started; u8 address[ETH_ALEN] __aligned(sizeof(u16)); @@ -3954,6 +4091,34 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, unsigned int cfg80211_classify8021d(struct sk_buff *skb, struct cfg80211_qos_map *qos_map); +/** + * cfg80211_find_ie_match - match information element and byte array in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * @match: byte array to match + * @match_len: number of bytes in the match array + * @match_offset: offset in the IE where the byte array should match. + * If match_len is zero, this must also be set to zero. + * Otherwise this must be set to 2 or more, because the first + * byte is the element id, which is already compared to eid, and + * the second byte is the IE length. + * + * Return: %NULL if the element ID could not be found or if + * the element is invalid (claims to be longer than the given + * data) or if the byte array doesn't match, or a pointer to the first + * byte of the requested element, that is the byte containing the + * element ID. + * + * Note: There are no checks on the element length other than + * having to fit into the given data and being large enough for the + * byte array to match. + */ +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset); + /** * cfg80211_find_ie - find information element in data * @@ -3969,7 +4134,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb, * Note: There are no checks on the element length other than * having to fit into the given data. */ -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); +static inline const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +{ + return cfg80211_find_ie_match(eid, ies, len, NULL, 0, 0); +} /** * cfg80211_find_vendor_ie - find vendor specific information element in data @@ -5518,6 +5686,67 @@ wiphy_ext_feature_isset(struct wiphy *wiphy, return (ft_byte & BIT(ftidx % 8)) != 0; } +/** + * cfg80211_free_nan_func - free NAN function + * @f: NAN function that should be freed + * + * Frees all the NAN function and all it's allocated members. + */ +void cfg80211_free_nan_func(struct cfg80211_nan_func *f); + +/** + * struct cfg80211_nan_match_params - NAN match parameters + * @type: the type of the function that triggered a match. If it is + * %NL80211_NAN_FUNC_SUBSCRIBE it means that we replied to a subscriber. + * If it is %NL80211_NAN_FUNC_PUBLISH, it means that we got a discovery + * result. + * If it is %NL80211_NAN_FUNC_FOLLOW_UP, we received a follow up. + * @inst_id: the local instance id + * @peer_inst_id: the instance id of the peer's function + * @addr: the MAC address of the peer + * @info_len: the length of the &info + * @info: the Service Specific Info from the peer (if any) + * @cookie: unique identifier of the corresponding function + */ +struct cfg80211_nan_match_params { + enum nl80211_nan_function_type type; + u8 inst_id; + u8 peer_inst_id; + const u8 *addr; + u8 info_len; + const u8 *info; + u64 cookie; +}; + +/** + * cfg80211_nan_match - report a match for a NAN function. + * @wdev: the wireless device reporting the match + * @match: match notification parameters + * @gfp: allocation flags + * + * This function reports that the a NAN function had a match. This + * can be a subscribe that had a match or a solicited publish that + * was sent. It can also be a follow up that was received. + */ +void cfg80211_nan_match(struct wireless_dev *wdev, + struct cfg80211_nan_match_params *match, gfp_t gfp); + +/** + * cfg80211_nan_func_terminated - notify about NAN function termination. + * + * @wdev: the wireless device reporting the match + * @inst_id: the local instance id + * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*) + * @cookie: unique NAN function identifier + * @gfp: allocation flags + * + * This function reports that the a NAN function is terminated. + */ +void cfg80211_nan_func_terminated(struct wireless_dev *wdev, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + u64 cookie, gfp_t gfp); + /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/net/devlink.h b/include/net/devlink.h index c99ffe8cef3c..211bd3c37028 100644 --- a/include/net/devlink.h +++ b/include/net/devlink.h @@ -50,7 +50,6 @@ struct devlink_sb_pool_info { }; struct devlink_ops { - size_t priv_size; int (*port_type_set)(struct devlink_port *devlink_port, enum devlink_port_type port_type); int (*port_split)(struct devlink *devlink, unsigned int port_index, diff --git a/include/net/dsa.h b/include/net/dsa.h index 2217a3f817f8..b122196d5a1f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -26,6 +26,7 @@ enum dsa_tag_protocol { DSA_TAG_PROTO_TRAILER, DSA_TAG_PROTO_EDSA, DSA_TAG_PROTO_BRCM, + DSA_TAG_PROTO_QCA, DSA_TAG_LAST, /* MUST BE LAST */ }; @@ -142,6 +143,7 @@ struct dsa_port { struct net_device *netdev; struct device_node *dn; unsigned int ageing_time; + u8 stp_state; }; struct dsa_switch { @@ -165,9 +167,9 @@ struct dsa_switch { struct dsa_chip_data *cd; /* - * The used switch driver. + * The switch operations. */ - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; /* * An array of which element [a] indicates which port on this @@ -234,19 +236,21 @@ static inline u8 dsa_upstream_port(struct dsa_switch *ds) struct switchdev_trans; struct switchdev_obj; struct switchdev_obj_port_fdb; +struct switchdev_obj_port_mdb; struct switchdev_obj_port_vlan; -struct dsa_switch_driver { +struct dsa_switch_ops { struct list_head list; - enum dsa_tag_protocol tag_protocol; - /* * Probing and setup. */ const char *(*probe)(struct device *dsa_dev, struct device *host_dev, int sw_addr, void **priv); + + enum dsa_tag_protocol (*get_tag_protocol)(struct dsa_switch *ds); + int (*setup)(struct dsa_switch *ds); int (*set_addr)(struct dsa_switch *ds, u8 *addr); u32 (*get_phy_flags)(struct dsa_switch *ds, int port); @@ -336,6 +340,7 @@ struct dsa_switch_driver { void (*port_bridge_leave)(struct dsa_switch *ds, int port); void (*port_stp_state_set)(struct dsa_switch *ds, int port, u8 state); + void (*port_fast_age)(struct dsa_switch *ds, int port); /* * VLAN support @@ -368,17 +373,27 @@ struct dsa_switch_driver { int (*port_fdb_dump)(struct dsa_switch *ds, int port, struct switchdev_obj_port_fdb *fdb, int (*cb)(struct switchdev_obj *obj)); + + /* + * Multicast database + */ + int (*port_mdb_prepare)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + void (*port_mdb_add)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans); + int (*port_mdb_del)(struct dsa_switch *ds, int port, + const struct switchdev_obj_port_mdb *mdb); + int (*port_mdb_dump)(struct dsa_switch *ds, int port, + struct switchdev_obj_port_mdb *mdb, + int (*cb)(struct switchdev_obj *obj)); }; -void register_switch_driver(struct dsa_switch_driver *type); -void unregister_switch_driver(struct dsa_switch_driver *type); +void register_switch_driver(struct dsa_switch_ops *type); +void unregister_switch_driver(struct dsa_switch_ops *type); struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); -static inline void *ds_to_priv(struct dsa_switch *ds) -{ - return ds->priv; -} - static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) { return dst->rcv != NULL; @@ -386,4 +401,18 @@ static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) void dsa_unregister_switch(struct dsa_switch *ds); int dsa_register_switch(struct dsa_switch *ds, struct device_node *np); +#ifdef CONFIG_PM_SLEEP +int dsa_switch_suspend(struct dsa_switch *ds); +int dsa_switch_resume(struct dsa_switch *ds); +#else +static inline int dsa_switch_suspend(struct dsa_switch *ds) +{ + return 0; +} +static inline int dsa_switch_resume(struct dsa_switch *ds) +{ + return 0; +} +#endif /* CONFIG_PM_SLEEP */ + #endif diff --git a/include/net/dst_metadata.h b/include/net/dst_metadata.h index 5db9f5910428..6965c8f68ade 100644 --- a/include/net/dst_metadata.h +++ b/include/net/dst_metadata.h @@ -112,12 +112,13 @@ static inline struct ip_tunnel_info *skb_tunnel_info_unclone(struct sk_buff *skb return &dst->u.tun_info; } -static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, - __be16 flags, - __be64 tunnel_id, - int md_size) +static inline struct metadata_dst *__ip_tun_set_dst(__be32 saddr, + __be32 daddr, + __u8 tos, __u8 ttl, + __be16 flags, + __be64 tunnel_id, + int md_size) { - const struct iphdr *iph = ip_hdr(skb); struct metadata_dst *tun_dst; tun_dst = tun_rx_dst(md_size); @@ -125,17 +126,30 @@ static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, return NULL; ip_tunnel_key_init(&tun_dst->u.tun_info.key, - iph->saddr, iph->daddr, iph->tos, iph->ttl, + saddr, daddr, tos, ttl, 0, 0, 0, tunnel_id, flags); return tun_dst; } -static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, +static inline struct metadata_dst *ip_tun_rx_dst(struct sk_buff *skb, __be16 flags, __be64 tunnel_id, int md_size) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); + const struct iphdr *iph = ip_hdr(skb); + + return __ip_tun_set_dst(iph->saddr, iph->daddr, iph->tos, iph->ttl, + flags, tunnel_id, md_size); +} + +static inline struct metadata_dst *__ipv6_tun_set_dst(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u8 tos, __u8 ttl, + __be32 label, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ struct metadata_dst *tun_dst; struct ip_tunnel_info *info; @@ -150,14 +164,26 @@ static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, info->key.tp_src = 0; info->key.tp_dst = 0; - info->key.u.ipv6.src = ip6h->saddr; - info->key.u.ipv6.dst = ip6h->daddr; + info->key.u.ipv6.src = *saddr; + info->key.u.ipv6.dst = *daddr; - info->key.tos = ipv6_get_dsfield(ip6h); - info->key.ttl = ip6h->hop_limit; - info->key.label = ip6_flowlabel(ip6h); + info->key.tos = tos; + info->key.ttl = ttl; + info->key.label = label; return tun_dst; } +static inline struct metadata_dst *ipv6_tun_rx_dst(struct sk_buff *skb, + __be16 flags, + __be64 tunnel_id, + int md_size) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + + return __ipv6_tun_set_dst(&ip6h->saddr, &ip6h->daddr, + ipv6_get_dsfield(ip6h), ip6h->hop_limit, + ip6_flowlabel(ip6h), flags, tunnel_id, + md_size); +} #endif /* __NET_DST_METADATA_H */ diff --git a/include/net/flow.h b/include/net/flow.h index d47ef4bb5423..035aa7716967 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -34,8 +34,7 @@ struct flowi_common { __u8 flowic_flags; #define FLOWI_FLAG_ANYSRC 0x01 #define FLOWI_FLAG_KNOWN_NH 0x02 -#define FLOWI_FLAG_L3MDEV_SRC 0x04 -#define FLOWI_FLAG_SKIP_NH_OIF 0x08 +#define FLOWI_FLAG_SKIP_NH_OIF 0x04 __u32 flowic_secid; struct flowi_tunnel flowic_tun_key; }; diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h index d3d60dccd19f..d9534927d93b 100644 --- a/include/net/flow_dissector.h +++ b/include/net/flow_dissector.h @@ -32,8 +32,13 @@ struct flow_dissector_key_basic { }; struct flow_dissector_key_tags { - u32 vlan_id:12, - flow_label:20; + u32 flow_label; +}; + +struct flow_dissector_key_vlan { + u16 vlan_id:12, + vlan_priority:3; + u16 padding; }; struct flow_dissector_key_keyid { @@ -119,7 +124,7 @@ enum flow_dissector_key_id { FLOW_DISSECTOR_KEY_PORTS, /* struct flow_dissector_key_ports */ FLOW_DISSECTOR_KEY_ETH_ADDRS, /* struct flow_dissector_key_eth_addrs */ FLOW_DISSECTOR_KEY_TIPC_ADDRS, /* struct flow_dissector_key_tipc_addrs */ - FLOW_DISSECTOR_KEY_VLANID, /* struct flow_dissector_key_flow_tags */ + FLOW_DISSECTOR_KEY_VLAN, /* struct flow_dissector_key_flow_vlan */ FLOW_DISSECTOR_KEY_FLOW_LABEL, /* struct flow_dissector_key_flow_tags */ FLOW_DISSECTOR_KEY_GRE_KEYID, /* struct flow_dissector_key_keyid */ FLOW_DISSECTOR_KEY_MPLS_ENTROPY, /* struct flow_dissector_key_keyid */ @@ -148,6 +153,7 @@ struct flow_keys { #define FLOW_KEYS_HASH_START_FIELD basic struct flow_dissector_key_basic basic; struct flow_dissector_key_tags tags; + struct flow_dissector_key_vlan vlan; struct flow_dissector_key_keyid keyid; struct flow_dissector_key_ports ports; struct flow_dissector_key_addrs addrs; @@ -177,7 +183,7 @@ struct flow_keys_digest { void make_flow_keys_digest(struct flow_keys_digest *digest, const struct flow_keys *flow); -static inline bool flow_keys_have_l4(struct flow_keys *keys) +static inline bool flow_keys_have_l4(const struct flow_keys *keys) { return (keys->ports.ports || keys->tags.flow_label); } diff --git a/include/net/fq.h b/include/net/fq.h index 268b49049c37..6d8521a30c5c 100644 --- a/include/net/fq.h +++ b/include/net/fq.h @@ -72,9 +72,12 @@ struct fq { u32 flows_cnt; u32 perturbation; u32 limit; + u32 memory_limit; + u32 memory_usage; u32 quantum; u32 backlog; u32 overlimit; + u32 overmemory; u32 collisions; }; diff --git a/include/net/fq_impl.h b/include/net/fq_impl.h index 163f3ed0f05a..4e6131cd3f43 100644 --- a/include/net/fq_impl.h +++ b/include/net/fq_impl.h @@ -29,6 +29,7 @@ static struct sk_buff *fq_flow_dequeue(struct fq *fq, tin->backlog_packets--; flow->backlog -= skb->len; fq->backlog--; + fq->memory_usage -= skb->truesize; if (flow->backlog == 0) { list_del_init(&flow->backlogchain); @@ -154,6 +155,7 @@ static void fq_tin_enqueue(struct fq *fq, flow->backlog += skb->len; tin->backlog_bytes += skb->len; tin->backlog_packets++; + fq->memory_usage += skb->truesize; fq->backlog++; fq_recalc_backlog(fq, tin, flow); @@ -166,7 +168,7 @@ static void fq_tin_enqueue(struct fq *fq, __skb_queue_tail(&flow->queue, skb); - if (fq->backlog > fq->limit) { + if (fq->backlog > fq->limit || fq->memory_usage > fq->memory_limit) { flow = list_first_entry_or_null(&fq->backlogs, struct fq_flow, backlogchain); @@ -181,6 +183,8 @@ static void fq_tin_enqueue(struct fq *fq, flow->tin->overlimit++; fq->overlimit++; + if (fq->memory_usage > fq->memory_limit) + fq->overmemory++; } } @@ -251,6 +255,7 @@ static int fq_init(struct fq *fq, int flows_cnt) fq->perturbation = prandom_u32(); fq->quantum = 300; fq->limit = 8192; + fq->memory_limit = 16 << 20; /* 16 MBytes */ fq->flows = kcalloc(fq->flows_cnt, sizeof(fq->flows[0]), GFP_KERNEL); if (!fq->flows) diff --git a/include/net/gre.h b/include/net/gre.h index 73ea256eb7d7..d25d836c129b 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -7,7 +7,15 @@ struct gre_base_hdr { __be16 flags; __be16 protocol; -}; +} __packed; + +struct gre_full_hdr { + struct gre_base_hdr fixed_header; + __be16 csum; + __be16 reserved1; + __be32 key; + __be32 seq; +} __packed; #define GRE_HEADER_SECTION 4 #define GREPROTO_CISCO 0 diff --git a/include/net/ieee80211_radiotap.h b/include/net/ieee80211_radiotap.h index b0fd9476c538..ba07b9d8ed63 100644 --- a/include/net/ieee80211_radiotap.h +++ b/include/net/ieee80211_radiotap.h @@ -190,6 +190,10 @@ struct ieee80211_radiotap_header { * IEEE80211_RADIOTAP_VHT u16, u8, u8, u8[4], u8, u8, u16 * * Contains VHT information about this frame. + * + * IEEE80211_RADIOTAP_TIMESTAMP u64, u16, u8, u8 variable + * + * Contains timestamp information for this frame. */ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_TSFT = 0, @@ -214,6 +218,7 @@ enum ieee80211_radiotap_type { IEEE80211_RADIOTAP_MCS = 19, IEEE80211_RADIOTAP_AMPDU_STATUS = 20, IEEE80211_RADIOTAP_VHT = 21, + IEEE80211_RADIOTAP_TIMESTAMP = 22, /* valid in every it_present bitmap, even vendor namespaces */ IEEE80211_RADIOTAP_RADIOTAP_NAMESPACE = 29, @@ -321,6 +326,22 @@ enum ieee80211_radiotap_type { #define IEEE80211_RADIOTAP_CODING_LDPC_USER2 0x04 #define IEEE80211_RADIOTAP_CODING_LDPC_USER3 0x08 +/* For IEEE80211_RADIOTAP_TIMESTAMP */ +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MASK 0x000F +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_MS 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_US 0x0001 +#define IEEE80211_RADIOTAP_TIMESTAMP_UNIT_NS 0x0003 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_MASK 0x00F0 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_BEGIN_MDPU 0x0000 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_MPDU 0x0010 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_EO_PPDU 0x0020 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_PLCP_SIG_ACQ 0x0030 +#define IEEE80211_RADIOTAP_TIMESTAMP_SPOS_UNKNOWN 0x00F0 + +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_64BIT 0x00 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT 0x01 +#define IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY 0x02 + /* helpers */ static inline int ieee80211_get_radiotap_len(unsigned char *data) { diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index 1c8b6820b694..515352c6280a 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -201,6 +201,7 @@ struct inet6_dev { struct ipv6_devstat stats; struct timer_list rs_timer; + __s32 rs_interval; /* in jiffies */ __u8 rs_probes; __u8 addr_gen_mode; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 49dcad4fe99e..197a30d221e9 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -134,8 +134,8 @@ struct inet_connection_sock { } icsk_mtup; u32 icsk_user_timeout; - u64 icsk_ca_priv[64 / sizeof(u64)]; -#define ICSK_CA_PRIV_SIZE (8 * sizeof(u64)) + u64 icsk_ca_priv[88 / sizeof(u64)]; +#define ICSK_CA_PRIV_SIZE (11 * sizeof(u64)) }; #define ICSK_TIME_RETRANS 1 /* Retransmit timer */ diff --git a/include/net/ip.h b/include/net/ip.h index 9742b92dc933..bc43c0fcae12 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -219,6 +219,29 @@ static inline u64 snmp_fold_field64(void __percpu *mib, int offt, size_t syncp_o } #endif +#define snmp_get_cpu_field64_batch(buff64, stats_list, mib_statistic, offset) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff64[i] += snmp_get_cpu_field64( \ + mib_statistic, \ + c, stats_list[i].entry, \ + offset); \ + } \ +} + +#define snmp_get_cpu_field_batch(buff, stats_list, mib_statistic) \ +{ \ + int i, c; \ + for_each_possible_cpu(c) { \ + for (i = 0; stats_list[i].name; i++) \ + buff[i] += snmp_get_cpu_field( \ + mib_statistic, \ + c, stats_list[i].entry); \ + } \ +} + void inet_get_local_port_range(struct net *net, int *low, int *high); #ifdef CONFIG_SYSCTL diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d97305d0e71f..e0cd318d5103 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -64,6 +64,9 @@ static inline bool rt6_need_strict(const struct in6_addr *daddr) } void ip6_route_input(struct sk_buff *skb); +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags); struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags); diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 43a5a0e4524c..20ed9699fcd4 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -23,6 +23,7 @@ struct __ip6_tnl_parm { __u8 proto; /* tunnel protocol */ __u8 encap_limit; /* encapsulation limit for tunnel */ __u8 hop_limit; /* hop limit for tunnel */ + bool collect_md; __be32 flowinfo; /* traffic class and flowlabel for tunnel */ __u32 flags; /* tunnel flags */ struct in6_addr laddr; /* local tunnel end-point address */ diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 7d4a72e75f33..b9314b48e39f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -22,6 +22,7 @@ #include #include #include +#include struct fib_config { u8 fc_dst_len; @@ -122,6 +123,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_weight; #endif + unsigned int fib_offload_cnt; struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev @@ -173,6 +175,18 @@ struct fib_result_nl { __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); +static inline void fib_info_offload_inc(struct fib_info *fi) +{ + fi->fib_offload_cnt++; + fi->fib_flags |= RTNH_F_OFFLOAD; +} + +static inline void fib_info_offload_dec(struct fib_info *fi) +{ + if (--fi->fib_offload_cnt == 0) + fi->fib_flags &= ~RTNH_F_OFFLOAD; +} + #define FIB_RES_SADDR(net, res) \ ((FIB_RES_NH(res).nh_saddr_genid == \ atomic_read(&(net)->ipv4.dev_addr_genid)) ? \ @@ -185,6 +199,33 @@ __be32 fib_info_update_nh_saddr(struct net *net, struct fib_nh *nh); #define FIB_RES_PREFSRC(net, res) ((res).fi->fib_prefsrc ? : \ FIB_RES_SADDR(net, res)) +struct fib_notifier_info { + struct net *net; +}; + +struct fib_entry_notifier_info { + struct fib_notifier_info info; /* must be first */ + u32 dst; + int dst_len; + struct fib_info *fi; + u8 tos; + u8 type; + u32 tb_id; + u32 nlflags; +}; + +enum fib_event_type { + FIB_EVENT_ENTRY_ADD, + FIB_EVENT_ENTRY_DEL, + FIB_EVENT_RULE_ADD, + FIB_EVENT_RULE_DEL, +}; + +int register_fib_notifier(struct notifier_block *nb); +int unregister_fib_notifier(struct notifier_block *nb); +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info); + struct fib_table { struct hlist_node tb_hlist; u32 tb_id; @@ -196,13 +237,12 @@ struct fib_table { int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags); -int fib_table_insert(struct fib_table *, struct fib_config *); -int fib_table_delete(struct fib_table *, struct fib_config *); +int fib_table_insert(struct net *, struct fib_table *, struct fib_config *); +int fib_table_delete(struct net *, struct fib_table *, struct fib_config *); int fib_table_dump(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); -int fib_table_flush(struct fib_table *table); +int fib_table_flush(struct net *net, struct fib_table *table); struct fib_table *fib_trie_unmerge(struct fib_table *main_tb); -void fib_table_flush_external(struct fib_table *table); void fib_free_table(struct fib_table *tb); #ifndef CONFIG_IP_MULTIPLE_TABLES @@ -315,7 +355,6 @@ static inline int fib_num_tclassid_users(struct net *net) } #endif int fib_unmerge(struct net *net); -void fib_flush_external(struct net *net); /* Exported by fib_semantics.c */ int ip_fib_check_default(__be32 gw, struct net_device *dev); diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index a5e7035fb93f..59557c07904b 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -222,6 +222,25 @@ static inline unsigned short ip_tunnel_info_af(const struct ip_tunnel_info return tun_info->mode & IP_TUNNEL_INFO_IPV6 ? AF_INET6 : AF_INET; } +static inline __be64 key32_to_tunnel_id(__be32 key) +{ +#ifdef __BIG_ENDIAN + return (__force __be64)key; +#else + return (__force __be64)((__force u64)key << 32); +#endif +} + +/* Returns the least-significant 32 bits of a __be64. */ +static inline __be32 tunnel_id_to_key32(__be64 tun_id) +{ +#ifdef __BIG_ENDIAN + return (__force __be32)tun_id; +#else + return (__force __be32)((__force u64)tun_id >> 32); +#endif +} + #ifdef CONFIG_INET int ip_tunnel_init(struct net_device *dev); @@ -236,6 +255,8 @@ void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops); void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, const u8 protocol); +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, + const u8 proto); int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd); int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict); int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu); diff --git a/include/net/kcm.h b/include/net/kcm.h index 2840b5825dcc..2a8965819db0 100644 --- a/include/net/kcm.h +++ b/include/net/kcm.h @@ -13,6 +13,7 @@ #include #include +#include #include extern unsigned int kcm_net_id; @@ -21,16 +22,8 @@ extern unsigned int kcm_net_id; #define KCM_STATS_INCR(stat) ((stat)++) struct kcm_psock_stats { - unsigned long long rx_msgs; - unsigned long long rx_bytes; unsigned long long tx_msgs; unsigned long long tx_bytes; - unsigned int rx_aborts; - unsigned int rx_mem_fail; - unsigned int rx_need_more_hdr; - unsigned int rx_msg_too_big; - unsigned int rx_msg_timeouts; - unsigned int rx_bad_hdr_len; unsigned long long reserved; unsigned long long unreserved; unsigned int tx_aborts; @@ -64,13 +57,6 @@ struct kcm_tx_msg { struct sk_buff *last_skb; }; -struct kcm_rx_msg { - int full_len; - int accum_len; - int offset; - int early_eaten; -}; - /* Socket structure for KCM client sockets */ struct kcm_sock { struct sock sk; @@ -87,6 +73,7 @@ struct kcm_sock { struct work_struct tx_work; struct list_head wait_psock_list; struct sk_buff *seq_skb; + u32 tx_stopped : 1; /* Don't use bit fields here, these are set under different locks */ bool tx_wait; @@ -104,11 +91,11 @@ struct bpf_prog; /* Structure for an attached lower socket */ struct kcm_psock { struct sock *sk; + struct strparser strp; struct kcm_mux *mux; int index; u32 tx_stopped : 1; - u32 rx_stopped : 1; u32 done : 1; u32 unattaching : 1; @@ -121,18 +108,12 @@ struct kcm_psock { struct kcm_psock_stats stats; /* Receive */ - struct sk_buff *rx_skb_head; - struct sk_buff **rx_skb_nextp; - struct sk_buff *ready_rx_msg; struct list_head psock_ready_list; - struct work_struct rx_work; - struct delayed_work rx_delayed_work; struct bpf_prog *bpf_prog; struct kcm_sock *rx_kcm; unsigned long long saved_rx_bytes; unsigned long long saved_rx_msgs; - struct timer_list rx_msg_timer; - unsigned int rx_need_bytes; + struct sk_buff *ready_rx_msg; /* Transmit */ struct kcm_sock *tx_kcm; @@ -146,6 +127,7 @@ struct kcm_net { struct mutex mutex; struct kcm_psock_stats aggregate_psock_stats; struct kcm_mux_stats aggregate_mux_stats; + struct strp_aggr_stats aggregate_strp_stats; struct list_head mux_list; int count; }; @@ -163,6 +145,7 @@ struct kcm_mux { struct kcm_mux_stats stats; struct kcm_psock_stats aggregate_psock_stats; + struct strp_aggr_stats aggregate_strp_stats; /* Receive */ spinlock_t rx_lock ____cacheline_aligned_in_smp; @@ -190,14 +173,6 @@ static inline void aggregate_psock_stats(struct kcm_psock_stats *stats, /* Save psock statistics in the mux when psock is being unattached. */ #define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) - SAVE_PSOCK_STATS(rx_msgs); - SAVE_PSOCK_STATS(rx_bytes); - SAVE_PSOCK_STATS(rx_aborts); - SAVE_PSOCK_STATS(rx_mem_fail); - SAVE_PSOCK_STATS(rx_need_more_hdr); - SAVE_PSOCK_STATS(rx_msg_too_big); - SAVE_PSOCK_STATS(rx_msg_timeouts); - SAVE_PSOCK_STATS(rx_bad_hdr_len); SAVE_PSOCK_STATS(tx_msgs); SAVE_PSOCK_STATS(tx_bytes); SAVE_PSOCK_STATS(reserved); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index e90095091aa0..b220dabeab45 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -11,6 +11,7 @@ #ifndef _NET_L3MDEV_H_ #define _NET_L3MDEV_H_ +#include #include /** @@ -18,30 +19,24 @@ * * @l3mdev_fib_table: Get FIB table id to use for lookups * - * @l3mdev_get_rtable: Get cached IPv4 rtable (dst_entry) for device + * @l3mdev_l3_rcv: Hook in L3 receive path * - * @l3mdev_get_saddr: Get source address for a flow + * @l3mdev_l3_out: Hook in L3 output path * - * @l3mdev_get_rt6_dst: Get cached IPv6 rt6_info (dst_entry) for device + * @l3mdev_link_scope_lookup: IPv6 lookup for linklocal and mcast destinations */ struct l3mdev_ops { u32 (*l3mdev_fib_table)(const struct net_device *dev); struct sk_buff * (*l3mdev_l3_rcv)(struct net_device *dev, struct sk_buff *skb, u16 proto); - - /* IPv4 ops */ - struct rtable * (*l3mdev_get_rtable)(const struct net_device *dev, - const struct flowi4 *fl4); - int (*l3mdev_get_saddr)(struct net_device *dev, - struct flowi4 *fl4); + struct sk_buff * (*l3mdev_l3_out)(struct net_device *dev, + struct sock *sk, struct sk_buff *skb, + u16 proto); /* IPv6 ops */ - struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, + struct dst_entry * (*l3mdev_link_scope_lookup)(const struct net_device *dev, struct flowi6 *fl6); - int (*l3mdev_get_saddr6)(struct net_device *dev, - const struct sock *sk, - struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -49,6 +44,8 @@ struct l3mdev_ops { int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg); +void l3mdev_update_flow(struct net *net, struct flowi *fl); + int l3mdev_master_ifindex_rcu(const struct net_device *dev); static inline int l3mdev_master_ifindex(struct net_device *dev) { @@ -80,7 +77,7 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) { /* netdev_master_upper_dev_get_rcu calls * list_first_or_null_rcu to walk the upper dev list. @@ -89,7 +86,7 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) * typecast to remove the const */ struct net_device *dev = (struct net_device *)_dev; - const struct net_device *master; + struct net_device *master; if (!dev) return NULL; @@ -104,26 +101,6 @@ const struct net_device *l3mdev_master_dev_rcu(const struct net_device *_dev) return master; } -/* get index of an interface to use for FIB lookups. For devices - * enslaved to an L3 master device FIB lookups are based on the - * master index - */ -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return l3mdev_master_ifindex_rcu(dev) ? : dev->ifindex; -} - -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - int oif; - - rcu_read_lock(); - oif = l3mdev_fib_oif_rcu(dev); - rcu_read_unlock(); - - return oif; -} - u32 l3mdev_fib_table_rcu(const struct net_device *dev); u32 l3mdev_fib_table_by_index(struct net *net, int ifindex); static inline u32 l3mdev_fib_table(const struct net_device *dev) @@ -137,39 +114,7 @@ static inline u32 l3mdev_fib_table(const struct net_device *dev) return tb_id; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - if (netif_is_l3_master(dev) && dev->l3mdev_ops->l3mdev_get_rtable) - return dev->l3mdev_ops->l3mdev_get_rtable(dev, fl4); - - return NULL; -} - -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - struct net_device *dev; - bool rc = false; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_l3_master(dev); - - rcu_read_unlock(); - - return rc; -} - -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); - -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6); +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -199,6 +144,34 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return l3mdev_l3_rcv(skb, AF_INET6); } +static inline +struct sk_buff *l3mdev_l3_out(struct sock *sk, struct sk_buff *skb, u16 proto) +{ + struct net_device *dev = skb_dst(skb)->dev; + + if (netif_is_l3_slave(dev)) { + struct net_device *master; + + master = netdev_master_upper_dev_get_rcu(dev); + if (master && master->l3mdev_ops->l3mdev_l3_out) + skb = master->l3mdev_ops->l3mdev_l3_out(master, sk, + skb, proto); + } + + return skb; +} + +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET); +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return l3mdev_l3_out(sk, skb, AF_INET6); +} #else static inline int l3mdev_master_ifindex_rcu(const struct net_device *dev) @@ -216,20 +189,11 @@ static inline int l3mdev_master_ifindex_by_index(struct net *net, int ifindex) } static inline -const struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) +struct net_device *l3mdev_master_dev_rcu(const struct net_device *dev) { return NULL; } -static inline int l3mdev_fib_oif_rcu(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} -static inline int l3mdev_fib_oif(struct net_device *dev) -{ - return dev ? dev->ifindex : 0; -} - static inline u32 l3mdev_fib_table_rcu(const struct net_device *dev) { return 0; @@ -243,35 +207,12 @@ static inline u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) return 0; } -static inline struct rtable *l3mdev_get_rtable(const struct net_device *dev, - const struct flowi4 *fl4) -{ - return NULL; -} - -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - return false; -} - -static inline int l3mdev_get_saddr(struct net *net, int ifindex, - struct flowi4 *fl4) -{ - return 0; -} - static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, struct flowi6 *fl6) { return NULL; } -static inline int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - return 0; -} - static inline struct sk_buff *l3mdev_ip_rcv(struct sk_buff *skb) { @@ -284,12 +225,28 @@ struct sk_buff *l3mdev_ip6_rcv(struct sk_buff *skb) return skb; } +static inline +struct sk_buff *l3mdev_ip_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + +static inline +struct sk_buff *l3mdev_ip6_out(struct sock *sk, struct sk_buff *skb) +{ + return skb; +} + static inline int l3mdev_fib_rule_match(struct net *net, struct flowi *fl, struct fib_lookup_arg *arg) { return 1; } +static inline +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ +} #endif #endif /* _NET_L3MDEV_H_ */ diff --git a/include/net/lwtunnel.h b/include/net/lwtunnel.h index e9f116e29c22..ea3f80f58fd6 100644 --- a/include/net/lwtunnel.h +++ b/include/net/lwtunnel.h @@ -13,6 +13,13 @@ /* lw tunnel state flags */ #define LWTUNNEL_STATE_OUTPUT_REDIRECT BIT(0) #define LWTUNNEL_STATE_INPUT_REDIRECT BIT(1) +#define LWTUNNEL_STATE_XMIT_REDIRECT BIT(2) + +enum { + LWTUNNEL_XMIT_DONE, + LWTUNNEL_XMIT_CONTINUE, +}; + struct lwtunnel_state { __u16 type; @@ -21,6 +28,7 @@ struct lwtunnel_state { int (*orig_output)(struct net *net, struct sock *sk, struct sk_buff *skb); int (*orig_input)(struct sk_buff *); int len; + __u16 headroom; __u8 data[0]; }; @@ -34,6 +42,7 @@ struct lwtunnel_encap_ops { struct lwtunnel_state *lwtstate); int (*get_encap_size)(struct lwtunnel_state *lwtstate); int (*cmp_encap)(struct lwtunnel_state *a, struct lwtunnel_state *b); + int (*xmit)(struct sk_buff *skb); }; #ifdef CONFIG_LWTUNNEL @@ -75,6 +84,24 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) return false; } + +static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) +{ + if (lwtstate && (lwtstate->flags & LWTUNNEL_STATE_XMIT_REDIRECT)) + return true; + + return false; +} + +static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, + unsigned int mtu) +{ + if (lwtunnel_xmit_redirect(lwtstate) && lwtstate->headroom < mtu) + return lwtstate->headroom; + + return 0; +} + int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *op, @@ -90,6 +117,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int hdr_len); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb); int lwtunnel_input(struct sk_buff *skb); +int lwtunnel_xmit(struct sk_buff *skb); #else @@ -117,6 +145,17 @@ static inline bool lwtunnel_input_redirect(struct lwtunnel_state *lwtstate) return false; } +static inline bool lwtunnel_xmit_redirect(struct lwtunnel_state *lwtstate) +{ + return false; +} + +static inline unsigned int lwtunnel_headroom(struct lwtunnel_state *lwtstate, + unsigned int mtu) +{ + return 0; +} + static inline int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *op, unsigned int num) { @@ -170,6 +209,11 @@ static inline int lwtunnel_input(struct sk_buff *skb) return -EOPNOTSUPP; } +static inline int lwtunnel_xmit(struct sk_buff *skb) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_LWTUNNEL */ #define MODULE_ALIAS_RTNL_LWT(encap_type) MODULE_ALIAS("rtnl-lwt-" __stringify(encap_type)) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index cca510a585c3..a810dfcb83c2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -715,6 +715,7 @@ enum mac80211_tx_info_flags { * frame (PS-Poll or uAPSD). * @IEEE80211_TX_CTRL_RATE_INJECT: This frame is injected with rate information * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame + * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path * * These flags are used in tx_info->control.flags. */ @@ -723,6 +724,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), IEEE80211_TX_CTRL_RATE_INJECT = BIT(2), IEEE80211_TX_CTRL_AMSDU = BIT(3), + IEEE80211_TX_CTRL_FAST_XMIT = BIT(4), }; /* @@ -1735,6 +1737,9 @@ struct ieee80211_sta_rates { * @supp_rates: Bitmap of supported rates (per band) * @ht_cap: HT capabilities of this STA; restricted to our own capabilities * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities + * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU + * that this station is allowed to transmit to us. + * Can be modified by driver. * @wme: indicates whether the STA supports QoS/WME (if local devices does, * otherwise always false) * @drv_priv: data area for driver use, will always be aligned to @@ -1775,6 +1780,7 @@ struct ieee80211_sta { u16 aid; struct ieee80211_sta_ht_cap ht_cap; struct ieee80211_sta_vht_cap vht_cap; + u8 max_rx_aggregation_subframes; bool wme; u8 uapsd_queues; u8 max_sp; @@ -2014,6 +2020,11 @@ struct ieee80211_txq { * @IEEE80211_HW_TX_FRAG_LIST: Hardware (or driver) supports sending frag_list * skbs, needed for zero-copy software A-MSDU. * + * @IEEE80211_HW_REPORTS_LOW_ACK: The driver (or firmware) reports low ack event + * by ieee80211_report_low_ack() based on its own algorithm. For such + * drivers, mac80211 packet loss mechanism will not be triggered and driver + * is completely depending on firmware event for station kickout. + * * @NUM_IEEE80211_HW_FLAGS: number of hardware flags, used for sizing arrays */ enum ieee80211_hw_flags { @@ -2054,6 +2065,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_USES_RSS, IEEE80211_HW_TX_AMSDU, IEEE80211_HW_TX_FRAG_LIST, + IEEE80211_HW_REPORTS_LOW_ACK, /* keep last, obviously */ NUM_IEEE80211_HW_FLAGS @@ -2141,6 +2153,14 @@ enum ieee80211_hw_flags { * the default is _GI | _BANDWIDTH. * Use the %IEEE80211_RADIOTAP_VHT_KNOWN_* values. * + * @radiotap_timestamp: Information for the radiotap timestamp field; if the + * 'units_pos' member is set to a non-negative value it must be set to + * a combination of a IEEE80211_RADIOTAP_TIMESTAMP_UNIT_* and a + * IEEE80211_RADIOTAP_TIMESTAMP_SPOS_* value, and then the timestamp + * field will be added and populated from the &struct ieee80211_rx_status + * device_timestamp. If the 'accuracy' member is non-negative, it's put + * into the accuracy radiotap field and the accuracy known flag is set. + * * @netdev_features: netdev features to be set in each netdev created * from this HW. Note that not all features are usable with mac80211, * other features will be rejected during HW registration. @@ -2159,6 +2179,8 @@ enum ieee80211_hw_flags { * @n_cipher_schemes: a size of an array of cipher schemes definitions. * @cipher_schemes: a pointer to an array of cipher scheme definitions * supported by HW. + * @max_nan_de_entries: maximum number of NAN DE functions supported by the + * device. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -2184,11 +2206,16 @@ struct ieee80211_hw { u8 offchannel_tx_hw_queue; u8 radiotap_mcs_details; u16 radiotap_vht_details; + struct { + int units_pos; + s16 accuracy; + } radiotap_timestamp; netdev_features_t netdev_features; u8 uapsd_queues; u8 uapsd_max_sp_len; u8 n_cipher_schemes; const struct ieee80211_cipher_scheme *cipher_schemes; + u8 max_nan_de_entries; }; static inline bool _ieee80211_hw_check(struct ieee80211_hw *hw, @@ -3085,11 +3112,8 @@ enum ieee80211_reconfig_type { * * @sta_add_debugfs: Drivers can use this callback to add debugfs files * when a station is added to mac80211's station list. This callback - * and @sta_remove_debugfs should be within a CONFIG_MAC80211_DEBUGFS - * conditional. This callback can sleep. - * - * @sta_remove_debugfs: Remove the debugfs files which were added using - * @sta_add_debugfs. This callback can sleep. + * should be within a CONFIG_MAC80211_DEBUGFS conditional. This + * callback can sleep. * * @sta_notify: Notifies low level driver about power state transition of an * associated station, AP, IBSS/WDS/mesh peer etc. For a VIF operating @@ -3147,6 +3171,12 @@ enum ieee80211_reconfig_type { * required function. * The callback can sleep. * + * @offset_tsf: Offset the TSF timer by the specified value in the + * firmware/hardware. Preferred to set_tsf as it avoids delay between + * calling set_tsf() and hardware getting programmed, which will show up + * as TSF delay. Is not a required function. + * The callback can sleep. + * * @reset_tsf: Reset the TSF timer and allow firmware/hardware to synchronize * with other STAs in the IBSS. This is only used in IBSS mode. This * function is optional if the firmware/hardware takes full care of @@ -3401,6 +3431,21 @@ enum ieee80211_reconfig_type { * synchronization which is needed in case driver has in its RSS queues * pending frames that were received prior to the control path action * currently taken (e.g. disassociation) but are not processed yet. + * + * @start_nan: join an existing NAN cluster, or create a new one. + * @stop_nan: leave the NAN cluster. + * @nan_change_conf: change NAN configuration. The data in cfg80211_nan_conf + * contains full new configuration and changes specify which parameters + * are changed with respect to the last NAN config. + * The driver gets both full configuration and the changed parameters since + * some devices may need the full configuration while others need only the + * changed parameters. + * @add_nan_func: Add a NAN function. Returns 0 on success. The data in + * cfg80211_nan_func must not be referenced outside the scope of + * this call. + * @del_nan_func: Remove a NAN function. The driver must call + * ieee80211_nan_func_terminated() with + * NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST reason code upon removal. */ struct ieee80211_ops { void (*tx)(struct ieee80211_hw *hw, @@ -3485,10 +3530,6 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct dentry *dir); - void (*sta_remove_debugfs)(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - struct dentry *dir); #endif void (*sta_notify)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, enum sta_notify_cmd, struct ieee80211_sta *sta); @@ -3516,6 +3557,8 @@ struct ieee80211_ops { u64 (*get_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); void (*set_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u64 tsf); + void (*offset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, + s64 offset); void (*reset_tsf)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*tx_last_beacon)(struct ieee80211_hw *hw); int (*ampdu_action)(struct ieee80211_hw *hw, @@ -3640,6 +3683,21 @@ struct ieee80211_ops { void (*wake_tx_queue)(struct ieee80211_hw *hw, struct ieee80211_txq *txq); void (*sync_rx_queues)(struct ieee80211_hw *hw); + + int (*start_nan)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_nan_conf *conf); + int (*stop_nan)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + int (*nan_change_conf)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct cfg80211_nan_conf *conf, u32 changes); + int (*add_nan_func)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const struct cfg80211_nan_func *nan_func); + void (*del_nan_func)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + u8 instance_id); }; /** @@ -5713,4 +5771,36 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, void ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *frame_cnt, unsigned long *byte_cnt); + +/** + * ieee80211_nan_func_terminated - notify about NAN function termination. + * + * This function is used to notify mac80211 about NAN function termination. + * Note that this function can't be called from hard irq. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @inst_id: the local instance id + * @reason: termination reason (one of the NL80211_NAN_FUNC_TERM_REASON_*) + * @gfp: allocation flags + */ +void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + gfp_t gfp); + +/** + * ieee80211_nan_func_match - notify about NAN function match event. + * + * This function is used to notify mac80211 about NAN function match. The + * cookie inside the match struct will be assigned by mac80211. + * Note that this function can't be called from hard irq. + * + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @match: match event information + * @gfp: allocation flags + */ +void ieee80211_nan_func_match(struct ieee80211_vif *vif, + struct cfg80211_nan_match_params *match, + gfp_t gfp); + #endif /* MAC80211_H */ diff --git a/include/net/mpls.h b/include/net/mpls.h index 5b3b5addfb08..1dbc669b770e 100644 --- a/include/net/mpls.h +++ b/include/net/mpls.h @@ -19,21 +19,18 @@ #define MPLS_HLEN 4 +struct mpls_shim_hdr { + __be32 label_stack_entry; +}; + static inline bool eth_p_mpls(__be16 eth_type) { return eth_type == htons(ETH_P_MPLS_UC) || eth_type == htons(ETH_P_MPLS_MC); } -/* - * For non-MPLS skbs this will correspond to the network header. - * For MPLS skbs it will be before the network_header as the MPLS - * label stack lies between the end of the mac header and the network - * header. That is, for MPLS skbs the end of the mac header - * is the top of the MPLS label stack. - */ -static inline unsigned char *skb_mpls_header(struct sk_buff *skb) +static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) { - return skb_mac_header(skb) + skb->mac_len; + return (struct mpls_shim_hdr *)skb_network_header(skb); } #endif diff --git a/include/net/ncsi.h b/include/net/ncsi.h index 1dbf42f79750..68680baac0fd 100644 --- a/include/net/ncsi.h +++ b/include/net/ncsi.h @@ -31,6 +31,7 @@ struct ncsi_dev { struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*notifier)(struct ncsi_dev *nd)); int ncsi_start_dev(struct ncsi_dev *nd); +void ncsi_stop_dev(struct ncsi_dev *nd); void ncsi_unregister_dev(struct ncsi_dev *nd); #else /* !CONFIG_NET_NCSI */ static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, @@ -44,6 +45,10 @@ static inline int ncsi_start_dev(struct ncsi_dev *nd) return -ENOTTY; } +static void ncsi_stop_dev(struct ncsi_dev *nd) +{ +} + static inline void ncsi_unregister_dev(struct ncsi_dev *nd) { } diff --git a/include/net/netfilter/br_netfilter.h b/include/net/netfilter/br_netfilter.h index e8d1448425a7..0b0c35c37125 100644 --- a/include/net/netfilter/br_netfilter.h +++ b/include/net/netfilter/br_netfilter.h @@ -15,6 +15,12 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) void nf_bridge_update_protocol(struct sk_buff *skb); +int br_nf_hook_thresh(unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)); + static inline struct nf_bridge_info * nf_bridge_info_get(const struct sk_buff *skb) { diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 445b019c2078..50418052a520 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -42,7 +42,6 @@ union nf_conntrack_expect_proto { #include #include -#include #ifdef CONFIG_NETFILTER_DEBUG #define NF_CT_ASSERT(x) WARN_ON(!(x)) @@ -73,7 +72,7 @@ struct nf_conn_help { #include struct nf_conn { - /* Usage count in here is 1 for hash table/destruct timer, 1 per skb, + /* Usage count in here is 1 for hash table, 1 per skb, * plus 1 for any connection(s) we are `master' for * * Hint, SKB address this struct and refcnt via skb->nfct and @@ -96,8 +95,8 @@ struct nf_conn { /* Have we seen traffic both ways yet? (bitset) */ unsigned long status; - /* Timer function; drops refcnt when it goes off. */ - struct timer_list timeout; + /* jiffies32 when this ct is considered dead */ + u32 timeout; possible_net_t ct_net; @@ -220,21 +219,14 @@ static inline void nf_ct_refresh(struct nf_conn *ct, __nf_ct_refresh_acct(ct, 0, skb, extra_jiffies, 0); } -bool __nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, int do_acct); - /* kill conntrack and do accounting */ -static inline bool nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb) -{ - return __nf_ct_kill_acct(ct, ctinfo, skb, 1); -} +bool nf_ct_kill_acct(struct nf_conn *ct, enum ip_conntrack_info ctinfo, + const struct sk_buff *skb); /* kill conntrack without accounting */ static inline bool nf_ct_kill(struct nf_conn *ct) { - return __nf_ct_kill_acct(ct, 0, NULL, 0); + return nf_ct_delete(ct, 0, 0); } /* These are for NAT. Icky. */ @@ -291,21 +283,55 @@ static inline bool nf_is_loopback_packet(const struct sk_buff *skb) return skb->dev && skb->skb_iif && skb->dev->flags & IFF_LOOPBACK; } +#define nfct_time_stamp ((u32)(jiffies)) + /* jiffies until ct expires, 0 if already expired */ static inline unsigned long nf_ct_expires(const struct nf_conn *ct) { - long timeout = (long)ct->timeout.expires - (long)jiffies; + s32 timeout = ct->timeout - nfct_time_stamp; return timeout > 0 ? timeout : 0; } +static inline bool nf_ct_is_expired(const struct nf_conn *ct) +{ + return (__s32)(ct->timeout - nfct_time_stamp) <= 0; +} + +/* use after obtaining a reference count */ +static inline bool nf_ct_should_gc(const struct nf_conn *ct) +{ + return nf_ct_is_expired(ct) && nf_ct_is_confirmed(ct) && + !nf_ct_is_dying(ct); +} + struct kernel_param; int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp); int nf_conntrack_hash_resize(unsigned int hashsize); + +extern struct hlist_nulls_head *nf_conntrack_hash; extern unsigned int nf_conntrack_htable_size; +extern seqcount_t nf_conntrack_generation; extern unsigned int nf_conntrack_max; +/* must be called with rcu read lock held */ +static inline void +nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +{ + struct hlist_nulls_head *hptr; + unsigned int sequence, hsz; + + do { + sequence = read_seqcount_begin(&nf_conntrack_generation); + hsz = nf_conntrack_htable_size; + hptr = nf_conntrack_hash; + } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + + *hash = hptr; + *hsize = hsz; +} + struct nf_conn *nf_ct_tmpl_alloc(struct net *net, const struct nf_conntrack_zone *zone, gfp_t flags); diff --git a/include/net/netfilter/nf_conntrack_core.h b/include/net/netfilter/nf_conntrack_core.h index 79d7ac5c9740..62e17d1319ff 100644 --- a/include/net/netfilter/nf_conntrack_core.h +++ b/include/net/netfilter/nf_conntrack_core.h @@ -51,8 +51,6 @@ bool nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto); -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize); - /* Find a connection corresponding to a tuple. */ struct nf_conntrack_tuple_hash * nf_conntrack_find_get(struct net *net, @@ -83,7 +81,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, #define CONNTRACK_LOCKS 1024 -extern struct hlist_nulls_head *nf_conntrack_hash; extern spinlock_t nf_conntrack_locks[CONNTRACK_LOCKS]; void nf_conntrack_lock(spinlock_t *lock); diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index fa36447371c6..12d967b58726 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -12,12 +12,19 @@ #include #include +enum nf_ct_ecache_state { + NFCT_ECACHE_UNKNOWN, /* destroy event not sent */ + NFCT_ECACHE_DESTROY_FAIL, /* tried but failed to send destroy event */ + NFCT_ECACHE_DESTROY_SENT, /* sent destroy event after failure */ +}; + struct nf_conntrack_ecache { - unsigned long cache; /* bitops want long */ - unsigned long missed; /* missed events */ - u16 ctmask; /* bitmask of ct events to be delivered */ - u16 expmask; /* bitmask of expect events to be delivered */ - u32 portid; /* netlink portid of destroyer */ + unsigned long cache; /* bitops want long */ + unsigned long missed; /* missed events */ + u16 ctmask; /* bitmask of ct events to be delivered */ + u16 expmask; /* bitmask of expect events to be delivered */ + u32 portid; /* netlink portid of destroyer */ + enum nf_ct_ecache_state state; /* ecache state */ }; static inline struct nf_conntrack_ecache * diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index cdc920b4c4c2..8992e4229da9 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -63,10 +63,6 @@ struct nf_conntrack_l3proto { size_t nla_size; -#ifdef CONFIG_SYSCTL - const char *ctl_table_path; -#endif /* CONFIG_SYSCTL */ - /* Init l3proto pernet data */ int (*init_net)(struct net *net); diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index 1a5fb36f165f..de629f1520df 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -134,14 +134,6 @@ void nf_ct_l4proto_pernet_unregister(struct net *net, int nf_ct_l4proto_register(struct nf_conntrack_l4proto *proto); void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *proto); -static inline void nf_ct_kfree_compat_sysctl_table(struct nf_proto_net *pn) -{ -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - kfree(pn->ctl_compat_table); - pn->ctl_compat_table = NULL; -#endif -} - /* Generic netlink helpers */ int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple); diff --git a/include/net/netfilter/nf_log.h b/include/net/netfilter/nf_log.h index 83d855ba6af1..309cd267be4f 100644 --- a/include/net/netfilter/nf_log.h +++ b/include/net/netfilter/nf_log.h @@ -2,15 +2,10 @@ #define _NF_LOG_H #include +#include -/* those NF_LOG_* defines and struct nf_loginfo are legacy definitios that will - * disappear once iptables is replaced with pkttables. Please DO NOT use them - * for any new code! */ -#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ -#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ -#define NF_LOG_IPOPT 0x04 /* Log IP options */ -#define NF_LOG_UID 0x08 /* Log UID owning local socket */ -#define NF_LOG_MASK 0x0f +/* Log tcp sequence, tcp options, ip options and uid owning local socket */ +#define NF_LOG_DEFAULT_MASK 0x0f /* This flag indicates that copy_len field in nf_loginfo is set */ #define NF_LOG_F_COPY_LEN 0x1 @@ -60,8 +55,7 @@ struct nf_logger { int nf_log_register(u_int8_t pf, struct nf_logger *logger); void nf_log_unregister(struct nf_logger *logger); -void nf_log_set(struct net *net, u_int8_t pf, - const struct nf_logger *logger); +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger); void nf_log_unset(struct net *net, const struct nf_logger *logger); int nf_log_bind_pf(struct net *net, u_int8_t pf, diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 0dbce55437f2..2280cfe86c56 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -11,7 +11,6 @@ struct nf_queue_entry { struct sk_buff *skb; unsigned int id; - struct nf_hook_ops *elem; struct nf_hook_state state; u16 size; /* sizeof(entry) + saved route keys */ @@ -22,10 +21,10 @@ struct nf_queue_entry { /* Packet queuing */ struct nf_queue_handler { - int (*outfn)(struct nf_queue_entry *entry, - unsigned int queuenum); - void (*nf_hook_drop)(struct net *net, - struct nf_hook_ops *ops); + int (*outfn)(struct nf_queue_entry *entry, + unsigned int queuenum); + void (*nf_hook_drop)(struct net *net, + const struct nf_hook_entry *hooks); }; void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); @@ -41,23 +40,19 @@ static inline void init_hashrandom(u32 *jhash_initval) *jhash_initval = prandom_u32(); } -static inline u32 hash_v4(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v4(const struct iphdr *iph, u32 initval) { - const struct iphdr *iph = ip_hdr(skb); - /* packets in either direction go into same queue */ if ((__force u32)iph->saddr < (__force u32)iph->daddr) return jhash_3words((__force u32)iph->saddr, - (__force u32)iph->daddr, iph->protocol, jhash_initval); + (__force u32)iph->daddr, iph->protocol, initval); return jhash_3words((__force u32)iph->daddr, - (__force u32)iph->saddr, iph->protocol, jhash_initval); + (__force u32)iph->saddr, iph->protocol, initval); } -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) -static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) +static inline u32 hash_v6(const struct ipv6hdr *ip6h, u32 initval) { - const struct ipv6hdr *ip6h = ipv6_hdr(skb); u32 a, b, c; if ((__force u32)ip6h->saddr.s6_addr32[3] < @@ -75,20 +70,50 @@ static inline u32 hash_v6(const struct sk_buff *skb, u32 jhash_initval) else c = (__force u32) ip6h->daddr.s6_addr32[1]; - return jhash_3words(a, b, c, jhash_initval); + return jhash_3words(a, b, c, initval); +} + +static inline u32 hash_bridge(const struct sk_buff *skb, u32 initval) +{ + struct ipv6hdr *ip6h, _ip6h; + struct iphdr *iph, _iph; + + switch (eth_hdr(skb)->h_proto) { + case htons(ETH_P_IP): + iph = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*iph), &_iph); + if (iph) + return hash_v4(iph, initval); + break; + case htons(ETH_P_IPV6): + ip6h = skb_header_pointer(skb, skb_network_offset(skb), + sizeof(*ip6h), &_ip6h); + if (ip6h) + return hash_v6(ip6h, initval); + break; + } + + return 0; } -#endif static inline u32 nfqueue_hash(const struct sk_buff *skb, u16 queue, u16 queues_total, u8 family, - u32 jhash_initval) + u32 initval) { - if (family == NFPROTO_IPV4) - queue += ((u64) hash_v4(skb, jhash_initval) * queues_total) >> 32; -#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) - else if (family == NFPROTO_IPV6) - queue += ((u64) hash_v6(skb, jhash_initval) * queues_total) >> 32; -#endif + switch (family) { + case NFPROTO_IPV4: + queue += reciprocal_scale(hash_v4(ip_hdr(skb), initval), + queues_total); + break; + case NFPROTO_IPV6: + queue += reciprocal_scale(hash_v6(ipv6_hdr(skb), initval), + queues_total); + break; + case NFPROTO_BRIDGE: + queue += reciprocal_scale(hash_bridge(skb, initval), + queues_total); + break; + } return queue; } diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index f2f13399ce44..5031e072567b 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -19,6 +19,7 @@ struct nft_pktinfo { const struct net_device *out; u8 pf; u8 hook; + bool tprot_set; u8 tprot; /* for x_tables compatibility */ struct xt_action_param xt; @@ -36,6 +37,23 @@ static inline void nft_set_pktinfo(struct nft_pktinfo *pkt, pkt->pf = pkt->xt.family = state->pf; } +static inline void nft_set_pktinfo_proto_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb) +{ + pkt->tprot_set = false; + pkt->tprot = 0; + pkt->xt.thoff = 0; + pkt->xt.fragoff = 0; +} + +static inline void nft_set_pktinfo_unspec(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + nft_set_pktinfo_proto_unspec(pkt, skb); +} + /** * struct nft_verdict - nf_tables verdict * @@ -127,6 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type) return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE; } +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest); unsigned int nft_parse_register(const struct nlattr *attr); int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg); @@ -251,7 +270,8 @@ struct nft_set_ops { int (*insert)(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem); + const struct nft_set_elem *elem, + struct nft_set_ext **ext); void (*activate)(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem); diff --git a/include/net/netfilter/nf_tables_bridge.h b/include/net/netfilter/nf_tables_bridge.h deleted file mode 100644 index 511fb79f6dad..000000000000 --- a/include/net/netfilter/nf_tables_bridge.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _NET_NF_TABLES_BRIDGE_H -#define _NET_NF_TABLES_BRIDGE_H - -int nft_bridge_iphdr_validate(struct sk_buff *skb); -int nft_bridge_ip6hdr_validate(struct sk_buff *skb); - -#endif /* _NET_NF_TABLES_BRIDGE_H */ diff --git a/include/net/netfilter/nf_tables_core.h b/include/net/netfilter/nf_tables_core.h index a9060dd99db7..00f4f6b1b1ba 100644 --- a/include/net/netfilter/nf_tables_core.h +++ b/include/net/netfilter/nf_tables_core.h @@ -28,6 +28,9 @@ extern const struct nft_expr_ops nft_cmp_fast_ops; int nft_cmp_module_init(void); void nft_cmp_module_exit(void); +int nft_range_module_init(void); +void nft_range_module_exit(void); + int nft_lookup_module_init(void); void nft_lookup_module_exit(void); diff --git a/include/net/netfilter/nf_tables_ipv4.h b/include/net/netfilter/nf_tables_ipv4.h index ca6ef6bf775e..968f00b82fb5 100644 --- a/include/net/netfilter/nf_tables_ipv4.h +++ b/include/net/netfilter/nf_tables_ipv4.h @@ -14,11 +14,54 @@ nft_set_pktinfo_ipv4(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); ip = ip_hdr(pkt->skb); + pkt->tprot_set = true; pkt->tprot = ip->protocol; pkt->xt.thoff = ip_hdrlen(pkt->skb); pkt->xt.fragoff = ntohs(ip->frag_off) & IP_OFFSET; } +static inline int +__nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + struct iphdr *iph, _iph; + u32 len, thoff; + + iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), + &_iph); + if (!iph) + return -1; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return -1; + + len = ntohs(iph->tot_len); + thoff = iph->ihl * 4; + if (skb->len < len) + return -1; + else if (len < thoff) + return -1; + + pkt->tprot_set = true; + pkt->tprot = iph->protocol; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; + + return 0; +} + +static inline void +nft_set_pktinfo_ipv4_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv4_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); +} + extern struct nft_af_info nft_af_ipv4; #endif diff --git a/include/net/netfilter/nf_tables_ipv6.h b/include/net/netfilter/nf_tables_ipv6.h index 8ad39a6a5fe1..d150b5066201 100644 --- a/include/net/netfilter/nf_tables_ipv6.h +++ b/include/net/netfilter/nf_tables_ipv6.h @@ -4,7 +4,7 @@ #include #include -static inline int +static inline void nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, struct sk_buff *skb, const struct nf_hook_state *state) @@ -15,15 +15,64 @@ nft_set_pktinfo_ipv6(struct nft_pktinfo *pkt, nft_set_pktinfo(pkt, skb, state); protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - /* If malformed, drop it */ + if (protohdr < 0) { + nft_set_pktinfo_proto_unspec(pkt, skb); + return; + } + + pkt->tprot_set = true; + pkt->tprot = protohdr; + pkt->xt.thoff = thoff; + pkt->xt.fragoff = frag_off; +} + +static inline int +__nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ +#if IS_ENABLED(CONFIG_IPV6) + struct ipv6hdr *ip6h, _ip6h; + unsigned int thoff = 0; + unsigned short frag_off; + int protohdr; + u32 pkt_len; + + ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), + &_ip6h); + if (!ip6h) + return -1; + + if (ip6h->version != 6) + return -1; + + pkt_len = ntohs(ip6h->payload_len); + if (pkt_len + sizeof(*ip6h) > skb->len) + return -1; + + protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); if (protohdr < 0) return -1; + pkt->tprot_set = true; pkt->tprot = protohdr; pkt->xt.thoff = thoff; pkt->xt.fragoff = frag_off; return 0; +#else + return -1; +#endif +} + +static inline void +nft_set_pktinfo_ipv6_validate(struct nft_pktinfo *pkt, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + nft_set_pktinfo(pkt, skb, state); + if (__nft_set_pktinfo_ipv6_validate(pkt, skb, state) < 0) + nft_set_pktinfo_proto_unspec(pkt, skb); } extern struct nft_af_info nft_af_ipv6; diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index 38b1a80517f0..e469e85de3f9 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -15,10 +15,6 @@ struct nf_proto_net { #ifdef CONFIG_SYSCTL struct ctl_table_header *ctl_table_header; struct ctl_table *ctl_table; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - struct ctl_table_header *ctl_compat_header; - struct ctl_table *ctl_compat_table; -#endif #endif unsigned int users; }; @@ -58,10 +54,6 @@ struct nf_ip_net { struct nf_udp_net udp; struct nf_icmp_net icmp; struct nf_icmp_net icmpv6; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct ctl_table_header *ctl_table_header; - struct ctl_table *ctl_table; -#endif }; struct ct_pcpu { diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index d061ffeb1e71..7adf4386ac8f 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,7 +40,6 @@ struct netns_ipv4 { #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; - struct fib_table __rcu *fib_local; struct fib_table __rcu *fib_main; struct fib_table __rcu *fib_default; #endif diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 36d723579af2..58487b1cc99a 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -16,6 +16,6 @@ struct netns_nf { #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; #endif - struct list_head hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; + struct nf_hook_entry __rcu *hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; }; #endif diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 24cd3949a9a4..27bb9633c69d 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -11,7 +11,7 @@ struct ctl_table_header; struct xfrm_policy_hash { - struct hlist_head *table; + struct hlist_head __rcu *table; unsigned int hmask; u8 dbits4; u8 sbits4; @@ -38,14 +38,12 @@ struct netns_xfrm { * mode. Also, it can be used by ah/esp icmp error handler to find * offending SA. */ - struct hlist_head *state_bydst; - struct hlist_head *state_bysrc; - struct hlist_head *state_byspi; + struct hlist_head __rcu *state_bydst; + struct hlist_head __rcu *state_bysrc; + struct hlist_head __rcu *state_byspi; unsigned int state_hmask; unsigned int state_num; struct work_struct state_hash_work; - struct hlist_head state_gc_list; - struct work_struct state_gc_work; struct list_head policy_all; struct hlist_head *policy_byidx; @@ -73,7 +71,7 @@ struct netns_xfrm { struct dst_ops xfrm6_dst_ops; #endif spinlock_t xfrm_state_lock; - rwlock_t xfrm_policy_lock; + spinlock_t xfrm_policy_lock; struct mutex xfrm_cfg_mutex; /* flow cache part */ diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index c99508d426cc..767b03a3fe67 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -69,17 +69,19 @@ struct tcf_exts { int police; }; -static inline void tcf_exts_init(struct tcf_exts *exts, int action, int police) +static inline int tcf_exts_init(struct tcf_exts *exts, int action, int police) { #ifdef CONFIG_NET_CLS_ACT exts->type = 0; exts->nr_actions = 0; exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *), GFP_KERNEL); - WARN_ON(!exts->actions); /* TODO: propagate the error to callers */ + if (!exts->actions) + return -ENOMEM; #endif exts->action = action; exts->police = police; + return 0; } /** @@ -121,7 +123,7 @@ static inline void tcf_exts_to_list(const struct tcf_exts *exts, for (i = 0; i < exts->nr_actions; i++) { struct tc_action *a = exts->actions[i]; - list_add(&a->list, actions); + list_add_tail(&a->list, actions); } #endif } @@ -484,4 +486,20 @@ struct tc_cls_matchall_offload { unsigned long cookie; }; +enum tc_clsbpf_command { + TC_CLSBPF_ADD, + TC_CLSBPF_REPLACE, + TC_CLSBPF_DESTROY, + TC_CLSBPF_STATS, +}; + +struct tc_cls_bpf_offload { + enum tc_clsbpf_command command; + struct tcf_exts *exts; + struct bpf_prog *prog; + const char *name; + bool exts_integrated; + u32 gen_flags; +}; + #endif diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 7caa99b482c6..cd334c9584e9 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -90,8 +90,8 @@ int unregister_qdisc(struct Qdisc_ops *qops); void qdisc_get_default(char *id, size_t len); int qdisc_set_default(const char *id); -void qdisc_list_add(struct Qdisc *q); -void qdisc_list_del(struct Qdisc *q); +void qdisc_hash_add(struct Qdisc *q); +void qdisc_hash_del(struct Qdisc *q); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle); struct Qdisc *qdisc_lookup_class(struct net_device *dev, u32 handle); struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, diff --git a/include/net/pptp.h b/include/net/pptp.h new file mode 100644 index 000000000000..92e9f1fe2628 --- /dev/null +++ b/include/net/pptp.h @@ -0,0 +1,23 @@ +#ifndef _NET_PPTP_H +#define _NET_PPTP_H + +#define PPP_LCP_ECHOREQ 0x09 +#define PPP_LCP_ECHOREP 0x0A +#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) + +#define MISSING_WINDOW 20 +#define WRAPPED(curseq, lastseq)\ + ((((curseq) & 0xffffff00) == 0) &&\ + (((lastseq) & 0xffffff00) == 0xffffff00)) + +#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) +struct pptp_gre_header { + struct gre_base_hdr gre_hd; + __be16 payload_len; + __be16 call_id; + __be32 seq; + __be32 ack; +} __packed; + + +#endif diff --git a/include/net/route.h b/include/net/route.h index ad777d79af94..0429d47cad25 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include #include @@ -285,15 +284,6 @@ static inline struct rtable *ip_route_connect(struct flowi4 *fl4, ip_route_connect_init(fl4, dst, src, tos, oif, protocol, sport, dport, sk); - if (!src && oif) { - int rc; - - rc = l3mdev_get_saddr(net, oif, fl4); - if (rc < 0) - return ERR_PTR(rc); - - src = fl4->saddr; - } if (!dst || !src) { rt = __ip_route_output_key(net, fl4); if (IS_ERR(rt)) diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 909aff2db2b3..e6aa0a249672 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,14 @@ struct qdisc_size_table { u16 data[]; }; +/* similar to sk_buff_head, but skb->prev pointer is undefined. */ +struct qdisc_skb_head { + struct sk_buff *head; + struct sk_buff *tail; + __u32 qlen; + spinlock_t lock; +}; + struct Qdisc { int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch, @@ -61,7 +69,7 @@ struct Qdisc { u32 limit; const struct Qdisc_ops *ops; struct qdisc_size_table __rcu *stab; - struct list_head list; + struct hlist_node hash; u32 handle; u32 parent; void *u32_node; @@ -76,7 +84,7 @@ struct Qdisc { * For performance sake on SMP, we put highly modified fields at the end */ struct sk_buff *gso_skb ____cacheline_aligned_in_smp; - struct sk_buff_head q; + struct qdisc_skb_head q; struct gnet_stats_basic_packed bstats; seqcount_t running; struct gnet_stats_queue qstats; @@ -592,7 +600,7 @@ static inline void qdisc_qstats_drop(struct Qdisc *sch) static inline void qdisc_qstats_cpu_drop(struct Qdisc *sch) { - qstats_drop_inc(this_cpu_ptr(sch->cpu_qstats)); + this_cpu_inc(sch->cpu_qstats->drops); } static inline void qdisc_qstats_overlimit(struct Qdisc *sch) @@ -600,10 +608,27 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch) sch->qstats.overlimits++; } -static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, - struct sk_buff_head *list) +static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh) { - __skb_queue_tail(list, skb); + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; +} + +static inline int __qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; qdisc_qstats_backlog_inc(sch, skb); return NET_XMIT_SUCCESS; @@ -614,14 +639,16 @@ static inline int qdisc_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch) return __qdisc_enqueue_tail(skb, sch, &sch->q); } -static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, - struct sk_buff_head *list) +static inline struct sk_buff *__qdisc_dequeue_head(struct qdisc_skb_head *qh) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = qh->head; if (likely(skb != NULL)) { - qdisc_qstats_backlog_dec(sch, skb); - qdisc_bstats_update(sch, skb); + qh->head = skb->next; + qh->qlen--; + if (qh->head == NULL) + qh->tail = NULL; + skb->next = NULL; } return skb; @@ -629,7 +656,14 @@ static inline struct sk_buff *__qdisc_dequeue_head(struct Qdisc *sch, static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) { - return __qdisc_dequeue_head(sch, &sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); + + if (likely(skb != NULL)) { + qdisc_qstats_backlog_dec(sch, skb); + qdisc_bstats_update(sch, skb); + } + + return skb; } /* Instead of calling kfree_skb() while root qdisc lock is held, @@ -642,10 +676,10 @@ static inline void __qdisc_drop(struct sk_buff *skb, struct sk_buff **to_free) } static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, - struct sk_buff_head *list, + struct qdisc_skb_head *qh, struct sk_buff **to_free) { - struct sk_buff *skb = __skb_dequeue(list); + struct sk_buff *skb = __qdisc_dequeue_head(qh); if (likely(skb != NULL)) { unsigned int len = qdisc_pkt_len(skb); @@ -666,7 +700,9 @@ static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch, static inline struct sk_buff *qdisc_peek_head(struct Qdisc *sch) { - return skb_peek(&sch->q); + const struct qdisc_skb_head *qh = &sch->q; + + return qh->head; } /* generic pseudo peek method for non-work-conserving qdisc */ @@ -701,15 +737,19 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) return skb; } -static inline void __qdisc_reset_queue(struct sk_buff_head *list) +static inline void __qdisc_reset_queue(struct qdisc_skb_head *qh) { /* * We do not know the backlog in bytes of this list, it * is up to the caller to correct it */ - if (!skb_queue_empty(list)) { - rtnl_kfree_skbs(list->next, list->prev); - __skb_queue_head_init(list); + ASSERT_RTNL(); + if (qh->qlen) { + rtnl_kfree_skbs(qh->head, qh->tail); + + qh->head = NULL; + qh->tail = NULL; + qh->qlen = 0; } } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 632e205ca54b..87a7f42e7639 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -83,9 +83,9 @@ #endif /* Round an int up to the next multiple of 4. */ -#define WORD_ROUND(s) (((s)+3)&~3) +#define SCTP_PAD4(s) (((s)+3)&~3) /* Truncate to the previous multiple of 4. */ -#define WORD_TRUNC(s) ((s)&~3) +#define SCTP_TRUNC4(s) ((s)&~3) /* * Function declarations. @@ -433,7 +433,7 @@ static inline int sctp_frag_point(const struct sctp_association *asoc, int pmtu) if (asoc->user_frag) frag = min_t(int, frag, asoc->user_frag); - frag = WORD_TRUNC(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); + frag = SCTP_TRUNC4(min_t(int, frag, SCTP_MAX_CHUNK_LEN)); return frag; } @@ -462,7 +462,7 @@ _sctp_walk_params((pos), (chunk), ntohs((chunk)->chunk_hdr.length), member) for (pos.v = chunk->member;\ pos.v <= (void *)chunk + end - ntohs(pos.p->length) &&\ ntohs(pos.p->length) >= sizeof(sctp_paramhdr_t);\ - pos.v += WORD_ROUND(ntohs(pos.p->length))) + pos.v += SCTP_PAD4(ntohs(pos.p->length))) #define sctp_walk_errors(err, chunk_hdr)\ _sctp_walk_errors((err), (chunk_hdr), ntohs((chunk_hdr)->length)) @@ -472,7 +472,7 @@ for (err = (sctp_errhdr_t *)((void *)chunk_hdr + \ sizeof(sctp_chunkhdr_t));\ (void *)err <= (void *)chunk_hdr + end - ntohs(err->length) &&\ ntohs(err->length) >= sizeof(sctp_errhdr_t); \ - err = (sctp_errhdr_t *)((void *)err + WORD_ROUND(ntohs(err->length)))) + err = (sctp_errhdr_t *)((void *)err + SCTP_PAD4(ntohs(err->length)))) #define sctp_walk_fwdtsn(pos, chunk)\ _sctp_walk_fwdtsn((pos), (chunk), ntohs((chunk)->chunk_hdr->length) - sizeof(struct sctp_fwdtsn_chunk)) diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index bafe2a0ab908..ca6c971dd74a 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -307,85 +307,27 @@ static inline __u16 sctp_data_size(struct sctp_chunk *chunk) } /* Compare two TSNs */ +#define TSN_lt(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) < 0)) -/* RFC 1982 - Serial Number Arithmetic - * - * 2. Comparison - * Then, s1 is said to be equal to s2 if and only if i1 is equal to i2, - * in all other cases, s1 is not equal to s2. - * - * s1 is said to be less than s2 if, and only if, s1 is not equal to s2, - * and - * - * (i1 < i2 and i2 - i1 < 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 > 2^(SERIAL_BITS - 1)) - * - * s1 is said to be greater than s2 if, and only if, s1 is not equal to - * s2, and - * - * (i1 < i2 and i2 - i1 > 2^(SERIAL_BITS - 1)) or - * (i1 > i2 and i1 - i2 < 2^(SERIAL_BITS - 1)) - */ - -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on TSNs in this document SHOULD use Serial - * Number Arithmetic as defined in [RFC1982] where SERIAL_BITS = 32. - */ - -enum { - TSN_SIGN_BIT = (1<<31) -}; - -static inline int TSN_lt(__u32 s, __u32 t) -{ - return ((s) - (t)) & TSN_SIGN_BIT; -} - -static inline int TSN_lte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); -} +#define TSN_lte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((a) - (b)) <= 0)) /* Compare two SSNs */ +#define SSN_lt(a,b) \ + (typecheck(__u16, a) && \ + typecheck(__u16, b) && \ + ((__s16)((a) - (b)) < 0)) -/* - * RFC 2960 - * 1.6 Serial Number Arithmetic - * - * Comparisons and arithmetic on Stream Sequence Numbers in this document - * SHOULD use Serial Number Arithmetic as defined in [RFC1982] where - * SERIAL_BITS = 16. - */ -enum { - SSN_SIGN_BIT = (1<<15) -}; - -static inline int SSN_lt(__u16 s, __u16 t) -{ - return ((s) - (t)) & SSN_SIGN_BIT; -} - -static inline int SSN_lte(__u16 s, __u16 t) -{ - return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); -} - -/* - * ADDIP 3.1.1 - * The valid range of Serial Number is from 0 to 4294967295 (2**32 - 1). Serial - * Numbers wrap back to 0 after reaching 4294967295. - */ -enum { - ADDIP_SERIAL_SIGN_BIT = (1<<31) -}; - -static inline int ADDIP_SERIAL_gte(__u32 s, __u32 t) -{ - return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); -} +/* ADDIP 3.1.1 */ +#define ADDIP_SERIAL_gte(a,b) \ + (typecheck(__u32, a) && \ + typecheck(__u32, b) && \ + ((__s32)((b) - (a)) <= 0)) /* Check VTAG of the packet matches the sender's own tag. */ static inline int diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index ced0df374e60..11c3bf262a85 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -537,6 +537,7 @@ struct sctp_datamsg { struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *, struct sctp_sndrcvinfo *, struct iov_iter *); +void sctp_datamsg_free(struct sctp_datamsg *); void sctp_datamsg_put(struct sctp_datamsg *); void sctp_chunk_fail(struct sctp_chunk *, int error); int sctp_chunk_abandoned(struct sctp_chunk *); @@ -1069,7 +1070,7 @@ struct sctp_outq { void sctp_outq_init(struct sctp_association *, struct sctp_outq *); void sctp_outq_teardown(struct sctp_outq *); void sctp_outq_free(struct sctp_outq*); -int sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); +void sctp_outq_tail(struct sctp_outq *, struct sctp_chunk *chunk, gfp_t); int sctp_outq_sack(struct sctp_outq *, struct sctp_chunk *); int sctp_outq_is_empty(const struct sctp_outq *); void sctp_outq_restart(struct sctp_outq *); @@ -1077,7 +1078,7 @@ void sctp_outq_restart(struct sctp_outq *); void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); -int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); void sctp_prsctp_prune(struct sctp_association *asoc, struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ diff --git a/include/net/sock.h b/include/net/sock.h index 8741988e6880..ebf75db08e06 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1020,7 +1020,6 @@ struct proto { void (*unhash)(struct sock *sk); void (*rehash)(struct sock *sk); int (*get_port)(struct sock *sk, unsigned short snum); - void (*clear_sk)(struct sock *sk, int size); /* Keeping track of sockets in use */ #ifdef CONFIG_PROC_FS @@ -1114,6 +1113,16 @@ static inline bool sk_stream_is_writeable(const struct sock *sk) sk_stream_memory_free(sk); } +static inline int sk_under_cgroup_hierarchy(struct sock *sk, + struct cgroup *ancestor) +{ +#ifdef CONFIG_SOCK_CGROUP_DATA + return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), + ancestor); +#else + return -ENOTSUPP; +#endif +} static inline bool sk_has_memory_pressure(const struct sock *sk) { @@ -1232,8 +1241,6 @@ static inline int __sk_prot_rehash(struct sock *sk) return sk->sk_prot->hash(sk); } -void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); - /* About 10 seconds */ #define SOCK_DESTROY_TIME (10*HZ) diff --git a/include/net/strparser.h b/include/net/strparser.h new file mode 100644 index 000000000000..0c28ad97c52f --- /dev/null +++ b/include/net/strparser.h @@ -0,0 +1,142 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#ifndef __NET_STRPARSER_H_ +#define __NET_STRPARSER_H_ + +#include +#include + +#define STRP_STATS_ADD(stat, count) ((stat) += (count)) +#define STRP_STATS_INCR(stat) ((stat)++) + +struct strp_stats { + unsigned long long rx_msgs; + unsigned long long rx_bytes; + unsigned int rx_mem_fail; + unsigned int rx_need_more_hdr; + unsigned int rx_msg_too_big; + unsigned int rx_msg_timeouts; + unsigned int rx_bad_hdr_len; +}; + +struct strp_aggr_stats { + unsigned long long rx_msgs; + unsigned long long rx_bytes; + unsigned int rx_mem_fail; + unsigned int rx_need_more_hdr; + unsigned int rx_msg_too_big; + unsigned int rx_msg_timeouts; + unsigned int rx_bad_hdr_len; + unsigned int rx_aborts; + unsigned int rx_interrupted; + unsigned int rx_unrecov_intr; +}; + +struct strparser; + +/* Callbacks are called with lock held for the attached socket */ +struct strp_callbacks { + int (*parse_msg)(struct strparser *strp, struct sk_buff *skb); + void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb); + int (*read_sock_done)(struct strparser *strp, int err); + void (*abort_parser)(struct strparser *strp, int err); +}; + +struct strp_rx_msg { + int full_len; + int offset; +}; + +static inline struct strp_rx_msg *strp_rx_msg(struct sk_buff *skb) +{ + return (struct strp_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +/* Structure for an attached lower socket */ +struct strparser { + struct sock *sk; + + u32 rx_stopped : 1; + u32 rx_paused : 1; + u32 rx_aborted : 1; + u32 rx_interrupted : 1; + u32 rx_unrecov_intr : 1; + + struct sk_buff **rx_skb_nextp; + struct timer_list rx_msg_timer; + struct sk_buff *rx_skb_head; + unsigned int rx_need_bytes; + struct delayed_work rx_delayed_work; + struct work_struct rx_work; + struct strp_stats stats; + struct strp_callbacks cb; +}; + +/* Must be called with lock held for attached socket */ +static inline void strp_pause(struct strparser *strp) +{ + strp->rx_paused = 1; +} + +/* May be called without holding lock for attached socket */ +void strp_unpause(struct strparser *strp); + +static inline void save_strp_stats(struct strparser *strp, + struct strp_aggr_stats *agg_stats) +{ + /* Save psock statistics in the mux when psock is being unattached. */ + +#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += \ + strp->stats._stat) + SAVE_PSOCK_STATS(rx_msgs); + SAVE_PSOCK_STATS(rx_bytes); + SAVE_PSOCK_STATS(rx_mem_fail); + SAVE_PSOCK_STATS(rx_need_more_hdr); + SAVE_PSOCK_STATS(rx_msg_too_big); + SAVE_PSOCK_STATS(rx_msg_timeouts); + SAVE_PSOCK_STATS(rx_bad_hdr_len); +#undef SAVE_PSOCK_STATS + + if (strp->rx_aborted) + agg_stats->rx_aborts++; + if (strp->rx_interrupted) + agg_stats->rx_interrupted++; + if (strp->rx_unrecov_intr) + agg_stats->rx_unrecov_intr++; +} + +static inline void aggregate_strp_stats(struct strp_aggr_stats *stats, + struct strp_aggr_stats *agg_stats) +{ +#define SAVE_PSOCK_STATS(_stat) (agg_stats->_stat += stats->_stat) + SAVE_PSOCK_STATS(rx_msgs); + SAVE_PSOCK_STATS(rx_bytes); + SAVE_PSOCK_STATS(rx_mem_fail); + SAVE_PSOCK_STATS(rx_need_more_hdr); + SAVE_PSOCK_STATS(rx_msg_too_big); + SAVE_PSOCK_STATS(rx_msg_timeouts); + SAVE_PSOCK_STATS(rx_bad_hdr_len); + SAVE_PSOCK_STATS(rx_aborts); + SAVE_PSOCK_STATS(rx_interrupted); + SAVE_PSOCK_STATS(rx_unrecov_intr); +#undef SAVE_PSOCK_STATS + +} + +void strp_done(struct strparser *strp); +void strp_stop(struct strparser *strp); +void strp_check_rcv(struct strparser *strp); +int strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb); +void strp_data_ready(struct strparser *strp); + +#endif /* __NET_STRPARSER_H_ */ diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 62f6a967a1b7..eba80c4fc56f 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -68,7 +68,6 @@ struct switchdev_attr { enum switchdev_obj_id { SWITCHDEV_OBJ_ID_UNDEFINED, SWITCHDEV_OBJ_ID_PORT_VLAN, - SWITCHDEV_OBJ_ID_IPV4_FIB, SWITCHDEV_OBJ_ID_PORT_FDB, SWITCHDEV_OBJ_ID_PORT_MDB, }; @@ -92,21 +91,6 @@ struct switchdev_obj_port_vlan { #define SWITCHDEV_OBJ_PORT_VLAN(obj) \ container_of(obj, struct switchdev_obj_port_vlan, obj) -/* SWITCHDEV_OBJ_ID_IPV4_FIB */ -struct switchdev_obj_ipv4_fib { - struct switchdev_obj obj; - u32 dst; - int dst_len; - struct fib_info *fi; - u8 tos; - u8 type; - u32 nlflags; - u32 tb_id; -}; - -#define SWITCHDEV_OBJ_IPV4_FIB(obj) \ - container_of(obj, struct switchdev_obj_ipv4_fib, obj) - /* SWITCHDEV_OBJ_ID_PORT_FDB */ struct switchdev_obj_port_fdb { struct switchdev_obj obj; @@ -209,11 +193,6 @@ int switchdev_port_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); int switchdev_port_bridge_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags); -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id); -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id); -void switchdev_fib_ipv4_abort(struct fib_info *fi); int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlm_flags); @@ -222,7 +201,7 @@ int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], u16 vid); int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx); + struct net_device *filter_dev, int *idx); void switchdev_port_fwd_mark_set(struct net_device *dev, struct net_device *group_dev, bool joining); @@ -304,25 +283,6 @@ static inline int switchdev_port_bridge_dellink(struct net_device *dev, return -EOPNOTSUPP; } -static inline int switchdev_fib_ipv4_add(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, - u32 nlflags, u32 tb_id) -{ - return 0; -} - -static inline int switchdev_fib_ipv4_del(u32 dst, int dst_len, - struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - return 0; -} - -static inline void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ -} - static inline int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, @@ -342,15 +302,9 @@ static inline int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) -{ - return idx; -} - -static inline void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) + int *idx) { + return *idx; } static inline bool switchdev_port_same_parent_id(struct net_device *a, diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index 5164bd7a38fb..9fd2bea0a6e0 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -50,9 +50,11 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); int ife_validate_meta_u16(void *val, int len); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi); void ife_release_meta_gen(struct tcf_meta_info *mi); int register_ife_op(struct tcf_meta_ops *mops); int unregister_ife_op(struct tcf_meta_ops *mops); diff --git a/include/net/tc_act/tc_skbmod.h b/include/net/tc_act/tc_skbmod.h new file mode 100644 index 000000000000..644a2116b47b --- /dev/null +++ b/include/net/tc_act/tc_skbmod.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#ifndef __NET_TC_SKBMOD_H +#define __NET_TC_SKBMOD_H + +#include +#include + +struct tcf_skbmod_params { + struct rcu_head rcu; + u64 flags; /*up to 64 types of operations; extend if needed */ + u8 eth_dst[ETH_ALEN]; + u16 eth_type; + u8 eth_src[ETH_ALEN]; +}; + +struct tcf_skbmod { + struct tc_action common; + struct tcf_skbmod_params __rcu *skbmod_p; +}; +#define to_skbmod(a) ((struct tcf_skbmod *)a) + +#endif /* __NET_TC_SKBMOD_H */ diff --git a/include/net/tc_act/tc_tunnel_key.h b/include/net/tc_act/tc_tunnel_key.h new file mode 100644 index 000000000000..253f8da6c2a6 --- /dev/null +++ b/include/net/tc_act/tc_tunnel_key.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NET_TC_TUNNEL_KEY_H +#define __NET_TC_TUNNEL_KEY_H + +#include + +struct tcf_tunnel_key_params { + struct rcu_head rcu; + int tcft_action; + int action; + struct metadata_dst *tcft_enc_metadata; +}; + +struct tcf_tunnel_key { + struct tc_action common; + struct tcf_tunnel_key_params __rcu *params; +}; + +#define to_tunnel_key(a) ((struct tcf_tunnel_key *)a) + +#endif /* __NET_TC_TUNNEL_KEY_H */ diff --git a/include/net/tc_act/tc_vlan.h b/include/net/tc_act/tc_vlan.h index e29f52e8bdf1..48cca321ee6c 100644 --- a/include/net/tc_act/tc_vlan.h +++ b/include/net/tc_act/tc_vlan.h @@ -11,6 +11,7 @@ #define __NET_TC_VLAN_H #include +#include #define VLAN_F_POP 0x1 #define VLAN_F_PUSH 0x2 @@ -20,7 +21,32 @@ struct tcf_vlan { int tcfv_action; u16 tcfv_push_vid; __be16 tcfv_push_proto; + u8 tcfv_push_prio; }; #define to_vlan(a) ((struct tcf_vlan *)a) +static inline bool is_tcf_vlan(const struct tc_action *a) +{ +#ifdef CONFIG_NET_CLS_ACT + if (a->ops && a->ops->type == TCA_ACT_VLAN) + return true; +#endif + return false; +} + +static inline u32 tcf_vlan_action(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_action; +} + +static inline u16 tcf_vlan_push_vid(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_vid; +} + +static inline __be16 tcf_vlan_push_proto(const struct tc_action *a) +{ + return to_vlan(a)->tcfv_push_proto; +} + #endif /* __NET_TC_VLAN_H */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 7717302cab91..f83b7f220a65 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -227,10 +227,9 @@ void tcp_time_wait(struct sock *sk, int state, int timeo); #define TFO_SERVER_COOKIE_NOT_REQD 0x200 /* Force enable TFO on all listeners, i.e., not requiring the - * TCP_FASTOPEN socket option. SOCKOPT1/2 determine how to set max_qlen. + * TCP_FASTOPEN socket option. */ #define TFO_SERVER_WO_SOCKOPT1 0x400 -#define TFO_SERVER_WO_SOCKOPT2 0x800 extern struct inet_timewait_death_row tcp_death_row; @@ -534,6 +533,8 @@ __u32 cookie_v6_init_sequence(const struct sk_buff *skb, __u16 *mss); #endif /* tcp_output.c */ +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs); void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, int nonagle); bool tcp_may_send_now(struct sock *sk); @@ -604,8 +605,6 @@ static inline int tcp_bound_to_half_wnd(struct tcp_sock *tp, int pktsize) void tcp_get_info(struct sock *, struct tcp_info *); /* Read 'sendfile()'-style from a TCP socket */ -typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, - unsigned int, size_t); int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, sk_read_actor_t recv_actor); @@ -643,7 +642,7 @@ static inline void tcp_fast_path_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - if (skb_queue_empty(&tp->out_of_order_queue) && + if (RB_EMPTY_ROOT(&tp->out_of_order_queue) && tp->rcv_wnd && atomic_read(&sk->sk_rmem_alloc) < sk->sk_rcvbuf && !tp->urg_data) @@ -674,7 +673,7 @@ static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) /* Minimum RTT in usec. ~0 means not available. */ static inline u32 tcp_min_rtt(const struct tcp_sock *tp) { - return tp->rtt_min[0].rtt; + return minmax_get(&tp->rtt_min); } /* Compute the actual receive window we are currently advertising. @@ -766,8 +765,16 @@ struct tcp_skb_cb { __u32 ack_seq; /* Sequence number ACK'd */ union { struct { - /* There is space for up to 20 bytes */ - __u32 in_flight;/* Bytes in flight when packet sent */ + /* There is space for up to 24 bytes */ + __u32 in_flight:30,/* Bytes in flight at transmit */ + is_app_limited:1, /* cwnd not fully used? */ + unused:1; + /* pkts S/ACKed so far upon tx of skb, incl retrans: */ + __u32 delivered; + /* start of send pipeline phase */ + struct skb_mstamp first_tx_mstamp; + /* when we reached the "delivered" count */ + struct skb_mstamp delivered_mstamp; } tx; /* only used for outgoing skbs */ union { struct inet_skb_parm h4; @@ -863,6 +870,27 @@ struct ack_sample { u32 in_flight; }; +/* A rate sample measures the number of (original/retransmitted) data + * packets delivered "delivered" over an interval of time "interval_us". + * The tcp_rate.c code fills in the rate sample, and congestion + * control modules that define a cong_control function to run at the end + * of ACK processing can optionally chose to consult this sample when + * setting cwnd and pacing rate. + * A sample is invalid if "delivered" or "interval_us" is negative. + */ +struct rate_sample { + struct skb_mstamp prior_mstamp; /* starting timestamp for interval */ + u32 prior_delivered; /* tp->delivered at "prior_mstamp" */ + s32 delivered; /* number of packets delivered over interval */ + long interval_us; /* time for tp->delivered to incr "delivered" */ + long rtt_us; /* RTT of last (S)ACKed packet (or -1) */ + int losses; /* number of packets marked lost upon ACK */ + u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */ + u32 prior_in_flight; /* in flight before this ACK */ + bool is_app_limited; /* is sample from packet with bubble in pipe? */ + bool is_retrans; /* is sample from retransmission? */ +}; + struct tcp_congestion_ops { struct list_head list; u32 key; @@ -887,6 +915,14 @@ struct tcp_congestion_ops { u32 (*undo_cwnd)(struct sock *sk); /* hook for packet ack accounting (optional) */ void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample); + /* suggest number of segments for each skb to transmit (optional) */ + u32 (*tso_segs_goal)(struct sock *sk); + /* returns the multiplier used in tcp_sndbuf_expand (optional) */ + u32 (*sndbuf_expand)(struct sock *sk); + /* call when packets are delivered to update cwnd and pacing rate, + * after all the ca_state processing. (optional) + */ + void (*cong_control)(struct sock *sk, const struct rate_sample *rs); /* get info for inet_diag (optional) */ size_t (*get_info)(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); @@ -949,6 +985,14 @@ static inline void tcp_ca_event(struct sock *sk, const enum tcp_ca_event event) icsk->icsk_ca_ops->cwnd_event(sk, event); } +/* From tcp_rate.c */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb); +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs); +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs); +void tcp_rate_check_app_limited(struct sock *sk); + /* These functions determine how the current flow behaves in respect of SACK * handling. SACK is negotiated with the peer, and therefore it can vary * between different flows. @@ -1164,6 +1208,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp) } bool tcp_prequeue(struct sock *sk, struct sk_buff *skb); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb); #undef STATE_TRACE @@ -1853,6 +1898,8 @@ static inline int tcp_inq(struct sock *sk) return answ; } +int tcp_peek_len(struct socket *sock); + static inline void tcp_segs_in(struct tcp_sock *tp, const struct sk_buff *skb) { u16 segs_in; diff --git a/include/net/udp.h b/include/net/udp.h index 8894d7144189..ea53a87d880f 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -251,6 +251,7 @@ int udp_get_port(struct sock *sk, unsigned short snum, int (*saddr_cmp)(const struct sock *, const struct sock *)); void udp_err(struct sk_buff *, u32); +int udp_abort(struct sock *sk, int err); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); int udp_push_pending_frames(struct sock *sk); void udp_flush_pending_frames(struct sock *sk); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index b96d0360c095..0255613a54a4 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -350,24 +350,6 @@ static inline __be32 vxlan_vni_field(__be32 vni) #endif } -static inline __be32 vxlan_tun_id_to_vni(__be64 tun_id) -{ -#if defined(__BIG_ENDIAN) - return (__force __be32)tun_id; -#else - return (__force __be32)((__force u64)tun_id >> 32); -#endif -} - -static inline __be64 vxlan_vni_to_tun_id(__be32 vni) -{ -#if defined(__BIG_ENDIAN) - return (__force __be64)vni; -#else - return (__force __be64)((u64)(__force u32)vni << 32); -#endif -} - static inline size_t vxlan_rco_start(__be32 vni_field) { return be32_to_cpu(vni_field & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 17934312eecb..31947b9c21d6 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -187,7 +187,7 @@ struct xfrm_state { struct xfrm_replay_state_esn *preplay_esn; /* The functions for replay detection. */ - struct xfrm_replay *repl; + const struct xfrm_replay *repl; /* internal flag that only holds state for delayed aevent at the * moment diff --git a/include/rxrpc/packet.h b/include/rxrpc/packet.h index b2017440b765..703a64b4681a 100644 --- a/include/rxrpc/packet.h +++ b/include/rxrpc/packet.h @@ -24,6 +24,7 @@ typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */ */ struct rxrpc_wire_header { __be32 epoch; /* client boot timestamp */ +#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */ __be32 cid; /* connection and channel ID */ #define RXRPC_MAXCALLS 4 /* max active calls per conn */ @@ -33,8 +34,6 @@ struct rxrpc_wire_header { #define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ __be32 callNumber; /* call ID (0 for connection-level packets) */ -#define RXRPC_PROCESS_MAXCALLS (1<<2) /* maximum number of active calls per conn (power of 2) */ - __be32 seq; /* sequence number of pkt in call stream */ __be32 serial; /* serial number of pkt sent to network */ @@ -92,10 +91,14 @@ struct rxrpc_wire_header { struct rxrpc_jumbo_header { uint8_t flags; /* packet flags (as per rxrpc_header) */ uint8_t pad; - __be16 _rsvd; /* reserved (used by kerberos security as cksum) */ + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; }; #define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ +#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) /*****************************************************************************/ /* @@ -120,6 +123,7 @@ struct rxrpc_ackpacket { #define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ #define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ #define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ +#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ uint8_t nAcks; /* number of ACKs */ #define RXRPC_MAXACKS 255 @@ -130,6 +134,13 @@ struct rxrpc_ackpacket { } __packed; +/* Some ACKs refer to specific packets and some are general and can be updated. */ +#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ + (1 << RXRPC_ACK_PING_RESPONSE) | \ + (1 << RXRPC_ACK_DELAY) | \ + (1 << RXRPC_ACK_IDLE)) + + /* * ACK packets can have a further piece of information tagged on the end */ diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h new file mode 100644 index 000000000000..0383e5e9a0f3 --- /dev/null +++ b/include/trace/events/rxrpc.h @@ -0,0 +1,625 @@ +/* AF_RXRPC tracepoints + * + * Copyright (C) 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM rxrpc + +#if !defined(_TRACE_RXRPC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_RXRPC_H + +#include + +TRACE_EVENT(rxrpc_conn, + TP_PROTO(struct rxrpc_connection *conn, enum rxrpc_conn_trace op, + int usage, const void *where), + + TP_ARGS(conn, op, usage, where), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(int, op ) + __field(int, usage ) + __field(const void *, where ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->op = op; + __entry->usage = usage; + __entry->where = where; + ), + + TP_printk("C=%p %s u=%d sp=%pSR", + __entry->conn, + rxrpc_conn_traces[__entry->op], + __entry->usage, + __entry->where) + ); + +TRACE_EVENT(rxrpc_client, + TP_PROTO(struct rxrpc_connection *conn, int channel, + enum rxrpc_client_trace op), + + TP_ARGS(conn, channel, op), + + TP_STRUCT__entry( + __field(struct rxrpc_connection *, conn ) + __field(u32, cid ) + __field(int, channel ) + __field(int, usage ) + __field(enum rxrpc_client_trace, op ) + __field(enum rxrpc_conn_cache_state, cs ) + ), + + TP_fast_assign( + __entry->conn = conn; + __entry->channel = channel; + __entry->usage = atomic_read(&conn->usage); + __entry->op = op; + __entry->cid = conn->proto.cid; + __entry->cs = conn->cache_state; + ), + + TP_printk("C=%p h=%2d %s %s i=%08x u=%d", + __entry->conn, + __entry->channel, + rxrpc_client_traces[__entry->op], + rxrpc_conn_cache_states[__entry->cs], + __entry->cid, + __entry->usage) + ); + +TRACE_EVENT(rxrpc_call, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_call_trace op, + int usage, const void *where, const void *aux), + + TP_ARGS(call, op, usage, where, aux), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(int, op ) + __field(int, usage ) + __field(const void *, where ) + __field(const void *, aux ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->op = op; + __entry->usage = usage; + __entry->where = where; + __entry->aux = aux; + ), + + TP_printk("c=%p %s u=%d sp=%pSR a=%p", + __entry->call, + rxrpc_call_traces[__entry->op], + __entry->usage, + __entry->where, + __entry->aux) + ); + +TRACE_EVENT(rxrpc_skb, + TP_PROTO(struct sk_buff *skb, enum rxrpc_skb_trace op, + int usage, int mod_count, const void *where), + + TP_ARGS(skb, op, usage, mod_count, where), + + TP_STRUCT__entry( + __field(struct sk_buff *, skb ) + __field(enum rxrpc_skb_trace, op ) + __field(int, usage ) + __field(int, mod_count ) + __field(const void *, where ) + ), + + TP_fast_assign( + __entry->skb = skb; + __entry->op = op; + __entry->usage = usage; + __entry->mod_count = mod_count; + __entry->where = where; + ), + + TP_printk("s=%p %s u=%d m=%d p=%pSR", + __entry->skb, + rxrpc_skb_traces[__entry->op], + __entry->usage, + __entry->mod_count, + __entry->where) + ); + +TRACE_EVENT(rxrpc_rx_packet, + TP_PROTO(struct rxrpc_skb_priv *sp), + + TP_ARGS(sp), + + TP_STRUCT__entry( + __field_struct(struct rxrpc_host_header, hdr ) + ), + + TP_fast_assign( + memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); + ), + + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s", + __entry->hdr.epoch, __entry->hdr.cid, + __entry->hdr.callNumber, __entry->hdr.serviceId, + __entry->hdr.serial, __entry->hdr.seq, + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") + ); + +TRACE_EVENT(rxrpc_rx_done, + TP_PROTO(int result, int abort_code), + + TP_ARGS(result, abort_code), + + TP_STRUCT__entry( + __field(int, result ) + __field(int, abort_code ) + ), + + TP_fast_assign( + __entry->result = result; + __entry->abort_code = abort_code; + ), + + TP_printk("r=%d a=%d", __entry->result, __entry->abort_code) + ); + +TRACE_EVENT(rxrpc_abort, + TP_PROTO(const char *why, u32 cid, u32 call_id, rxrpc_seq_t seq, + int abort_code, int error), + + TP_ARGS(why, cid, call_id, seq, abort_code, error), + + TP_STRUCT__entry( + __array(char, why, 4 ) + __field(u32, cid ) + __field(u32, call_id ) + __field(rxrpc_seq_t, seq ) + __field(int, abort_code ) + __field(int, error ) + ), + + TP_fast_assign( + memcpy(__entry->why, why, 4); + __entry->cid = cid; + __entry->call_id = call_id; + __entry->abort_code = abort_code; + __entry->error = error; + __entry->seq = seq; + ), + + TP_printk("%08x:%08x s=%u a=%d e=%d %s", + __entry->cid, __entry->call_id, __entry->seq, + __entry->abort_code, __entry->error, __entry->why) + ); + +TRACE_EVENT(rxrpc_transmit, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_transmit_trace why), + + TP_ARGS(call, why), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_transmit_trace, why ) + __field(rxrpc_seq_t, tx_hard_ack ) + __field(rxrpc_seq_t, tx_top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->tx_hard_ack = call->tx_hard_ack; + __entry->tx_top = call->tx_top; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_transmit_traces[__entry->why], + __entry->tx_hard_ack + 1, + __entry->tx_top - __entry->tx_hard_ack) + ); + +TRACE_EVENT(rxrpc_rx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t first, u8 reason, u8 n_acks), + + TP_ARGS(call, first, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, first ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->first = first; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk("c=%p %s f=%08x n=%u", + __entry->call, + rxrpc_ack_names[__entry->reason], + __entry->first, + __entry->n_acks) + ); + +TRACE_EVENT(rxrpc_tx_data, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, + rxrpc_serial_t serial, u8 flags, bool retrans, bool lose), + + TP_ARGS(call, seq, serial, flags, retrans, lose), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, seq ) + __field(rxrpc_serial_t, serial ) + __field(u8, flags ) + __field(bool, retrans ) + __field(bool, lose ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->serial = serial; + __entry->flags = flags; + __entry->retrans = retrans; + __entry->lose = lose; + ), + + TP_printk("c=%p DATA %08x q=%08x fl=%02x%s%s", + __entry->call, + __entry->serial, + __entry->seq, + __entry->flags, + __entry->retrans ? " *RETRANS*" : "", + __entry->lose ? " *LOSE*" : "") + ); + +TRACE_EVENT(rxrpc_tx_ack, + TP_PROTO(struct rxrpc_call *call, rxrpc_serial_t serial, + rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial, + u8 reason, u8 n_acks), + + TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, ack_first ) + __field(rxrpc_serial_t, ack_serial ) + __field(u8, reason ) + __field(u8, n_acks ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->serial = serial; + __entry->ack_first = ack_first; + __entry->ack_serial = ack_serial; + __entry->reason = reason; + __entry->n_acks = n_acks; + ), + + TP_printk(" c=%p ACK %08x %s f=%08x r=%08x n=%u", + __entry->call, + __entry->serial, + rxrpc_ack_names[__entry->reason], + __entry->ack_first, + __entry->ack_serial, + __entry->n_acks) + ); + +TRACE_EVENT(rxrpc_receive, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_receive_trace why, + rxrpc_serial_t serial, rxrpc_seq_t seq), + + TP_ARGS(call, why, serial, seq), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_receive_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(rxrpc_seq_t, seq ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->seq = seq; + __entry->hard_ack = call->rx_hard_ack; + __entry->top = call->rx_top; + ), + + TP_printk("c=%p %s r=%08x q=%08x w=%08x-%08x", + __entry->call, + rxrpc_receive_traces[__entry->why], + __entry->serial, + __entry->seq, + __entry->hard_ack, + __entry->top) + ); + +TRACE_EVENT(rxrpc_recvmsg, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_recvmsg_trace why, + rxrpc_seq_t seq, unsigned int offset, unsigned int len, + int ret), + + TP_ARGS(call, why, seq, offset, len, ret), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_recvmsg_trace, why ) + __field(rxrpc_seq_t, seq ) + __field(unsigned int, offset ) + __field(unsigned int, len ) + __field(int, ret ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->seq = seq; + __entry->offset = offset; + __entry->len = len; + __entry->ret = ret; + ), + + TP_printk("c=%p %s q=%08x o=%u l=%u ret=%d", + __entry->call, + rxrpc_recvmsg_traces[__entry->why], + __entry->seq, + __entry->offset, + __entry->len, + __entry->ret) + ); + +TRACE_EVENT(rxrpc_rtt_tx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_tx_trace why, + rxrpc_serial_t send_serial), + + TP_ARGS(call, why, send_serial), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_tx_trace, why ) + __field(rxrpc_serial_t, send_serial ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + ), + + TP_printk("c=%p %s sr=%08x", + __entry->call, + rxrpc_rtt_tx_traces[__entry->why], + __entry->send_serial) + ); + +TRACE_EVENT(rxrpc_rtt_rx, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + s64 rtt, u8 nr, s64 avg), + + TP_ARGS(call, why, send_serial, resp_serial, rtt, nr, avg), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_rtt_rx_trace, why ) + __field(u8, nr ) + __field(rxrpc_serial_t, send_serial ) + __field(rxrpc_serial_t, resp_serial ) + __field(s64, rtt ) + __field(u64, avg ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->send_serial = send_serial; + __entry->resp_serial = resp_serial; + __entry->rtt = rtt; + __entry->nr = nr; + __entry->avg = avg; + ), + + TP_printk("c=%p %s sr=%08x rr=%08x rtt=%lld nr=%u avg=%lld", + __entry->call, + rxrpc_rtt_rx_traces[__entry->why], + __entry->send_serial, + __entry->resp_serial, + __entry->rtt, + __entry->nr, + __entry->avg) + ); + +TRACE_EVENT(rxrpc_timer, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now, unsigned long now_j), + + TP_ARGS(call, why, now, now_j), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_timer_trace, why ) + __field_struct(ktime_t, now ) + __field_struct(ktime_t, expire_at ) + __field_struct(ktime_t, ack_at ) + __field_struct(ktime_t, resend_at ) + __field(unsigned long, now_j ) + __field(unsigned long, timer ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->now = now; + __entry->expire_at = call->expire_at; + __entry->ack_at = call->ack_at; + __entry->resend_at = call->resend_at; + __entry->now_j = now_j; + __entry->timer = call->timer.expires; + ), + + TP_printk("c=%p %s x=%lld a=%lld r=%lld t=%ld", + __entry->call, + rxrpc_timer_traces[__entry->why], + ktime_to_ns(ktime_sub(__entry->expire_at, __entry->now)), + ktime_to_ns(ktime_sub(__entry->ack_at, __entry->now)), + ktime_to_ns(ktime_sub(__entry->resend_at, __entry->now)), + __entry->timer - __entry->now_j) + ); + +TRACE_EVENT(rxrpc_rx_lose, + TP_PROTO(struct rxrpc_skb_priv *sp), + + TP_ARGS(sp), + + TP_STRUCT__entry( + __field_struct(struct rxrpc_host_header, hdr ) + ), + + TP_fast_assign( + memcpy(&__entry->hdr, &sp->hdr, sizeof(__entry->hdr)); + ), + + TP_printk("%08x:%08x:%08x:%04x %08x %08x %02x %02x %s *LOSE*", + __entry->hdr.epoch, __entry->hdr.cid, + __entry->hdr.callNumber, __entry->hdr.serviceId, + __entry->hdr.serial, __entry->hdr.seq, + __entry->hdr.type, __entry->hdr.flags, + __entry->hdr.type <= 15 ? rxrpc_pkts[__entry->hdr.type] : "?UNK") + ); + +TRACE_EVENT(rxrpc_propose_ack, + TP_PROTO(struct rxrpc_call *call, enum rxrpc_propose_ack_trace why, + u8 ack_reason, rxrpc_serial_t serial, bool immediate, + bool background, enum rxrpc_propose_ack_outcome outcome), + + TP_ARGS(call, why, ack_reason, serial, immediate, background, + outcome), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_propose_ack_trace, why ) + __field(rxrpc_serial_t, serial ) + __field(u8, ack_reason ) + __field(bool, immediate ) + __field(bool, background ) + __field(enum rxrpc_propose_ack_outcome, outcome ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->why = why; + __entry->serial = serial; + __entry->ack_reason = ack_reason; + __entry->immediate = immediate; + __entry->background = background; + __entry->outcome = outcome; + ), + + TP_printk("c=%p %s %s r=%08x i=%u b=%u%s", + __entry->call, + rxrpc_propose_ack_traces[__entry->why], + rxrpc_ack_names[__entry->ack_reason], + __entry->serial, + __entry->immediate, + __entry->background, + rxrpc_propose_ack_outcomes[__entry->outcome]) + ); + +TRACE_EVENT(rxrpc_retransmit, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, u8 annotation, + s64 expiry), + + TP_ARGS(call, seq, annotation, expiry), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(rxrpc_seq_t, seq ) + __field(u8, annotation ) + __field(s64, expiry ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->seq = seq; + __entry->annotation = annotation; + __entry->expiry = expiry; + ), + + TP_printk("c=%p q=%x a=%02x xp=%lld", + __entry->call, + __entry->seq, + __entry->annotation, + __entry->expiry) + ); + +TRACE_EVENT(rxrpc_congest, + TP_PROTO(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, + rxrpc_serial_t ack_serial, enum rxrpc_congest_change change), + + TP_ARGS(call, summary, ack_serial, change), + + TP_STRUCT__entry( + __field(struct rxrpc_call *, call ) + __field(enum rxrpc_congest_change, change ) + __field(rxrpc_seq_t, hard_ack ) + __field(rxrpc_seq_t, top ) + __field(rxrpc_seq_t, lowest_nak ) + __field(rxrpc_serial_t, ack_serial ) + __field_struct(struct rxrpc_ack_summary, sum ) + ), + + TP_fast_assign( + __entry->call = call; + __entry->change = change; + __entry->hard_ack = call->tx_hard_ack; + __entry->top = call->tx_top; + __entry->lowest_nak = call->acks_lowest_nak; + __entry->ack_serial = ack_serial; + memcpy(&__entry->sum, summary, sizeof(__entry->sum)); + ), + + TP_printk("c=%p %08x %s %08x %s cw=%u ss=%u nr=%u,%u nw=%u,%u r=%u b=%u u=%u d=%u l=%x%s%s%s", + __entry->call, + __entry->ack_serial, + rxrpc_ack_names[__entry->sum.ack_reason], + __entry->hard_ack, + rxrpc_congest_modes[__entry->sum.mode], + __entry->sum.cwnd, + __entry->sum.ssthresh, + __entry->sum.nr_acks, __entry->sum.nr_nacks, + __entry->sum.nr_new_acks, __entry->sum.nr_new_nacks, + __entry->sum.nr_rot_new_acks, + __entry->top - __entry->hard_ack, + __entry->sum.cumulative_acks, + __entry->sum.dup_acks, + __entry->lowest_nak, __entry->sum.new_low_nack ? "!" : "", + rxrpc_congest_changes[__entry->change], + __entry->sum.retrans_timeo ? " rTxTo" : "") + ); + +#endif /* _TRACE_RXRPC_H */ + +/* This part must be outside protection */ +#include diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 185f8ea2702f..d0352a971ebd 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -71,6 +71,7 @@ header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h header-y += bpf_common.h +header-y += bpf_perf_event.h header-y += bpf.h header-y += bpqether.h header-y += bsg.h diff --git a/include/uapi/linux/batman_adv.h b/include/uapi/linux/batman_adv.h index 0fbf6fd4711b..734fe83ab645 100644 --- a/include/uapi/linux/batman_adv.h +++ b/include/uapi/linux/batman_adv.h @@ -22,6 +22,42 @@ #define BATADV_NL_MCAST_GROUP_TPMETER "tpmeter" +/** + * enum batadv_tt_client_flags - TT client specific flags + * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table + * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new + * update telling its new real location has not been received/sent yet + * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. + * This information is used by the "AP Isolation" feature + * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This + * information is used by the Extended Isolation feature + * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table + * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has + * not been announced yet + * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept + * in the table for one more originator interval for consistency purposes + * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of + * the network but no nnode has already announced it + * + * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. + * Bits from 8 to 15 are called _local flags_ because they are used for local + * computations only. + * + * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with + * the other nodes in the network. To achieve this goal these flags are included + * in the TT CRC computation. + */ +enum batadv_tt_client_flags { + BATADV_TT_CLIENT_DEL = (1 << 0), + BATADV_TT_CLIENT_ROAM = (1 << 1), + BATADV_TT_CLIENT_WIFI = (1 << 4), + BATADV_TT_CLIENT_ISOLA = (1 << 5), + BATADV_TT_CLIENT_NOPURGE = (1 << 8), + BATADV_TT_CLIENT_NEW = (1 << 9), + BATADV_TT_CLIENT_PENDING = (1 << 10), + BATADV_TT_CLIENT_TEMP = (1 << 11), +}; + /** * enum batadv_nl_attrs - batman-adv netlink attributes * @@ -40,6 +76,26 @@ * @BATADV_ATTR_TPMETER_BYTES: amount of acked bytes during run * @BATADV_ATTR_TPMETER_COOKIE: session cookie to match tp_meter session * @BATADV_ATTR_PAD: attribute used for padding for 64-bit alignment + * @BATADV_ATTR_ACTIVE: Flag indicating if the hard interface is active + * @BATADV_ATTR_TT_ADDRESS: Client MAC address + * @BATADV_ATTR_TT_TTVN: Translation table version + * @BATADV_ATTR_TT_LAST_TTVN: Previous translation table version + * @BATADV_ATTR_TT_CRC32: CRC32 over translation table + * @BATADV_ATTR_TT_VID: VLAN ID + * @BATADV_ATTR_TT_FLAGS: Translation table client flags + * @BATADV_ATTR_FLAG_BEST: Flags indicating entry is the best + * @BATADV_ATTR_LAST_SEEN_MSECS: Time in milliseconds since last seen + * @BATADV_ATTR_NEIGH_ADDRESS: Neighbour MAC address + * @BATADV_ATTR_TQ: TQ to neighbour + * @BATADV_ATTR_THROUGHPUT: Estimated throughput to Neighbour + * @BATADV_ATTR_BANDWIDTH_UP: Reported uplink bandwidth + * @BATADV_ATTR_BANDWIDTH_DOWN: Reported downlink bandwidth + * @BATADV_ATTR_ROUTER: Gateway router MAC address + * @BATADV_ATTR_BLA_OWN: Flag indicating own originator + * @BATADV_ATTR_BLA_ADDRESS: Bridge loop avoidance claim MAC address + * @BATADV_ATTR_BLA_VID: BLA VLAN ID + * @BATADV_ATTR_BLA_BACKBONE: BLA gateway originator MAC address + * @BATADV_ATTR_BLA_CRC: BLA CRC * @__BATADV_ATTR_AFTER_LAST: internal use * @NUM_BATADV_ATTR: total number of batadv_nl_attrs available * @BATADV_ATTR_MAX: highest attribute number currently defined @@ -60,6 +116,26 @@ enum batadv_nl_attrs { BATADV_ATTR_TPMETER_BYTES, BATADV_ATTR_TPMETER_COOKIE, BATADV_ATTR_PAD, + BATADV_ATTR_ACTIVE, + BATADV_ATTR_TT_ADDRESS, + BATADV_ATTR_TT_TTVN, + BATADV_ATTR_TT_LAST_TTVN, + BATADV_ATTR_TT_CRC32, + BATADV_ATTR_TT_VID, + BATADV_ATTR_TT_FLAGS, + BATADV_ATTR_FLAG_BEST, + BATADV_ATTR_LAST_SEEN_MSECS, + BATADV_ATTR_NEIGH_ADDRESS, + BATADV_ATTR_TQ, + BATADV_ATTR_THROUGHPUT, + BATADV_ATTR_BANDWIDTH_UP, + BATADV_ATTR_BANDWIDTH_DOWN, + BATADV_ATTR_ROUTER, + BATADV_ATTR_BLA_OWN, + BATADV_ATTR_BLA_ADDRESS, + BATADV_ATTR_BLA_VID, + BATADV_ATTR_BLA_BACKBONE, + BATADV_ATTR_BLA_CRC, /* add attributes above here, update the policy in netlink.c */ __BATADV_ATTR_AFTER_LAST, NUM_BATADV_ATTR = __BATADV_ATTR_AFTER_LAST, @@ -73,6 +149,15 @@ enum batadv_nl_attrs { * @BATADV_CMD_GET_MESH_INFO: Query basic information about batman-adv device * @BATADV_CMD_TP_METER: Start a tp meter session * @BATADV_CMD_TP_METER_CANCEL: Cancel a tp meter session + * @BATADV_CMD_GET_ROUTING_ALGOS: Query the list of routing algorithms. + * @BATADV_CMD_GET_HARDIFS: Query list of hard interfaces + * @BATADV_CMD_GET_TRANSTABLE_LOCAL: Query list of local translations + * @BATADV_CMD_GET_TRANSTABLE_GLOBAL Query list of global translations + * @BATADV_CMD_GET_ORIGINATORS: Query list of originators + * @BATADV_CMD_GET_NEIGHBORS: Query list of neighbours + * @BATADV_CMD_GET_GATEWAYS: Query list of gateways + * @BATADV_CMD_GET_BLA_CLAIM: Query list of bridge loop avoidance claims + * @BATADV_CMD_GET_BLA_BACKBONE: Query list of bridge loop avoidance backbones * @__BATADV_CMD_AFTER_LAST: internal use * @BATADV_CMD_MAX: highest used command number */ @@ -81,6 +166,15 @@ enum batadv_nl_commands { BATADV_CMD_GET_MESH_INFO, BATADV_CMD_TP_METER, BATADV_CMD_TP_METER_CANCEL, + BATADV_CMD_GET_ROUTING_ALGOS, + BATADV_CMD_GET_HARDIFS, + BATADV_CMD_GET_TRANSTABLE_LOCAL, + BATADV_CMD_GET_TRANSTABLE_GLOBAL, + BATADV_CMD_GET_ORIGINATORS, + BATADV_CMD_GET_NEIGHBORS, + BATADV_CMD_GET_GATEWAYS, + BATADV_CMD_GET_BLA_CLAIM, + BATADV_CMD_GET_BLA_BACKBONE, /* add new commands above here */ __BATADV_CMD_AFTER_LAST, BATADV_CMD_MAX = __BATADV_CMD_AFTER_LAST - 1 diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 9e5fc168c8a3..f09c70b97eca 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -95,6 +95,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, BPF_PROG_TYPE_XDP, + BPF_PROG_TYPE_PERF_EVENT, }; #define BPF_PSEUDO_MAP_FD 1 @@ -375,6 +376,56 @@ enum bpf_func_id { */ BPF_FUNC_probe_write_user, + /** + * bpf_current_task_under_cgroup(map, index) - Check cgroup2 membership of current task + * @map: pointer to bpf_map in BPF_MAP_TYPE_CGROUP_ARRAY type + * @index: index of the cgroup in the bpf_map + * Return: + * == 0 current failed the cgroup2 descendant test + * == 1 current succeeded the cgroup2 descendant test + * < 0 error + */ + BPF_FUNC_current_task_under_cgroup, + + /** + * bpf_skb_change_tail(skb, len, flags) + * The helper will resize the skb to the given new size, + * to be used f.e. with control messages. + * @skb: pointer to skb + * @len: new skb length + * @flags: reserved + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_change_tail, + + /** + * bpf_skb_pull_data(skb, len) + * The helper will pull in non-linear data in case the + * skb is non-linear and not all of len are part of the + * linear section. Only needed for read/write with direct + * packet access. + * @skb: pointer to skb + * @len: len to make read/writeable + * Return: 0 on success or negative error + */ + BPF_FUNC_skb_pull_data, + + /** + * bpf_csum_update(skb, csum) + * Adds csum into skb->csum in case of CHECKSUM_COMPLETE. + * @skb: pointer to skb + * @csum: csum to add + * Return: csum on success or negative error + */ + BPF_FUNC_csum_update, + + /** + * bpf_set_hash_invalid(skb) + * Invalidate current skb>hash. + * @skb: pointer to skb + */ + BPF_FUNC_set_hash_invalid, + __BPF_FUNC_MAX_ID, }; diff --git a/include/uapi/linux/bpf_perf_event.h b/include/uapi/linux/bpf_perf_event.h new file mode 100644 index 000000000000..067427259820 --- /dev/null +++ b/include/uapi/linux/bpf_perf_event.h @@ -0,0 +1,18 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_PERF_EVENT_H__ +#define _UAPI__LINUX_BPF_PERF_EVENT_H__ + +#include +#include + +struct bpf_perf_event_data { + struct pt_regs regs; + __u64 sample_period; +}; + +#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */ diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index b8f38e84d93a..099a4200732c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -1362,7 +1362,14 @@ enum ethtool_link_mode_bit_indices { ETHTOOL_LINK_MODE_100000baseSR4_Full_BIT = 37, ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT = 38, ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT = 39, - ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, + ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT = 40, + ETHTOOL_LINK_MODE_1000baseX_Full_BIT = 41, + ETHTOOL_LINK_MODE_10000baseCR_Full_BIT = 42, + ETHTOOL_LINK_MODE_10000baseSR_Full_BIT = 43, + ETHTOOL_LINK_MODE_10000baseLR_Full_BIT = 44, + ETHTOOL_LINK_MODE_10000baseLRM_Full_BIT = 45, + ETHTOOL_LINK_MODE_10000baseER_Full_BIT = 46, + /* Last allowed bit for __ETHTOOL_LINK_MODE_LEGACY_MASK is bit * 31. Please do NOT define any SUPPORTED_* or ADVERTISED_* @@ -1371,7 +1378,7 @@ enum ethtool_link_mode_bit_indices { */ __ETHTOOL_LINK_MODE_LAST - = ETHTOOL_LINK_MODE_50000baseSR2_Full_BIT, + = ETHTOOL_LINK_MODE_10000baseER_Full_BIT, }; #define __ETHTOOL_LINK_MODE_LEGACY_MASK(base_name) \ diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index c186f64fffca..ab92bca6d448 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -140,7 +140,7 @@ struct bridge_vlan_xstats { __u64 tx_bytes; __u64 tx_packets; __u16 vid; - __u16 pad1; + __u16 flags; __u32 pad2; }; diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index a1b5202c5f6b..b4fba662cd32 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -318,6 +318,7 @@ enum { IFLA_BRPORT_FLUSH, IFLA_BRPORT_MULTICAST_ROUTER, IFLA_BRPORT_PAD, + IFLA_BRPORT_MCAST_FLOOD, __IFLA_BRPORT_MAX }; #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1) @@ -463,6 +464,7 @@ enum { enum ipvlan_mode { IPVLAN_MODE_L2 = 0, IPVLAN_MODE_L3, + IPVLAN_MODE_L3S, IPVLAN_MODE_MAX }; @@ -617,7 +619,7 @@ enum { enum { IFLA_VF_UNSPEC, IFLA_VF_MAC, /* Hardware queue specific attributes */ - IFLA_VF_VLAN, + IFLA_VF_VLAN, /* VLAN ID and QoS */ IFLA_VF_TX_RATE, /* Max TX Bandwidth Allocation */ IFLA_VF_SPOOFCHK, /* Spoof Checking on/off switch */ IFLA_VF_LINK_STATE, /* link state enable/disable/auto switch */ @@ -629,6 +631,7 @@ enum { IFLA_VF_TRUST, /* Trust VF */ IFLA_VF_IB_NODE_GUID, /* VF Infiniband node GUID */ IFLA_VF_IB_PORT_GUID, /* VF Infiniband port GUID */ + IFLA_VF_VLAN_LIST, /* nested list of vlans, option for QinQ */ __IFLA_VF_MAX, }; @@ -645,6 +648,22 @@ struct ifla_vf_vlan { __u32 qos; }; +enum { + IFLA_VF_VLAN_INFO_UNSPEC, + IFLA_VF_VLAN_INFO, /* VLAN ID, QoS and VLAN protocol */ + __IFLA_VF_VLAN_INFO_MAX, +}; + +#define IFLA_VF_VLAN_INFO_MAX (__IFLA_VF_VLAN_INFO_MAX - 1) +#define MAX_VLAN_LIST_LEN 1 + +struct ifla_vf_vlan_info { + __u32 vf; + __u32 vlan; /* 0 - 4095, 0 disables VLAN filter */ + __u32 qos; + __be16 vlan_proto; /* VLAN protocol either 802.1Q or 802.1ad */ +}; + struct ifla_vf_tx_rate { __u32 vf; __u32 rate; /* Max TX bandwidth in Mbps, 0 disables throttling */ @@ -825,6 +844,7 @@ enum { IFLA_STATS_LINK_64, IFLA_STATS_LINK_XSTATS, IFLA_STATS_LINK_XSTATS_SLAVE, + IFLA_STATS_LINK_OFFLOAD_XSTATS, __IFLA_STATS_MAX, }; @@ -844,6 +864,14 @@ enum { }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) +/* These are stats embedded into IFLA_STATS_LINK_OFFLOAD_XSTATS */ +enum { + IFLA_OFFLOAD_XSTATS_UNSPEC, + IFLA_OFFLOAD_XSTATS_CPU_HIT, /* struct rtnl_link_stats64 */ + __IFLA_OFFLOAD_XSTATS_MAX +}; +#define IFLA_OFFLOAD_XSTATS_MAX (__IFLA_OFFLOAD_XSTATS_MAX - 1) + /* XDP section */ enum { diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h index 777b6cdb1b7b..92f3c8677523 100644 --- a/include/uapi/linux/if_tunnel.h +++ b/include/uapi/linux/if_tunnel.h @@ -27,9 +27,23 @@ #define GRE_SEQ __cpu_to_be16(0x1000) #define GRE_STRICT __cpu_to_be16(0x0800) #define GRE_REC __cpu_to_be16(0x0700) -#define GRE_FLAGS __cpu_to_be16(0x00F8) +#define GRE_ACK __cpu_to_be16(0x0080) +#define GRE_FLAGS __cpu_to_be16(0x0078) #define GRE_VERSION __cpu_to_be16(0x0007) +#define GRE_IS_CSUM(f) ((f) & GRE_CSUM) +#define GRE_IS_ROUTING(f) ((f) & GRE_ROUTING) +#define GRE_IS_KEY(f) ((f) & GRE_KEY) +#define GRE_IS_SEQ(f) ((f) & GRE_SEQ) +#define GRE_IS_STRICT(f) ((f) & GRE_STRICT) +#define GRE_IS_REC(f) ((f) & GRE_REC) +#define GRE_IS_ACK(f) ((f) & GRE_ACK) + +#define GRE_VERSION_0 __cpu_to_be16(0x0000) +#define GRE_VERSION_1 __cpu_to_be16(0x0001) +#define GRE_PROTO_PPP __cpu_to_be16(0x880b) +#define GRE_PPTP_KEY_MASK __cpu_to_be32(0xffff) + struct ip_tunnel_parm { char name[IFNAMSIZ]; int link; @@ -60,6 +74,7 @@ enum { IFLA_IPTUN_ENCAP_FLAGS, IFLA_IPTUN_ENCAP_SPORT, IFLA_IPTUN_ENCAP_DPORT, + IFLA_IPTUN_COLLECT_METADATA, __IFLA_IPTUN_MAX, }; #define IFLA_IPTUN_MAX (__IFLA_IPTUN_MAX - 1) diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h index abbd1dc5d683..509cd961068d 100644 --- a/include/uapi/linux/inet_diag.h +++ b/include/uapi/linux/inet_diag.h @@ -73,6 +73,7 @@ enum { INET_DIAG_BC_S_COND, INET_DIAG_BC_D_COND, INET_DIAG_BC_DEV_COND, /* u32 ifindex */ + INET_DIAG_BC_MARK_COND, }; struct inet_diag_hostcond { @@ -82,6 +83,11 @@ struct inet_diag_hostcond { __be32 addr[0]; }; +struct inet_diag_markcond { + __u32 mark; + __u32 mask; +}; + /* Base info structure. It contains socket identity (addrs/ports/cookie) * and, alas, the information shown by netstat. */ struct inet_diag_msg { @@ -117,6 +123,8 @@ enum { INET_DIAG_LOCALS, INET_DIAG_PEERS, INET_DIAG_PAD, + INET_DIAG_MARK, + INET_DIAG_BBRINFO, __INET_DIAG_MAX, }; @@ -150,8 +158,20 @@ struct tcp_dctcp_info { __u32 dctcp_ab_tot; }; +/* INET_DIAG_BBRINFO */ + +struct tcp_bbr_info { + /* u64 bw: max-filtered BW (app throughput) estimate in Byte per sec: */ + __u32 bbr_bw_lo; /* lower 32 bits of bw */ + __u32 bbr_bw_hi; /* upper 32 bits of bw */ + __u32 bbr_min_rtt; /* min-filtered RTT in uSec */ + __u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ + __u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ +}; + union tcp_cc_info { struct tcpvegas_info vegas; struct tcp_dctcp_info dctcp; + struct tcp_bbr_info bbr; }; #endif /* _UAPI_INET_DIAG_H_ */ diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 395876060f50..8c2772340c3f 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -177,6 +177,7 @@ enum { DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, DEVCONF_KEEP_ADDR_ON_DOWN, + DEVCONF_RTR_SOLICIT_MAX_INTERVAL, DEVCONF_MAX }; diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h index 237fac4bc17b..15d8510cdae0 100644 --- a/include/uapi/linux/mii.h +++ b/include/uapi/linux/mii.h @@ -48,6 +48,7 @@ #define BMCR_SPEED100 0x2000 /* Select 100Mbps */ #define BMCR_LOOPBACK 0x4000 /* TXD loopback bits */ #define BMCR_RESET 0x8000 /* Reset to default state */ +#define BMCR_SPEED10 0x0000 /* Select 10Mbps */ /* Basic mode status register. */ #define BMSR_ERCAP 0x0001 /* Ext-reg capability */ diff --git a/include/uapi/linux/netfilter/nf_log.h b/include/uapi/linux/netfilter/nf_log.h new file mode 100644 index 000000000000..8be21e02387d --- /dev/null +++ b/include/uapi/linux/netfilter/nf_log.h @@ -0,0 +1,12 @@ +#ifndef _NETFILTER_NF_LOG_H +#define _NETFILTER_NF_LOG_H + +#define NF_LOG_TCPSEQ 0x01 /* Log TCP sequence numbers */ +#define NF_LOG_TCPOPT 0x02 /* Log TCP options */ +#define NF_LOG_IPOPT 0x04 /* Log IP options */ +#define NF_LOG_UID 0x08 /* Log UID owning local socket */ +#define NF_LOG_NFLOG 0x10 /* Unsupported, don't reuse */ +#define NF_LOG_MACDECODE 0x20 /* Decode MAC header */ +#define NF_LOG_MASK 0x2f + +#endif /* _NETFILTER_NF_LOG_H */ diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index c674ba2563b7..c6c4477c136b 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -546,6 +546,35 @@ enum nft_cmp_attributes { }; #define NFTA_CMP_MAX (__NFTA_CMP_MAX - 1) +/** + * enum nft_range_ops - nf_tables range operator + * + * @NFT_RANGE_EQ: equal + * @NFT_RANGE_NEQ: not equal + */ +enum nft_range_ops { + NFT_RANGE_EQ, + NFT_RANGE_NEQ, +}; + +/** + * enum nft_range_attributes - nf_tables range expression netlink attributes + * + * @NFTA_RANGE_SREG: source register of data to compare (NLA_U32: nft_registers) + * @NFTA_RANGE_OP: cmp operation (NLA_U32: nft_cmp_ops) + * @NFTA_RANGE_FROM_DATA: data range from (NLA_NESTED: nft_data_attributes) + * @NFTA_RANGE_TO_DATA: data range to (NLA_NESTED: nft_data_attributes) + */ +enum nft_range_attributes { + NFTA_RANGE_UNSPEC, + NFTA_RANGE_SREG, + NFTA_RANGE_OP, + NFTA_RANGE_FROM_DATA, + NFTA_RANGE_TO_DATA, + __NFTA_RANGE_MAX +}; +#define NFTA_RANGE_MAX (__NFTA_RANGE_MAX - 1) + enum nft_lookup_flags { NFT_LOOKUP_F_INV = (1 << 0), }; @@ -575,6 +604,10 @@ enum nft_dynset_ops { NFT_DYNSET_OP_UPDATE, }; +enum nft_dynset_flags { + NFT_DYNSET_F_INV = (1 << 0), +}; + /** * enum nft_dynset_attributes - dynset expression attributes * @@ -585,6 +618,7 @@ enum nft_dynset_ops { * @NFTA_DYNSET_SREG_DATA: source register of the data (NLA_U32) * @NFTA_DYNSET_TIMEOUT: timeout value for the new element (NLA_U64) * @NFTA_DYNSET_EXPR: expression (NLA_NESTED: nft_expr_attributes) + * @NFTA_DYNSET_FLAGS: flags (NLA_U32) */ enum nft_dynset_attributes { NFTA_DYNSET_UNSPEC, @@ -596,6 +630,7 @@ enum nft_dynset_attributes { NFTA_DYNSET_TIMEOUT, NFTA_DYNSET_EXPR, NFTA_DYNSET_PAD, + NFTA_DYNSET_FLAGS, __NFTA_DYNSET_MAX, }; #define NFTA_DYNSET_MAX (__NFTA_DYNSET_MAX - 1) @@ -723,6 +758,28 @@ enum nft_meta_keys { NFT_META_PRANDOM, }; +/** + * enum nft_hash_attributes - nf_tables hash expression netlink attributes + * + * @NFTA_HASH_SREG: source register (NLA_U32) + * @NFTA_HASH_DREG: destination register (NLA_U32) + * @NFTA_HASH_LEN: source data length (NLA_U32) + * @NFTA_HASH_MODULUS: modulus value (NLA_U32) + * @NFTA_HASH_SEED: seed value (NLA_U32) + * @NFTA_HASH_OFFSET: add this offset value to hash result (NLA_U32) + */ +enum nft_hash_attributes { + NFTA_HASH_UNSPEC, + NFTA_HASH_SREG, + NFTA_HASH_DREG, + NFTA_HASH_LEN, + NFTA_HASH_MODULUS, + NFTA_HASH_SEED, + NFTA_HASH_OFFSET, + __NFTA_HASH_MAX, +}; +#define NFTA_HASH_MAX (__NFTA_HASH_MAX - 1) + /** * enum nft_meta_attributes - nf_tables meta expression netlink attributes * @@ -866,12 +923,14 @@ enum nft_log_attributes { * @NFTA_QUEUE_NUM: netlink queue to send messages to (NLA_U16) * @NFTA_QUEUE_TOTAL: number of queues to load balance packets on (NLA_U16) * @NFTA_QUEUE_FLAGS: various flags (NLA_U16) + * @NFTA_QUEUE_SREG_QNUM: source register of queue number (NLA_U32: nft_registers) */ enum nft_queue_attributes { NFTA_QUEUE_UNSPEC, NFTA_QUEUE_NUM, NFTA_QUEUE_TOTAL, NFTA_QUEUE_FLAGS, + NFTA_QUEUE_SREG_QNUM, __NFTA_QUEUE_MAX }; #define NFTA_QUEUE_MAX (__NFTA_QUEUE_MAX - 1) @@ -880,6 +939,25 @@ enum nft_queue_attributes { #define NFT_QUEUE_FLAG_CPU_FANOUT 0x02 /* use current CPU (no hashing) */ #define NFT_QUEUE_FLAG_MASK 0x03 +enum nft_quota_flags { + NFT_QUOTA_F_INV = (1 << 0), +}; + +/** + * enum nft_quota_attributes - nf_tables quota expression netlink attributes + * + * @NFTA_QUOTA_BYTES: quota in bytes (NLA_U16) + * @NFTA_QUOTA_FLAGS: flags (NLA_U32) + */ +enum nft_quota_attributes { + NFTA_QUOTA_UNSPEC, + NFTA_QUOTA_BYTES, + NFTA_QUOTA_FLAGS, + NFTA_QUOTA_PAD, + __NFTA_QUOTA_MAX +}; +#define NFTA_QUOTA_MAX (__NFTA_QUOTA_MAX - 1) + /** * enum nft_reject_types - nf_tables reject expression reject types * @@ -1051,7 +1129,7 @@ enum nft_gen_attributes { * @NFTA_TRACE_NFPROTO: nf protocol processed (NLA_U32) * @NFTA_TRACE_POLICY: policy that decided fate of packet (NLA_U32) */ -enum nft_trace_attibutes { +enum nft_trace_attributes { NFTA_TRACE_UNSPEC, NFTA_TRACE_TABLE, NFTA_TRACE_CHAIN, @@ -1082,4 +1160,30 @@ enum nft_trace_types { __NFT_TRACETYPE_MAX }; #define NFT_TRACETYPE_MAX (__NFT_TRACETYPE_MAX - 1) + +/** + * enum nft_ng_attributes - nf_tables number generator expression netlink attributes + * + * @NFTA_NG_DREG: destination register (NLA_U32) + * @NFTA_NG_MODULUS: maximum counter value (NLA_U32) + * @NFTA_NG_TYPE: operation type (NLA_U32) + * @NFTA_NG_OFFSET: offset to be added to the counter (NLA_U32) + */ +enum nft_ng_attributes { + NFTA_NG_UNSPEC, + NFTA_NG_DREG, + NFTA_NG_MODULUS, + NFTA_NG_TYPE, + NFTA_NG_OFFSET, + __NFTA_NG_MAX +}; +#define NFTA_NG_MAX (__NFTA_NG_MAX - 1) + +enum nft_ng_types { + NFT_NG_INCREMENTAL, + NFT_NG_RANDOM, + __NFT_NG_MAX +}; +#define NFT_NG_MAX (__NFT_NG_MAX - 1) + #endif /* _LINUX_NF_TABLES_H */ diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h index 9df789709abe..6deb8867c5fc 100644 --- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h +++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h @@ -231,13 +231,13 @@ enum ctattr_secctx { enum ctattr_stats_cpu { CTA_STATS_UNSPEC, - CTA_STATS_SEARCHED, + CTA_STATS_SEARCHED, /* no longer used */ CTA_STATS_FOUND, - CTA_STATS_NEW, + CTA_STATS_NEW, /* no longer used */ CTA_STATS_INVALID, CTA_STATS_IGNORE, - CTA_STATS_DELETE, - CTA_STATS_DELETE_LIST, + CTA_STATS_DELETE, /* no longer used */ + CTA_STATS_DELETE_LIST, /* no longer used */ CTA_STATS_INSERT, CTA_STATS_INSERT_FAILED, CTA_STATS_DROP, diff --git a/include/uapi/linux/netfilter/xt_hashlimit.h b/include/uapi/linux/netfilter/xt_hashlimit.h index 6db90372f09c..3efc0ca18345 100644 --- a/include/uapi/linux/netfilter/xt_hashlimit.h +++ b/include/uapi/linux/netfilter/xt_hashlimit.h @@ -6,6 +6,7 @@ /* timings are in milliseconds. */ #define XT_HASHLIMIT_SCALE 10000 +#define XT_HASHLIMIT_SCALE_v2 1000000llu /* 1/10,000 sec period => max of 10,000/sec. Min rate is then 429490 * seconds, or one packet every 59 hours. */ @@ -63,6 +64,20 @@ struct hashlimit_cfg1 { __u8 srcmask, dstmask; }; +struct hashlimit_cfg2 { + __u64 avg; /* Average secs between packets * scale */ + __u64 burst; /* Period multiplier for upper limit. */ + __u32 mode; /* bitmask of XT_HASHLIMIT_HASH_* */ + + /* user specified */ + __u32 size; /* how many buckets */ + __u32 max; /* max number of entries */ + __u32 gc_interval; /* gc interval */ + __u32 expire; /* when do entries expire? */ + + __u8 srcmask, dstmask; +}; + struct xt_hashlimit_mtinfo1 { char name[IFNAMSIZ]; struct hashlimit_cfg1 cfg; @@ -71,4 +86,12 @@ struct xt_hashlimit_mtinfo1 { struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); }; +struct xt_hashlimit_mtinfo2 { + char name[NAME_MAX]; + struct hashlimit_cfg2 cfg; + + /* Used internally by the kernel */ + struct xt_hashlimit_htable *hinfo __attribute__((aligned(8))); +}; + #endif /* _UAPI_XT_HASHLIMIT_H */ diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 220694151434..56368e9b4622 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -48,6 +48,7 @@ #define NL80211_MULTICAST_GROUP_REG "regulatory" #define NL80211_MULTICAST_GROUP_MLME "mlme" #define NL80211_MULTICAST_GROUP_VENDOR "vendor" +#define NL80211_MULTICAST_GROUP_NAN "nan" #define NL80211_MULTICAST_GROUP_TESTMODE "testmode" /** @@ -838,6 +839,41 @@ * not running. The driver indicates the status of the scan through * cfg80211_scan_done(). * + * @NL80211_CMD_START_NAN: Start NAN operation, identified by its + * %NL80211_ATTR_WDEV interface. This interface must have been previously + * created with %NL80211_CMD_NEW_INTERFACE. After it has been started, the + * NAN interface will create or join a cluster. This command must have a + * valid %NL80211_ATTR_NAN_MASTER_PREF attribute and optional + * %NL80211_ATTR_NAN_DUAL attributes. + * After this command NAN functions can be added. + * @NL80211_CMD_STOP_NAN: Stop the NAN operation, identified by + * its %NL80211_ATTR_WDEV interface. + * @NL80211_CMD_ADD_NAN_FUNCTION: Add a NAN function. The function is defined + * with %NL80211_ATTR_NAN_FUNC nested attribute. When called, this + * operation returns the strictly positive and unique instance id + * (%NL80211_ATTR_NAN_FUNC_INST_ID) and a cookie (%NL80211_ATTR_COOKIE) + * of the function upon success. + * Since instance ID's can be re-used, this cookie is the right + * way to identify the function. This will avoid races when a termination + * event is handled by the user space after it has already added a new + * function that got the same instance id from the kernel as the one + * which just terminated. + * This cookie may be used in NAN events even before the command + * returns, so userspace shouldn't process NAN events until it processes + * the response to this command. + * Look at %NL80211_ATTR_SOCKET_OWNER as well. + * @NL80211_CMD_DEL_NAN_FUNCTION: Delete a NAN function by cookie. + * This command is also used as a notification sent when a NAN function is + * terminated. This will contain a %NL80211_ATTR_NAN_FUNC_INST_ID + * and %NL80211_ATTR_COOKIE attributes. + * @NL80211_CMD_CHANGE_NAN_CONFIG: Change current NAN configuration. NAN + * must be operational (%NL80211_CMD_START_NAN was executed). + * It must contain at least one of the following attributes: + * %NL80211_ATTR_NAN_MASTER_PREF, %NL80211_ATTR_NAN_DUAL. + * @NL80211_CMD_NAN_FUNC_MATCH: Notification sent when a match is reported. + * This will contain a %NL80211_ATTR_NAN_MATCH nested attribute and + * %NL80211_ATTR_COOKIE. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -1026,6 +1062,13 @@ enum nl80211_commands { NL80211_CMD_ABORT_SCAN, + NL80211_CMD_START_NAN, + NL80211_CMD_STOP_NAN, + NL80211_CMD_ADD_NAN_FUNCTION, + NL80211_CMD_DEL_NAN_FUNCTION, + NL80211_CMD_CHANGE_NAN_CONFIG, + NL80211_CMD_NAN_MATCH, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1343,7 +1386,13 @@ enum nl80211_commands { * enum nl80211_band value is used as the index (nla_type() of the nested * data. If a band is not included, it will be configured to allow all * rates based on negotiated supported rates information. This attribute - * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK and with starting AP, + * and joining mesh networks (not IBSS yet). In the later case, it must + * specify just a single bitrate, which is to be used for the beacon. + * The driver must also specify support for this with the extended + * features NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + * NL80211_EXT_FEATURE_BEACON_RATE_HT and + * NL80211_EXT_FEATURE_BEACON_RATE_VHT. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. @@ -1733,6 +1782,12 @@ enum nl80211_commands { * regulatory indoor configuration would be owned by the netlink socket * that configured the indoor setting, and the indoor operation would be * cleared when the socket is closed. + * If set during NAN interface creation, the interface will be destroyed + * if the socket is closed just like any other interface. Moreover, only + * the netlink socket that created the interface will be allowed to add + * and remove functions. NAN notifications will be sent in unicast to that + * socket. Without this attribute, any socket can add functions and the + * notifications will be sent to the %NL80211_MCGRP_NAN multicast group. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. @@ -1867,6 +1922,21 @@ enum nl80211_commands { * @NL80211_ATTR_MESH_PEER_AID: Association ID for the mesh peer (u16). This is * used to pull the stored data for mesh peer in power save state. * + * @NL80211_ATTR_NAN_MASTER_PREF: the master preference to be used by + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. Its type is u8 and it can't be 0. + * Also, values 1 and 255 are reserved for certification purposes and + * should not be used during a normal device operation. + * @NL80211_ATTR_NAN_DUAL: NAN dual band operation config (see + * &enum nl80211_nan_dual_band_conf). This attribute is used with + * %NL80211_CMD_START_NAN and optionally with + * %NL80211_CMD_CHANGE_NAN_CONFIG. + * @NL80211_ATTR_NAN_FUNC: a function that can be added to NAN. See + * &enum nl80211_nan_func_attributes for description of this nested + * attribute. + * @NL80211_ATTR_NAN_MATCH: used to report a match. This is a nested attribute. + * See &enum nl80211_nan_match_attributes. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2261,6 +2331,11 @@ enum nl80211_attrs { NL80211_ATTR_MESH_PEER_AID, + NL80211_ATTR_NAN_MASTER_PREF, + NL80211_ATTR_NAN_DUAL, + NL80211_ATTR_NAN_FUNC, + NL80211_ATTR_NAN_MATCH, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2339,6 +2414,7 @@ enum nl80211_attrs { * commands to create and destroy one * @NL80211_IF_TYPE_OCB: Outside Context of a BSS * This mode corresponds to the MIB variable dot11OCBActivated=true + * @NL80211_IFTYPE_NAN: NAN device interface type (not a netdev) * @NL80211_IFTYPE_MAX: highest interface type number currently defined * @NUM_NL80211_IFTYPES: number of defined interface types * @@ -2359,6 +2435,7 @@ enum nl80211_iftype { NL80211_IFTYPE_P2P_GO, NL80211_IFTYPE_P2P_DEVICE, NL80211_IFTYPE_OCB, + NL80211_IFTYPE_NAN, /* keep last */ NUM_NL80211_IFTYPES, @@ -4551,6 +4628,12 @@ enum nl80211_feature_flags { * (if available). * @NL80211_EXT_FEATURE_SET_SCAN_DWELL: This driver supports configuration of * channel dwell time. + * @NL80211_EXT_FEATURE_BEACON_RATE_LEGACY: Driver supports beacon rate + * configuration (AP/mesh), supporting a legacy (non HT/VHT) rate. + * @NL80211_EXT_FEATURE_BEACON_RATE_HT: Driver supports beacon rate + * configuration (AP/mesh) with HT rates. + * @NL80211_EXT_FEATURE_BEACON_RATE_VHT: Driver supports beacon rate + * configuration (AP/mesh) with VHT rates. * * @NUM_NL80211_EXT_FEATURES: number of extended features. * @MAX_NL80211_EXT_FEATURES: highest extended feature index. @@ -4562,6 +4645,9 @@ enum nl80211_ext_feature_index { NL80211_EXT_FEATURE_SCAN_START_TIME, NL80211_EXT_FEATURE_BSS_PARENT_TSF, NL80211_EXT_FEATURE_SET_SCAN_DWELL, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY, + NL80211_EXT_FEATURE_BEACON_RATE_HT, + NL80211_EXT_FEATURE_BEACON_RATE_VHT, /* add new features before the definition below */ NUM_NL80211_EXT_FEATURES, @@ -4855,4 +4941,186 @@ enum nl80211_bss_select_attr { NL80211_BSS_SELECT_ATTR_MAX = __NL80211_BSS_SELECT_ATTR_AFTER_LAST - 1 }; +/** + * enum nl80211_nan_dual_band_conf - NAN dual band configuration + * + * Defines the NAN dual band mode of operation + * + * @NL80211_NAN_BAND_DEFAULT: device default mode + * @NL80211_NAN_BAND_2GHZ: 2.4GHz mode + * @NL80211_NAN_BAND_5GHZ: 5GHz mode + */ +enum nl80211_nan_dual_band_conf { + NL80211_NAN_BAND_DEFAULT = 1 << 0, + NL80211_NAN_BAND_2GHZ = 1 << 1, + NL80211_NAN_BAND_5GHZ = 1 << 2, +}; + +/** + * enum nl80211_nan_function_type - NAN function type + * + * Defines the function type of a NAN function + * + * @NL80211_NAN_FUNC_PUBLISH: function is publish + * @NL80211_NAN_FUNC_SUBSCRIBE: function is subscribe + * @NL80211_NAN_FUNC_FOLLOW_UP: function is follow-up + */ +enum nl80211_nan_function_type { + NL80211_NAN_FUNC_PUBLISH, + NL80211_NAN_FUNC_SUBSCRIBE, + NL80211_NAN_FUNC_FOLLOW_UP, + + /* keep last */ + __NL80211_NAN_FUNC_TYPE_AFTER_LAST, + NL80211_NAN_FUNC_MAX_TYPE = __NL80211_NAN_FUNC_TYPE_AFTER_LAST - 1, +}; + +/** + * enum nl80211_nan_publish_type - NAN publish tx type + * + * Defines how to send publish Service Discovery Frames + * + * @NL80211_NAN_SOLICITED_PUBLISH: publish function is solicited + * @NL80211_NAN_UNSOLICITED_PUBLISH: publish function is unsolicited + */ +enum nl80211_nan_publish_type { + NL80211_NAN_SOLICITED_PUBLISH = 1 << 0, + NL80211_NAN_UNSOLICITED_PUBLISH = 1 << 1, +}; + +/** + * enum nl80211_nan_func_term_reason - NAN functions termination reason + * + * Defines termination reasons of a NAN function + * + * @NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST: requested by user + * @NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED: timeout + * @NL80211_NAN_FUNC_TERM_REASON_ERROR: errored + */ +enum nl80211_nan_func_term_reason { + NL80211_NAN_FUNC_TERM_REASON_USER_REQUEST, + NL80211_NAN_FUNC_TERM_REASON_TTL_EXPIRED, + NL80211_NAN_FUNC_TERM_REASON_ERROR, +}; + +#define NL80211_NAN_FUNC_SERVICE_ID_LEN 6 +#define NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN 0xff +#define NL80211_NAN_FUNC_SRF_MAX_LEN 0xff + +/** + * enum nl80211_nan_func_attributes - NAN function attributes + * @__NL80211_NAN_FUNC_INVALID: invalid + * @NL80211_NAN_FUNC_TYPE: &enum nl80211_nan_function_type (u8). + * @NL80211_NAN_FUNC_SERVICE_ID: 6 bytes of the service ID hash as + * specified in NAN spec. This is a binary attribute. + * @NL80211_NAN_FUNC_PUBLISH_TYPE: relevant if the function's type is + * publish. Defines the transmission type for the publish Service Discovery + * Frame, see &enum nl80211_nan_publish_type. Its type is u8. + * @NL80211_NAN_FUNC_PUBLISH_BCAST: relevant if the function is a solicited + * publish. Should the solicited publish Service Discovery Frame be sent to + * the NAN Broadcast address. This is a flag. + * @NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE: relevant if the function's type is + * subscribe. Is the subscribe active. This is a flag. + * @NL80211_NAN_FUNC_FOLLOW_UP_ID: relevant if the function's type is follow up. + * The instance ID for the follow up Service Discovery Frame. This is u8. + * @NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID: relevant if the function's type + * is follow up. This is a u8. + * The requestor instance ID for the follow up Service Discovery Frame. + * @NL80211_NAN_FUNC_FOLLOW_UP_DEST: the MAC address of the recipient of the + * follow up Service Discovery Frame. This is a binary attribute. + * @NL80211_NAN_FUNC_CLOSE_RANGE: is this function limited for devices in a + * close range. The range itself (RSSI) is defined by the device. + * This is a flag. + * @NL80211_NAN_FUNC_TTL: strictly positive number of DWs this function should + * stay active. If not present infinite TTL is assumed. This is a u32. + * @NL80211_NAN_FUNC_SERVICE_INFO: array of bytes describing the service + * specific info. This is a binary attribute. + * @NL80211_NAN_FUNC_SRF: Service Receive Filter. This is a nested attribute. + * See &enum nl80211_nan_srf_attributes. + * @NL80211_NAN_FUNC_RX_MATCH_FILTER: Receive Matching filter. This is a nested + * attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_TX_MATCH_FILTER: Transmit Matching filter. This is a + * nested attribute. It is a list of binary values. + * @NL80211_NAN_FUNC_INSTANCE_ID: The instance ID of the function. + * Its type is u8 and it cannot be 0. + * @NL80211_NAN_FUNC_TERM_REASON: NAN function termination reason. + * See &enum nl80211_nan_func_term_reason. + * + * @NUM_NL80211_NAN_FUNC_ATTR: internal + * @NL80211_NAN_FUNC_ATTR_MAX: highest NAN function attribute + */ +enum nl80211_nan_func_attributes { + __NL80211_NAN_FUNC_INVALID, + NL80211_NAN_FUNC_TYPE, + NL80211_NAN_FUNC_SERVICE_ID, + NL80211_NAN_FUNC_PUBLISH_TYPE, + NL80211_NAN_FUNC_PUBLISH_BCAST, + NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE, + NL80211_NAN_FUNC_FOLLOW_UP_ID, + NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID, + NL80211_NAN_FUNC_FOLLOW_UP_DEST, + NL80211_NAN_FUNC_CLOSE_RANGE, + NL80211_NAN_FUNC_TTL, + NL80211_NAN_FUNC_SERVICE_INFO, + NL80211_NAN_FUNC_SRF, + NL80211_NAN_FUNC_RX_MATCH_FILTER, + NL80211_NAN_FUNC_TX_MATCH_FILTER, + NL80211_NAN_FUNC_INSTANCE_ID, + NL80211_NAN_FUNC_TERM_REASON, + + /* keep last */ + NUM_NL80211_NAN_FUNC_ATTR, + NL80211_NAN_FUNC_ATTR_MAX = NUM_NL80211_NAN_FUNC_ATTR - 1 +}; + +/** + * enum nl80211_nan_srf_attributes - NAN Service Response filter attributes + * @__NL80211_NAN_SRF_INVALID: invalid + * @NL80211_NAN_SRF_INCLUDE: present if the include bit of the SRF set. + * This is a flag. + * @NL80211_NAN_SRF_BF: Bloom Filter. Present if and only if + * &NL80211_NAN_SRF_MAC_ADDRS isn't present. This attribute is binary. + * @NL80211_NAN_SRF_BF_IDX: index of the Bloom Filter. Mandatory if + * &NL80211_NAN_SRF_BF is present. This is a u8. + * @NL80211_NAN_SRF_MAC_ADDRS: list of MAC addresses for the SRF. Present if + * and only if &NL80211_NAN_SRF_BF isn't present. This is a nested + * attribute. Each nested attribute is a MAC address. + * @NUM_NL80211_NAN_SRF_ATTR: internal + * @NL80211_NAN_SRF_ATTR_MAX: highest NAN SRF attribute + */ +enum nl80211_nan_srf_attributes { + __NL80211_NAN_SRF_INVALID, + NL80211_NAN_SRF_INCLUDE, + NL80211_NAN_SRF_BF, + NL80211_NAN_SRF_BF_IDX, + NL80211_NAN_SRF_MAC_ADDRS, + + /* keep last */ + NUM_NL80211_NAN_SRF_ATTR, + NL80211_NAN_SRF_ATTR_MAX = NUM_NL80211_NAN_SRF_ATTR - 1, +}; + +/** + * enum nl80211_nan_match_attributes - NAN match attributes + * @__NL80211_NAN_MATCH_INVALID: invalid + * @NL80211_NAN_MATCH_FUNC_LOCAL: the local function that had the + * match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * @NL80211_NAN_MATCH_FUNC_PEER: the peer function + * that caused the match. This is a nested attribute. + * See &enum nl80211_nan_func_attributes. + * + * @NUM_NL80211_NAN_MATCH_ATTR: internal + * @NL80211_NAN_MATCH_ATTR_MAX: highest NAN match attribute + */ +enum nl80211_nan_match_attributes { + __NL80211_NAN_MATCH_INVALID, + NL80211_NAN_MATCH_FUNC_LOCAL, + NL80211_NAN_MATCH_FUNC_PEER, + + /* keep last */ + NUM_NL80211_NAN_MATCH_ATTR, + NL80211_NAN_MATCH_ATTR_MAX = NUM_NL80211_NAN_MATCH_ATTR - 1 +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 54c3b4f4aceb..59ed3992c760 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -605,13 +605,13 @@ struct ovs_action_push_mpls { * @vlan_tci: Tag control identifier (TCI) to push. The CFI bit must be set * (but it will not be set in the 802.1Q header that is pushed). * - * The @vlan_tpid value is typically %ETH_P_8021Q. The only acceptable TPID - * values are those that the kernel module also parses as 802.1Q headers, to - * prevent %OVS_ACTION_ATTR_PUSH_VLAN followed by %OVS_ACTION_ATTR_POP_VLAN - * from having surprising results. + * The @vlan_tpid value is typically %ETH_P_8021Q or %ETH_P_8021AD. + * The only acceptable TPID values are those that the kernel module also parses + * as 802.1Q or 802.1AD headers, to prevent %OVS_ACTION_ATTR_PUSH_VLAN followed + * by %OVS_ACTION_ATTR_POP_VLAN from having surprising results. */ struct ovs_action_push_vlan { - __be16 vlan_tpid; /* 802.1Q TPID. */ + __be16 vlan_tpid; /* 802.1Q or 802.1ad TPID. */ __be16 vlan_tci; /* 802.1Q TCI (VLAN ID and priority). */ }; @@ -721,9 +721,10 @@ enum ovs_nat_attr { * is copied from the value to the packet header field, rest of the bits are * left unchanged. The non-masked value bits must be passed in as zeroes. * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute. - * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the - * packet. - * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. + * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q or 802.1ad header + * onto the packet. + * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q or 802.1ad header + * from the packet. * @OVS_ACTION_ATTR_SAMPLE: Probabilitically executes actions, as specified in * the nested %OVS_SAMPLE_ATTR_* attributes. * @OVS_ACTION_ATTR_PUSH_MPLS: Push a new MPLS label stack entry onto the diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h index d1c1ccaba787..8fd715f806a2 100644 --- a/include/uapi/linux/pkt_cls.h +++ b/include/uapi/linux/pkt_cls.h @@ -396,6 +396,7 @@ enum { TCA_BPF_FD, TCA_BPF_NAME, TCA_BPF_FLAGS, + TCA_BPF_FLAGS_GEN, __TCA_BPF_MAX, }; @@ -428,6 +429,24 @@ enum { TCA_FLOWER_KEY_UDP_DST, /* be16 */ TCA_FLOWER_FLAGS, + TCA_FLOWER_KEY_VLAN_ID, /* be16 */ + TCA_FLOWER_KEY_VLAN_PRIO, /* u8 */ + TCA_FLOWER_KEY_VLAN_ETH_TYPE, /* be16 */ + + TCA_FLOWER_KEY_ENC_KEY_ID, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST, /* be32 */ + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK,/* be32 */ + TCA_FLOWER_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK,/* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK,/* struct in6_addr */ + + TCA_FLOWER_KEY_TCP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_TCP_DST_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_SRC_MASK, /* be16 */ + TCA_FLOWER_KEY_UDP_DST_MASK, /* be16 */ __TCA_FLOWER_MAX, }; diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index 2382eed50278..df7451d35131 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -792,6 +792,8 @@ enum { TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + TCA_FQ_LOW_RATE_THRESHOLD, /* per packet delay under this rate */ + __TCA_FQ_MAX }; @@ -809,7 +811,7 @@ struct tc_fq_qd_stats { __u32 flows; __u32 inactive_flows; __u32 throttled_flows; - __u32 pad; + __u32 unthrottle_latency_ns; }; /* Heavy-Hitter Filter */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 25a9ad8bcef1..e7a31f830690 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -235,6 +235,7 @@ enum LINUX_MIB_TCPSPURIOUSRTOS, /* TCPSpuriousRTOs */ LINUX_MIB_TCPMD5NOTFOUND, /* TCPMD5NotFound */ LINUX_MIB_TCPMD5UNEXPECTED, /* TCPMD5Unexpected */ + LINUX_MIB_TCPMD5FAILURE, /* TCPMD5Failure */ LINUX_MIB_SACKSHIFTED, LINUX_MIB_SACKMERGED, LINUX_MIB_SACKSHIFTFALLBACK, diff --git a/include/uapi/linux/tc_act/tc_ife.h b/include/uapi/linux/tc_act/tc_ife.h index 4ece02a77b9a..cd18360eca24 100644 --- a/include/uapi/linux/tc_act/tc_ife.h +++ b/include/uapi/linux/tc_act/tc_ife.h @@ -32,8 +32,9 @@ enum { #define IFE_META_HASHID 2 #define IFE_META_PRIO 3 #define IFE_META_QMAP 4 +#define IFE_META_TCINDEX 5 /*Can be overridden at runtime by module option*/ -#define __IFE_META_MAX 5 +#define __IFE_META_MAX 6 #define IFE_META_MAX (__IFE_META_MAX - 1) #endif diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h new file mode 100644 index 000000000000..10fc07da6c69 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_skbmod.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2016, Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#ifndef __LINUX_TC_SKBMOD_H +#define __LINUX_TC_SKBMOD_H + +#include + +#define TCA_ACT_SKBMOD 15 + +#define SKBMOD_F_DMAC 0x1 +#define SKBMOD_F_SMAC 0x2 +#define SKBMOD_F_ETYPE 0x4 +#define SKBMOD_F_SWAPMAC 0x8 + +struct tc_skbmod { + tc_gen; + __u64 flags; +}; + +enum { + TCA_SKBMOD_UNSPEC, + TCA_SKBMOD_TM, + TCA_SKBMOD_PARMS, + TCA_SKBMOD_DMAC, + TCA_SKBMOD_SMAC, + TCA_SKBMOD_ETYPE, + TCA_SKBMOD_PAD, + __TCA_SKBMOD_MAX +}; +#define TCA_SKBMOD_MAX (__TCA_SKBMOD_MAX - 1) + +#endif diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h new file mode 100644 index 000000000000..890106ff16e6 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_tunnel_key.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_TUNNEL_KEY_H +#define __LINUX_TC_TUNNEL_KEY_H + +#include + +#define TCA_ACT_TUNNEL_KEY 17 + +#define TCA_TUNNEL_KEY_ACT_SET 1 +#define TCA_TUNNEL_KEY_ACT_RELEASE 2 + +struct tc_tunnel_key { + tc_gen; + int t_action; +}; + +enum { + TCA_TUNNEL_KEY_UNSPEC, + TCA_TUNNEL_KEY_TM, + TCA_TUNNEL_KEY_PARMS, + TCA_TUNNEL_KEY_ENC_IPV4_SRC, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV4_DST, /* be32 */ + TCA_TUNNEL_KEY_ENC_IPV6_SRC, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_IPV6_DST, /* struct in6_addr */ + TCA_TUNNEL_KEY_ENC_KEY_ID, /* be64 */ + TCA_TUNNEL_KEY_PAD, + __TCA_TUNNEL_KEY_MAX, +}; + +#define TCA_TUNNEL_KEY_MAX (__TCA_TUNNEL_KEY_MAX - 1) + +#endif diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h index 31151ff6264f..bddb272b843f 100644 --- a/include/uapi/linux/tc_act/tc_vlan.h +++ b/include/uapi/linux/tc_act/tc_vlan.h @@ -16,6 +16,7 @@ #define TCA_VLAN_ACT_POP 1 #define TCA_VLAN_ACT_PUSH 2 +#define TCA_VLAN_ACT_MODIFY 3 struct tc_vlan { tc_gen; @@ -29,6 +30,7 @@ enum { TCA_VLAN_PUSH_VLAN_ID, TCA_VLAN_PUSH_VLAN_PROTOCOL, TCA_VLAN_PAD, + TCA_VLAN_PUSH_VLAN_PRIORITY, __TCA_VLAN_MAX, }; #define TCA_VLAN_MAX (__TCA_VLAN_MAX - 1) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 482898fc433a..73ac0db487f8 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -167,6 +167,7 @@ struct tcp_info { __u8 tcpi_backoff; __u8 tcpi_options; __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4; + __u8 tcpi_delivery_rate_app_limited:1; __u32 tcpi_rto; __u32 tcpi_ato; @@ -211,6 +212,8 @@ struct tcp_info { __u32 tcpi_min_rtt; __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */ + + __u64 tcpi_delivery_rate; }; /* for TCP_MD5SIG socket option */ diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index 5f3f6d09fb79..f9edd20fe9ba 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -59,6 +59,9 @@ enum { TIPC_NL_MON_SET, TIPC_NL_MON_GET, TIPC_NL_MON_PEER_GET, + TIPC_NL_PEER_REMOVE, + TIPC_NL_BEARER_ADD, + TIPC_NL_UDP_GET_REMOTEIP, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -98,6 +101,7 @@ enum { TIPC_NLA_UDP_UNSPEC, TIPC_NLA_UDP_LOCAL, /* sockaddr_storage */ TIPC_NLA_UDP_REMOTE, /* sockaddr_storage */ + TIPC_NLA_UDP_MULTI_REMOTEIP, /* flag */ __TIPC_NLA_UDP_MAX, TIPC_NLA_UDP_MAX = __TIPC_NLA_UDP_MAX - 1 diff --git a/include/uapi/linux/xfrm.h b/include/uapi/linux/xfrm.h index 143338978b48..1fc62b239f1b 100644 --- a/include/uapi/linux/xfrm.h +++ b/include/uapi/linux/xfrm.h @@ -298,7 +298,7 @@ enum xfrm_attr_type_t { XFRMA_ALG_AUTH_TRUNC, /* struct xfrm_algo_auth */ XFRMA_MARK, /* struct xfrm_mark */ XFRMA_TFCPAD, /* __u32 */ - XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_esn */ + XFRMA_REPLAY_ESN_VAL, /* struct xfrm_replay_state_esn */ XFRMA_SA_EXTRA_FLAGS, /* __u32 */ XFRMA_PROTO, /* __u8 */ XFRMA_ADDRESS_FILTER, /* struct xfrm_address_filter */ diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 633a650d7aeb..a2ac051c342f 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -538,7 +538,7 @@ static int __init register_perf_event_array_map(void) } late_initcall(register_perf_event_array_map); -#ifdef CONFIG_SOCK_CGROUP_DATA +#ifdef CONFIG_CGROUPS static void *cgroup_fd_array_get_ptr(struct bpf_map *map, struct file *map_file /* not used */, int fd) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 03fd23d4d587..aa6d98154106 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1018,7 +1018,7 @@ void bpf_user_rnd_init_once(void) prandom_init_once(&bpf_user_rnd_state); } -u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_user_rnd_u32) { /* Should someone ever have the rather unwise idea to use some * of the registers passed into this function, then note that @@ -1031,7 +1031,7 @@ u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) state = &get_cpu_var(bpf_user_rnd_state); res = prandom_u32_state(state); - put_cpu_var(state); + put_cpu_var(bpf_user_rnd_state); return res; } diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 1ea3afba1a4f..39918402e6e9 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -16,6 +16,7 @@ #include #include #include +#include /* If kernel subsystem is allowing eBPF programs to call this function, * inside its own verifier_ops->get_func_proto() callback it should return @@ -26,48 +27,32 @@ * if program is allowed to access maps, so check rcu_read_lock_held in * all three functions. */ -static u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key) { - /* verifier checked that R1 contains a valid pointer to bpf_map - * and R2 points to a program stack and map->key_size bytes were - * initialized - */ - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value; - WARN_ON_ONCE(!rcu_read_lock_held()); - - value = map->ops->map_lookup_elem(map, key); - - /* lookup() returns either pointer to element value or NULL - * which is the meaning of PTR_TO_MAP_VALUE_OR_NULL type - */ - return (unsigned long) value; + return (unsigned long) map->ops->map_lookup_elem(map, key); } const struct bpf_func_proto bpf_map_lookup_elem_proto = { .func = bpf_map_lookup_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, }; -static u64 bpf_map_update_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key, + void *, value, u64, flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - void *value = (void *) (unsigned long) r3; - WARN_ON_ONCE(!rcu_read_lock_held()); - - return map->ops->map_update_elem(map, key, value, r4); + return map->ops->map_update_elem(map, key, value, flags); } const struct bpf_func_proto bpf_map_update_elem_proto = { .func = bpf_map_update_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -75,19 +60,16 @@ const struct bpf_func_proto bpf_map_update_elem_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_map_delete_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; - void *key = (void *) (unsigned long) r2; - WARN_ON_ONCE(!rcu_read_lock_held()); - return map->ops->map_delete_elem(map, key); } const struct bpf_func_proto bpf_map_delete_elem_proto = { .func = bpf_map_delete_elem, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_CONST_MAP_PTR, .arg2_type = ARG_PTR_TO_MAP_KEY, @@ -99,7 +81,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_smp_processor_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_smp_processor_id) { return smp_processor_id(); } @@ -110,7 +92,7 @@ const struct bpf_func_proto bpf_get_smp_processor_id_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_ktime_get_ns(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_ktime_get_ns) { /* NMI safe access to clock monotonic */ return ktime_get_mono_fast_ns(); @@ -122,11 +104,11 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_pid_tgid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; - if (!task) + if (unlikely(!task)) return -EINVAL; return (u64) task->tgid << 32 | task->pid; @@ -138,18 +120,18 @@ const struct bpf_func_proto bpf_get_current_pid_tgid_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_uid_gid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_uid_gid) { struct task_struct *task = current; kuid_t uid; kgid_t gid; - if (!task) + if (unlikely(!task)) return -EINVAL; current_uid_gid(&uid, &gid); return (u64) from_kgid(&init_user_ns, gid) << 32 | - from_kuid(&init_user_ns, uid); + from_kuid(&init_user_ns, uid); } const struct bpf_func_proto bpf_get_current_uid_gid_proto = { @@ -158,10 +140,9 @@ const struct bpf_func_proto bpf_get_current_uid_gid_proto = { .ret_type = RET_INTEGER, }; -static u64 bpf_get_current_comm(u64 r1, u64 size, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_get_current_comm, char *, buf, u32, size) { struct task_struct *task = current; - char *buf = (char *) (long) r1; if (unlikely(!task)) goto err_clear; diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c index bf4495fcd25d..732ae16d12b7 100644 --- a/kernel/bpf/stackmap.c +++ b/kernel/bpf/stackmap.c @@ -116,10 +116,9 @@ free_smap: return ERR_PTR(err); } -u64 bpf_get_stackid(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags) { - struct pt_regs *regs = (struct pt_regs *) (long) r1; - struct bpf_map *map = (struct bpf_map *) (long) r2; struct bpf_stack_map *smap = container_of(map, struct bpf_stack_map, map); struct perf_callchain_entry *trace; struct stack_map_bucket *bucket, *new_bucket, *old_bucket; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index daea765d72e6..99a7e5b388f2 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include @@ -126,76 +127,16 @@ * are set to NOT_INIT to indicate that they are no longer readable. */ -struct reg_state { - enum bpf_reg_type type; - union { - /* valid when type == CONST_IMM | PTR_TO_STACK | UNKNOWN_VALUE */ - s64 imm; - - /* valid when type == PTR_TO_PACKET* */ - struct { - u32 id; - u16 off; - u16 range; - }; - - /* valid when type == CONST_PTR_TO_MAP | PTR_TO_MAP_VALUE | - * PTR_TO_MAP_VALUE_OR_NULL - */ - struct bpf_map *map_ptr; - }; -}; - -enum bpf_stack_slot_type { - STACK_INVALID, /* nothing was stored in this stack slot */ - STACK_SPILL, /* register spilled into stack */ - STACK_MISC /* BPF program wrote some data into this slot */ -}; - -#define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ - -/* state of the program: - * type of all registers and stack info - */ -struct verifier_state { - struct reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; -}; - -/* linked list of verifier states used to prune search */ -struct verifier_state_list { - struct verifier_state state; - struct verifier_state_list *next; -}; - /* verifier_state + insn_idx are pushed to stack when branch is encountered */ -struct verifier_stack_elem { +struct bpf_verifier_stack_elem { /* verifer state is 'st' * before processing instruction 'insn_idx' * and after processing instruction 'prev_insn_idx' */ - struct verifier_state st; + struct bpf_verifier_state st; int insn_idx; int prev_insn_idx; - struct verifier_stack_elem *next; -}; - -#define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ - -/* single container for all structs - * one verifier_env per bpf_check() call - */ -struct verifier_env { - struct bpf_prog *prog; /* eBPF program being verified */ - struct verifier_stack_elem *head; /* stack of verifier states to be processed */ - int stack_size; /* number of states to be processed */ - struct verifier_state cur_state; /* current verifier state */ - struct verifier_state_list **explored_states; /* search pruning optimization */ - struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ - u32 used_map_cnt; /* number of used maps */ - u32 id_gen; /* used to generate unique reg IDs */ - bool allow_ptr_leaks; + struct bpf_verifier_stack_elem *next; }; #define BPF_COMPLEXITY_LIMIT_INSNS 65536 @@ -204,6 +145,7 @@ struct verifier_env { struct bpf_call_arg_meta { struct bpf_map *map_ptr; bool raw_mode; + bool pkt_access; int regno; int access_size; }; @@ -240,6 +182,7 @@ static const char * const reg_type_str[] = { [CONST_PTR_TO_MAP] = "map_ptr", [PTR_TO_MAP_VALUE] = "map_value", [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", + [PTR_TO_MAP_VALUE_ADJ] = "map_value_adj", [FRAME_PTR] = "fp", [PTR_TO_STACK] = "fp", [CONST_IMM] = "imm", @@ -247,9 +190,9 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; -static void print_verifier_state(struct verifier_state *state) +static void print_verifier_state(struct bpf_verifier_state *state) { - struct reg_state *reg; + struct bpf_reg_state *reg; enum bpf_reg_type t; int i; @@ -267,10 +210,17 @@ static void print_verifier_state(struct verifier_state *state) else if (t == UNKNOWN_VALUE && reg->imm) verbose("%lld", reg->imm); else if (t == CONST_PTR_TO_MAP || t == PTR_TO_MAP_VALUE || - t == PTR_TO_MAP_VALUE_OR_NULL) + t == PTR_TO_MAP_VALUE_OR_NULL || + t == PTR_TO_MAP_VALUE_ADJ) verbose("(ks=%d,vs=%d)", reg->map_ptr->key_size, reg->map_ptr->value_size); + if (reg->min_value != BPF_REGISTER_MIN_RANGE) + verbose(",min_value=%llu", + (unsigned long long)reg->min_value); + if (reg->max_value != BPF_REGISTER_MAX_RANGE) + verbose(",max_value=%llu", + (unsigned long long)reg->max_value); } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) @@ -425,9 +375,9 @@ static void print_bpf_insn(struct bpf_insn *insn) } } -static int pop_stack(struct verifier_env *env, int *prev_insn_idx) +static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; int insn_idx; if (env->head == NULL) @@ -444,12 +394,12 @@ static int pop_stack(struct verifier_env *env, int *prev_insn_idx) return insn_idx; } -static struct verifier_state *push_stack(struct verifier_env *env, int insn_idx, - int prev_insn_idx) +static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) { - struct verifier_stack_elem *elem; + struct bpf_verifier_stack_elem *elem; - elem = kmalloc(sizeof(struct verifier_stack_elem), GFP_KERNEL); + elem = kmalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL); if (!elem) goto err; @@ -475,13 +425,15 @@ static const int caller_saved[CALLER_SAVED_REGS] = { BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5 }; -static void init_reg_state(struct reg_state *regs) +static void init_reg_state(struct bpf_reg_state *regs) { int i; for (i = 0; i < MAX_BPF_REG; i++) { regs[i].type = NOT_INIT; regs[i].imm = 0; + regs[i].min_value = BPF_REGISTER_MIN_RANGE; + regs[i].max_value = BPF_REGISTER_MAX_RANGE; } /* frame pointer */ @@ -491,20 +443,26 @@ static void init_reg_state(struct reg_state *regs) regs[BPF_REG_1].type = PTR_TO_CTX; } -static void mark_reg_unknown_value(struct reg_state *regs, u32 regno) +static void mark_reg_unknown_value(struct bpf_reg_state *regs, u32 regno) { BUG_ON(regno >= MAX_BPF_REG); regs[regno].type = UNKNOWN_VALUE; regs[regno].imm = 0; } +static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) +{ + regs[regno].min_value = BPF_REGISTER_MIN_RANGE; + regs[regno].max_value = BPF_REGISTER_MAX_RANGE; +} + enum reg_arg_type { SRC_OP, /* register is used as source operand */ DST_OP, /* register is used as destination operand */ DST_OP_NO_MARK /* same as above, check only, don't mark */ }; -static int check_reg_arg(struct reg_state *regs, u32 regno, +static int check_reg_arg(struct bpf_reg_state *regs, u32 regno, enum reg_arg_type t) { if (regno >= MAX_BPF_REG) { @@ -564,8 +522,8 @@ static bool is_spillable_regtype(enum bpf_reg_type type) /* check_stack_read/write functions track spill/fill of registers, * stack boundary and alignment are checked in check_mem_access() */ -static int check_stack_write(struct verifier_state *state, int off, int size, - int value_regno) +static int check_stack_write(struct bpf_verifier_state *state, int off, + int size, int value_regno) { int i; /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0, @@ -590,7 +548,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, } else { /* regular write of data into stack */ state->spilled_regs[(MAX_BPF_STACK + off) / BPF_REG_SIZE] = - (struct reg_state) {}; + (struct bpf_reg_state) {}; for (i = 0; i < size; i++) state->stack_slot_type[MAX_BPF_STACK + off + i] = STACK_MISC; @@ -598,7 +556,7 @@ static int check_stack_write(struct verifier_state *state, int off, int size, return 0; } -static int check_stack_read(struct verifier_state *state, int off, int size, +static int check_stack_read(struct bpf_verifier_state *state, int off, int size, int value_regno) { u8 *slot_type; @@ -639,7 +597,7 @@ static int check_stack_read(struct verifier_state *state, int off, int size, } /* check read/write into map element returned by bpf_map_lookup_elem() */ -static int check_map_access(struct verifier_env *env, u32 regno, int off, +static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { struct bpf_map *map = env->cur_state.regs[regno].map_ptr; @@ -654,24 +612,31 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff -static bool may_write_pkt_data(enum bpf_prog_type type) +static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, + const struct bpf_call_arg_meta *meta) { - switch (type) { + switch (env->prog->type) { + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: case BPF_PROG_TYPE_XDP: + if (meta) + return meta->pkt_access; + + env->seen_direct_write = true; return true; default: return false; } } -static int check_packet_access(struct verifier_env *env, u32 regno, int off, +static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *reg = ®s[regno]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *reg = ®s[regno]; off += reg->off; - if (off < 0 || off + size > reg->range) { + if (off < 0 || size <= 0 || off + size > reg->range) { verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno, reg->id, reg->off, reg->range); return -EACCES; @@ -680,9 +645,13 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, } /* check access to 'struct bpf_context' fields */ -static int check_ctx_access(struct verifier_env *env, int off, int size, +static int check_ctx_access(struct bpf_verifier_env *env, int off, int size, enum bpf_access_type t, enum bpf_reg_type *reg_type) { + /* for analyzer ctx accesses are already validated and converted */ + if (env->analyzer_ops) + return 0; + if (env->prog->aux->ops->is_valid_access && env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ @@ -695,7 +664,7 @@ static int check_ctx_access(struct verifier_env *env, int off, int size, return -EACCES; } -static bool is_pointer_value(struct verifier_env *env, int regno) +static bool is_pointer_value(struct bpf_verifier_env *env, int regno) { if (env->allow_ptr_leaks) return false; @@ -709,28 +678,19 @@ static bool is_pointer_value(struct verifier_env *env, int regno) } } -static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, - int off, int size) +static int check_ptr_alignment(struct bpf_verifier_env *env, + struct bpf_reg_state *reg, int off, int size) { - if (reg->type != PTR_TO_PACKET) { + if (reg->type != PTR_TO_PACKET && reg->type != PTR_TO_MAP_VALUE_ADJ) { if (off % size != 0) { - verbose("misaligned access off %d size %d\n", off, size); + verbose("misaligned access off %d size %d\n", + off, size); return -EACCES; } else { return 0; } } - switch (env->prog->type) { - case BPF_PROG_TYPE_SCHED_CLS: - case BPF_PROG_TYPE_SCHED_ACT: - case BPF_PROG_TYPE_XDP: - break; - default: - verbose("verifier is misconfigured\n"); - return -EACCES; - } - if (IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) /* misaligned access to packet is ok on x86,arm,arm64 */ return 0; @@ -741,7 +701,8 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, } /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg->off + off) % size != 0) { + if (reg->type == PTR_TO_PACKET && + (NET_IP_ALIGN + reg->off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", NET_IP_ALIGN, reg->off, off, size); return -EACCES; @@ -755,12 +716,12 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, * if t==write && value_regno==-1, some unknown value is stored into memory * if t==read && value_regno==-1, don't care what we read from memory */ -static int check_mem_access(struct verifier_env *env, u32 regno, int off, +static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, int bpf_size, enum bpf_access_type t, int value_regno) { - struct verifier_state *state = &env->cur_state; - struct reg_state *reg = &state->regs[regno]; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *reg = &state->regs[regno]; int size, err = 0; if (reg->type == PTR_TO_STACK) @@ -774,12 +735,52 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, if (err) return err; - if (reg->type == PTR_TO_MAP_VALUE) { + if (reg->type == PTR_TO_MAP_VALUE || + reg->type == PTR_TO_MAP_VALUE_ADJ) { if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into map\n", value_regno); return -EACCES; } + + /* If we adjusted the register to this map value at all then we + * need to change off and size to min_value and max_value + * respectively to make sure our theoretical access will be + * safe. + */ + if (reg->type == PTR_TO_MAP_VALUE_ADJ) { + if (log_level) + print_verifier_state(state); + env->varlen_map_value_access = true; + /* The minimum value is only important with signed + * comparisons where we can't assume the floor of a + * value is 0. If we are using signed variables for our + * index'es we need to make sure that whatever we use + * will have a set floor within our range. + */ + if ((s64)reg->min_value < 0) { + verbose("R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", + regno); + return -EACCES; + } + err = check_map_access(env, regno, reg->min_value + off, + size); + if (err) { + verbose("R%d min value is outside of the array range\n", + regno); + return err; + } + + /* If we haven't set a max value then we need to bail + * since we can't be sure we won't do bad things. + */ + if (reg->max_value == BPF_REGISTER_MAX_RANGE) { + verbose("R%d unbounded memory access, make sure to bounds check any array access into a map\n", + regno); + return -EACCES; + } + off += reg->max_value; + } err = check_map_access(env, regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); @@ -795,9 +796,8 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, err = check_ctx_access(env, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value(state->regs, value_regno); - if (env->allow_ptr_leaks) - /* note that reg.[id|off|range] == 0 */ - state->regs[value_regno].type = reg_type; + /* note that reg.[id|off|range] == 0 */ + state->regs[value_regno].type = reg_type; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { @@ -817,7 +817,7 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, err = check_stack_read(state, off, size, value_regno); } } else if (state->regs[regno].type == PTR_TO_PACKET) { - if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) { + if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL)) { verbose("cannot write into packet\n"); return -EACCES; } @@ -846,9 +846,9 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, return err; } -static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) +static int check_xadd(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || @@ -882,12 +882,12 @@ static int check_xadd(struct verifier_env *env, struct bpf_insn *insn) * bytes from that pointer, make sure that it's within stack boundary * and all elements of stack are initialized */ -static int check_stack_boundary(struct verifier_env *env, int regno, +static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs; int off, i; if (regs[regno].type != PTR_TO_STACK) { @@ -926,18 +926,18 @@ static int check_stack_boundary(struct verifier_env *env, int regno, return 0; } -static int check_func_arg(struct verifier_env *env, u32 regno, +static int check_func_arg(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, struct bpf_call_arg_meta *meta) { - struct reg_state *reg = env->cur_state.regs + regno; - enum bpf_reg_type expected_type; + struct bpf_reg_state *regs = env->cur_state.regs, *reg = ®s[regno]; + enum bpf_reg_type expected_type, type = reg->type; int err = 0; if (arg_type == ARG_DONTCARE) return 0; - if (reg->type == NOT_INIT) { + if (type == NOT_INIT) { verbose("R%d !read_ok\n", regno); return -EACCES; } @@ -950,16 +950,29 @@ static int check_func_arg(struct verifier_env *env, u32 regno, return 0; } + if (type == PTR_TO_PACKET && !may_access_direct_pkt_data(env, meta)) { + verbose("helper access to the packet is not allowed\n"); + return -EACCES; + } + if (arg_type == ARG_PTR_TO_MAP_KEY || arg_type == ARG_PTR_TO_MAP_VALUE) { expected_type = PTR_TO_STACK; + if (type != PTR_TO_PACKET && type != expected_type) + goto err_type; } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { expected_type = CONST_IMM; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_CONST_MAP_PTR) { expected_type = CONST_PTR_TO_MAP; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_PTR_TO_CTX) { expected_type = PTR_TO_CTX; + if (type != expected_type) + goto err_type; } else if (arg_type == ARG_PTR_TO_STACK || arg_type == ARG_PTR_TO_RAW_STACK) { expected_type = PTR_TO_STACK; @@ -967,20 +980,16 @@ static int check_func_arg(struct verifier_env *env, u32 regno, * passed in as argument, it's a CONST_IMM type. Final test * happens during stack boundary checking. */ - if (reg->type == CONST_IMM && reg->imm == 0) - expected_type = CONST_IMM; + if (type == CONST_IMM && reg->imm == 0) + /* final test in check_stack_boundary() */; + else if (type != PTR_TO_PACKET && type != expected_type) + goto err_type; meta->raw_mode = arg_type == ARG_PTR_TO_RAW_STACK; } else { verbose("unsupported arg_type %d\n", arg_type); return -EFAULT; } - if (reg->type != expected_type) { - verbose("R%d type=%s expected=%s\n", regno, - reg_type_str[reg->type], reg_type_str[expected_type]); - return -EACCES; - } - if (arg_type == ARG_CONST_MAP_PTR) { /* bpf_map_xxx(map_ptr) call: remember that map_ptr */ meta->map_ptr = reg->map_ptr; @@ -998,8 +1007,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->key\n"); return -EACCES; } - err = check_stack_boundary(env, regno, meta->map_ptr->key_size, - false, NULL); + if (type == PTR_TO_PACKET) + err = check_packet_access(env, regno, 0, + meta->map_ptr->key_size); + else + err = check_stack_boundary(env, regno, + meta->map_ptr->key_size, + false, NULL); } else if (arg_type == ARG_PTR_TO_MAP_VALUE) { /* bpf_map_xxx(..., map_ptr, ..., value) call: * check [value, value + map->value_size) validity @@ -1009,9 +1023,13 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("invalid map_ptr to access map->value\n"); return -EACCES; } - err = check_stack_boundary(env, regno, - meta->map_ptr->value_size, - false, NULL); + if (type == PTR_TO_PACKET) + err = check_packet_access(env, regno, 0, + meta->map_ptr->value_size); + else + err = check_stack_boundary(env, regno, + meta->map_ptr->value_size, + false, NULL); } else if (arg_type == ARG_CONST_STACK_SIZE || arg_type == ARG_CONST_STACK_SIZE_OR_ZERO) { bool zero_size_allowed = (arg_type == ARG_CONST_STACK_SIZE_OR_ZERO); @@ -1025,11 +1043,18 @@ static int check_func_arg(struct verifier_env *env, u32 regno, verbose("ARG_CONST_STACK_SIZE cannot be first argument\n"); return -EACCES; } - err = check_stack_boundary(env, regno - 1, reg->imm, - zero_size_allowed, meta); + if (regs[regno - 1].type == PTR_TO_PACKET) + err = check_packet_access(env, regno - 1, 0, reg->imm); + else + err = check_stack_boundary(env, regno - 1, reg->imm, + zero_size_allowed, meta); } return err; +err_type: + verbose("R%d type=%s expected=%s\n", regno, + reg_type_str[type], reg_type_str[expected_type]); + return -EACCES; } static int check_map_func_compatibility(struct bpf_map *map, int func_id) @@ -1053,7 +1078,8 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) goto error; break; case BPF_MAP_TYPE_CGROUP_ARRAY: - if (func_id != BPF_FUNC_skb_under_cgroup) + if (func_id != BPF_FUNC_skb_under_cgroup && + func_id != BPF_FUNC_current_task_under_cgroup) goto error; break; default: @@ -1075,6 +1101,7 @@ static int check_map_func_compatibility(struct bpf_map *map, int func_id) if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) goto error; break; + case BPF_FUNC_current_task_under_cgroup: case BPF_FUNC_skb_under_cgroup: if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) goto error; @@ -1108,10 +1135,10 @@ static int check_raw_mode(const struct bpf_func_proto *fn) return count > 1 ? -EINVAL : 0; } -static void clear_all_pkt_pointers(struct verifier_env *env) +static void clear_all_pkt_pointers(struct bpf_verifier_env *env) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs, *reg; + struct bpf_verifier_state *state = &env->cur_state; + struct bpf_reg_state *regs = state->regs, *reg; int i; for (i = 0; i < MAX_BPF_REG; i++) @@ -1131,12 +1158,12 @@ static void clear_all_pkt_pointers(struct verifier_env *env) } } -static int check_call(struct verifier_env *env, int func_id) +static int check_call(struct bpf_verifier_env *env, int func_id) { - struct verifier_state *state = &env->cur_state; + struct bpf_verifier_state *state = &env->cur_state; const struct bpf_func_proto *fn = NULL; - struct reg_state *regs = state->regs; - struct reg_state *reg; + struct bpf_reg_state *regs = state->regs; + struct bpf_reg_state *reg; struct bpf_call_arg_meta meta; bool changes_data; int i, err; @@ -1164,6 +1191,7 @@ static int check_call(struct verifier_env *env, int func_id) changes_data = bpf_helper_changes_skb_data(fn->func); memset(&meta, 0, sizeof(meta)); + meta.pkt_access = fn->pkt_access; /* We only support one arg being in raw mode at the moment, which * is sufficient for the helper functions we have right now. @@ -1214,6 +1242,7 @@ static int check_call(struct verifier_env *env, int func_id) regs[BPF_REG_0].type = NOT_INIT; } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL) { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + regs[BPF_REG_0].max_value = regs[BPF_REG_0].min_value = 0; /* remember map_ptr, so that check_map_access() * can check 'value_size' boundary of memory access * to map element returned from bpf_map_lookup_elem() @@ -1238,12 +1267,13 @@ static int check_call(struct verifier_env *env, int func_id) return 0; } -static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) +static int check_packet_ptr_add(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; - struct reg_state *src_reg = ®s[insn->src_reg]; - struct reg_state tmp_reg; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1311,10 +1341,10 @@ add_imm: return 0; } -static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_alu(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; u8 opcode = BPF_OP(insn->code); s64 imm_log2; @@ -1324,7 +1354,7 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) */ if (BPF_SRC(insn->code) == BPF_X) { - struct reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; if (src_reg->type == UNKNOWN_VALUE && src_reg->imm > 0 && dst_reg->imm && opcode == BPF_ADD) { @@ -1413,11 +1443,12 @@ static int evaluate_reg_alu(struct verifier_env *env, struct bpf_insn *insn) return 0; } -static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) +static int evaluate_reg_imm_alu(struct bpf_verifier_env *env, + struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; - struct reg_state *dst_reg = ®s[insn->dst_reg]; - struct reg_state *src_reg = ®s[insn->src_reg]; + struct bpf_reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *dst_reg = ®s[insn->dst_reg]; + struct bpf_reg_state *src_reg = ®s[insn->src_reg]; u8 opcode = BPF_OP(insn->code); /* dst_reg->type == CONST_IMM here, simulate execution of 'add' insn. @@ -1433,10 +1464,110 @@ static int evaluate_reg_imm_alu(struct verifier_env *env, struct bpf_insn *insn) return 0; } -/* check validity of 32-bit and 64-bit arithmetic operations */ -static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) +static void check_reg_overflow(struct bpf_reg_state *reg) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; + if (reg->max_value > BPF_REGISTER_MAX_RANGE) + reg->max_value = BPF_REGISTER_MAX_RANGE; + if ((s64)reg->min_value < BPF_REGISTER_MIN_RANGE) + reg->min_value = BPF_REGISTER_MIN_RANGE; +} + +static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, + struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; + u64 min_val = BPF_REGISTER_MIN_RANGE, max_val = BPF_REGISTER_MAX_RANGE; + bool min_set = false, max_set = false; + u8 opcode = BPF_OP(insn->code); + + dst_reg = ®s[insn->dst_reg]; + if (BPF_SRC(insn->code) == BPF_X) { + check_reg_overflow(®s[insn->src_reg]); + min_val = regs[insn->src_reg].min_value; + max_val = regs[insn->src_reg].max_value; + + /* If the source register is a random pointer then the + * min_value/max_value values represent the range of the known + * accesses into that value, not the actual min/max value of the + * register itself. In this case we have to reset the reg range + * values so we know it is not safe to look at. + */ + if (regs[insn->src_reg].type != CONST_IMM && + regs[insn->src_reg].type != UNKNOWN_VALUE) { + min_val = BPF_REGISTER_MIN_RANGE; + max_val = BPF_REGISTER_MAX_RANGE; + } + } else if (insn->imm < BPF_REGISTER_MAX_RANGE && + (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { + min_val = max_val = insn->imm; + min_set = max_set = true; + } + + /* We don't know anything about what was done to this register, mark it + * as unknown. + */ + if (min_val == BPF_REGISTER_MIN_RANGE && + max_val == BPF_REGISTER_MAX_RANGE) { + reset_reg_range_values(regs, insn->dst_reg); + return; + } + + switch (opcode) { + case BPF_ADD: + dst_reg->min_value += min_val; + dst_reg->max_value += max_val; + break; + case BPF_SUB: + dst_reg->min_value -= min_val; + dst_reg->max_value -= max_val; + break; + case BPF_MUL: + dst_reg->min_value *= min_val; + dst_reg->max_value *= max_val; + break; + case BPF_AND: + /* & is special since it could end up with 0 bits set. */ + dst_reg->min_value &= min_val; + dst_reg->max_value = max_val; + break; + case BPF_LSH: + /* Gotta have special overflow logic here, if we're shifting + * more than MAX_RANGE then just assume we have an invalid + * range. + */ + if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) + dst_reg->min_value = BPF_REGISTER_MIN_RANGE; + else + dst_reg->min_value <<= min_val; + + if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) + dst_reg->max_value = BPF_REGISTER_MAX_RANGE; + else + dst_reg->max_value <<= max_val; + break; + case BPF_RSH: + dst_reg->min_value >>= min_val; + dst_reg->max_value >>= max_val; + break; + case BPF_MOD: + /* % is special since it is an unsigned modulus, so the floor + * will always be 0. + */ + dst_reg->min_value = 0; + dst_reg->max_value = max_val - 1; + break; + default: + reset_reg_range_values(regs, insn->dst_reg); + break; + } + + check_reg_overflow(dst_reg); +} + +/* check validity of 32-bit and 64-bit arithmetic operations */ +static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) +{ + struct bpf_reg_state *regs = env->cur_state.regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1496,6 +1627,11 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) if (err) return err; + /* we are setting our register to something new, we need to + * reset its range values. + */ + reset_reg_range_values(regs, insn->dst_reg); + if (BPF_SRC(insn->code) == BPF_X) { if (BPF_CLASS(insn->code) == BPF_ALU64) { /* case: R1 = R2 @@ -1517,6 +1653,8 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) */ regs[insn->dst_reg].type = CONST_IMM; regs[insn->dst_reg].imm = insn->imm; + regs[insn->dst_reg].max_value = insn->imm; + regs[insn->dst_reg].min_value = insn->imm; } } else if (opcode > BPF_END) { @@ -1569,6 +1707,9 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) dst_reg = ®s[insn->dst_reg]; + /* first we want to adjust our ranges. */ + adjust_reg_min_max_vals(env, insn); + /* pattern match 'bpf_add Rx, imm' instruction */ if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && dst_reg->type == FRAME_PTR && BPF_SRC(insn->code) == BPF_K) { @@ -1603,28 +1744,58 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return -EACCES; } - /* mark dest operand */ - mark_reg_unknown_value(regs, insn->dst_reg); + /* If we did pointer math on a map value then just set it to our + * PTR_TO_MAP_VALUE_ADJ type so we can deal with any stores or + * loads to this register appropriately, otherwise just mark the + * register as unknown. + */ + if (env->allow_ptr_leaks && + (dst_reg->type == PTR_TO_MAP_VALUE || + dst_reg->type == PTR_TO_MAP_VALUE_ADJ)) + dst_reg->type = PTR_TO_MAP_VALUE_ADJ; + else + mark_reg_unknown_value(regs, insn->dst_reg); } return 0; } -static void find_good_pkt_pointers(struct verifier_env *env, - struct reg_state *dst_reg) +static void find_good_pkt_pointers(struct bpf_verifier_state *state, + struct bpf_reg_state *dst_reg) { - struct verifier_state *state = &env->cur_state; - struct reg_state *regs = state->regs, *reg; + struct bpf_reg_state *regs = state->regs, *reg; int i; - /* r2 = r3; - * r2 += 8 - * if (r2 > pkt_end) goto somewhere - * r2 == dst_reg, pkt_end == src_reg, - * r2=pkt(id=n,off=8,r=0) - * r3=pkt(id=n,off=0,r=0) - * find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) - * so that range of bytes [r3, r3 + 8) is safe to access + + /* LLVM can generate two kind of checks: + * + * Type 1: + * + * r2 = r3; + * r2 += 8; + * if (r2 > pkt_end) goto + * + * + * Where: + * r2 == dst_reg, pkt_end == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Type 2: + * + * r2 = r3; + * r2 += 8; + * if (pkt_end >= r2) goto + * + * + * Where: + * pkt_end == dst_reg, r2 == src_reg + * r2=pkt(id=n,off=8,r=0) + * r3=pkt(id=n,off=0,r=0) + * + * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8) + * so that range of bytes [r3, r3 + 8) is safe to access. */ + for (i = 0; i < MAX_BPF_REG; i++) if (regs[i].type == PTR_TO_PACKET && regs[i].id == dst_reg->id) regs[i].range = dst_reg->off; @@ -1638,11 +1809,109 @@ static void find_good_pkt_pointers(struct verifier_env *env, } } -static int check_cond_jmp_op(struct verifier_env *env, +/* Adjusts the register min/max values in the case that the dst_reg is the + * variable register that we are working on, and src_reg is a constant or we're + * simply doing a BPF_K check. + */ +static void reg_set_min_max(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) +{ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; + break; + case BPF_JGT: + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + case BPF_JSGT: + /* If this is false then we know the maximum val is val, + * otherwise we know the min val is val+1. + */ + false_reg->max_value = val; + true_reg->min_value = val + 1; + break; + case BPF_JGE: + /* Unsigned comparison, the minimum value is 0. */ + false_reg->min_value = 0; + case BPF_JSGE: + /* If this is false then we know the maximum value is val - 1, + * otherwise we know the mimimum value is val. + */ + false_reg->max_value = val - 1; + true_reg->min_value = val; + break; + default: + break; + } + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); +} + +/* Same as above, but for the case that dst_reg is a CONST_IMM reg and src_reg + * is the variable reg. + */ +static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, + struct bpf_reg_state *false_reg, u64 val, + u8 opcode) +{ + switch (opcode) { + case BPF_JEQ: + /* If this is false then we know nothing Jon Snow, but if it is + * true then we know for sure. + */ + true_reg->max_value = true_reg->min_value = val; + break; + case BPF_JNE: + /* If this is true we know nothing Jon Snow, but if it is false + * we know the value for sure; + */ + false_reg->max_value = false_reg->min_value = val; + break; + case BPF_JGT: + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + case BPF_JSGT: + /* + * If this is false, then the val is <= the register, if it is + * true the register <= to the val. + */ + false_reg->min_value = val; + true_reg->max_value = val - 1; + break; + case BPF_JGE: + /* Unsigned comparison, the minimum value is 0. */ + true_reg->min_value = 0; + case BPF_JSGE: + /* If this is false then constant < register, if it is true then + * the register < constant. + */ + false_reg->min_value = val + 1; + true_reg->max_value = val; + break; + default: + break; + } + + check_reg_overflow(false_reg); + check_reg_overflow(true_reg); +} + +static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx) { - struct reg_state *regs = env->cur_state.regs, *dst_reg; - struct verifier_state *other_branch; + struct bpf_verifier_state *other_branch, *this_branch = &env->cur_state; + struct bpf_reg_state *regs = this_branch->regs, *dst_reg; u8 opcode = BPF_OP(insn->code); int err; @@ -1704,7 +1973,24 @@ static int check_cond_jmp_op(struct verifier_env *env, if (!other_branch) return -EFAULT; - /* detect if R == 0 where R is returned value from bpf_map_lookup_elem() */ + /* detect if we are comparing against a constant value so we can adjust + * our min/max values for our dst register. + */ + if (BPF_SRC(insn->code) == BPF_X) { + if (regs[insn->src_reg].type == CONST_IMM) + reg_set_min_max(&other_branch->regs[insn->dst_reg], + dst_reg, regs[insn->src_reg].imm, + opcode); + else if (dst_reg->type == CONST_IMM) + reg_set_min_max_inv(&other_branch->regs[insn->src_reg], + ®s[insn->src_reg], dst_reg->imm, + opcode); + } else { + reg_set_min_max(&other_branch->regs[insn->dst_reg], + dst_reg, insn->imm, opcode); + } + + /* detect if R == 0 where R is returned from bpf_map_lookup_elem() */ if (BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == PTR_TO_MAP_VALUE_OR_NULL) { @@ -1723,13 +2009,17 @@ static int check_cond_jmp_op(struct verifier_env *env, } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGT && dst_reg->type == PTR_TO_PACKET && regs[insn->src_reg].type == PTR_TO_PACKET_END) { - find_good_pkt_pointers(env, dst_reg); + find_good_pkt_pointers(this_branch, dst_reg); + } else if (BPF_SRC(insn->code) == BPF_X && opcode == BPF_JGE && + dst_reg->type == PTR_TO_PACKET_END && + regs[insn->src_reg].type == PTR_TO_PACKET) { + find_good_pkt_pointers(other_branch, ®s[insn->src_reg]); } else if (is_pointer_value(env, insn->dst_reg)) { verbose("R%d pointer comparison prohibited\n", insn->dst_reg); return -EACCES; } if (log_level) - print_verifier_state(&env->cur_state); + print_verifier_state(this_branch); return 0; } @@ -1742,9 +2032,9 @@ static struct bpf_map *ld_imm64_to_map_ptr(struct bpf_insn *insn) } /* verify BPF_LD_IMM64 instruction */ -static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; int err; if (BPF_SIZE(insn->code) != BPF_DW) { @@ -1760,9 +2050,19 @@ static int check_ld_imm(struct verifier_env *env, struct bpf_insn *insn) if (err) return err; - if (insn->src_reg == 0) - /* generic move 64-bit immediate into a register */ + if (insn->src_reg == 0) { + /* generic move 64-bit immediate into a register, + * only analyzer needs to collect the ld_imm value. + */ + u64 imm = ((u64)(insn + 1)->imm << 32) | (u32)insn->imm; + + if (!env->analyzer_ops) + return 0; + + regs[insn->dst_reg].type = CONST_IMM; + regs[insn->dst_reg].imm = imm; return 0; + } /* replace_map_fd_with_map_ptr() should have caught bad ld_imm64 */ BUG_ON(insn->src_reg != BPF_PSEUDO_MAP_FD); @@ -1799,11 +2099,11 @@ static bool may_access_skb(enum bpf_prog_type type) * Output: * R0 - 8/16/32-bit skb data converted to cpu endianness */ -static int check_ld_abs(struct verifier_env *env, struct bpf_insn *insn) +static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) { - struct reg_state *regs = env->cur_state.regs; + struct bpf_reg_state *regs = env->cur_state.regs; u8 mode = BPF_MODE(insn->code); - struct reg_state *reg; + struct bpf_reg_state *reg; int i, err; if (!may_access_skb(env->prog->type)) { @@ -1889,7 +2189,7 @@ enum { BRANCH = 2, }; -#define STATE_LIST_MARK ((struct verifier_state_list *) -1L) +#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L) static int *insn_stack; /* stack of insns to process */ static int cur_stack; /* current stack index */ @@ -1900,7 +2200,7 @@ static int *insn_state; * w - next instruction * e - edge */ -static int push_insn(int t, int w, int e, struct verifier_env *env) +static int push_insn(int t, int w, int e, struct bpf_verifier_env *env) { if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) return 0; @@ -1941,7 +2241,7 @@ static int push_insn(int t, int w, int e, struct verifier_env *env) /* non-recursive depth-first-search to detect loops in BPF program * loop == back-edge in directed graph */ -static int check_cfg(struct verifier_env *env) +static int check_cfg(struct bpf_verifier_env *env) { struct bpf_insn *insns = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2050,7 +2350,8 @@ err_free: /* the following conditions reduce the number of explored insns * from ~140k to ~80k for ultra large programs that use a lot of ptr_to_packet */ -static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) +static bool compare_ptrs_to_packet(struct bpf_reg_state *old, + struct bpf_reg_state *cur) { if (old->id != cur->id) return false; @@ -2125,9 +2426,11 @@ static bool compare_ptrs_to_packet(struct reg_state *old, struct reg_state *cur) * whereas register type in current state is meaningful, it means that * the current state will reach 'bpf_exit' instruction safely */ -static bool states_equal(struct verifier_state *old, struct verifier_state *cur) +static bool states_equal(struct bpf_verifier_env *env, + struct bpf_verifier_state *old, + struct bpf_verifier_state *cur) { - struct reg_state *rold, *rcur; + struct bpf_reg_state *rold, *rcur; int i; for (i = 0; i < MAX_BPF_REG; i++) { @@ -2137,6 +2440,13 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) if (memcmp(rold, rcur, sizeof(*rold)) == 0) continue; + /* If the ranges were not the same, but everything else was and + * we didn't do a variable access into a map then we are a-ok. + */ + if (!env->varlen_map_value_access && + rold->type == rcur->type && rold->imm == rcur->imm) + continue; + if (rold->type == NOT_INIT || (rold->type == UNKNOWN_VALUE && rcur->type != NOT_INIT)) continue; @@ -2167,9 +2477,9 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) * the same, check that stored pointers types * are the same as well. * Ex: explored safe path could have stored - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -8} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -8} * but current path has stored: - * (struct reg_state) {.type = PTR_TO_STACK, .imm = -16} + * (bpf_reg_state) {.type = PTR_TO_STACK, .imm = -16} * such verifier states are not equivalent. * return false to continue verification of this path */ @@ -2180,10 +2490,10 @@ static bool states_equal(struct verifier_state *old, struct verifier_state *cur) return true; } -static int is_state_visited(struct verifier_env *env, int insn_idx) +static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { - struct verifier_state_list *new_sl; - struct verifier_state_list *sl; + struct bpf_verifier_state_list *new_sl; + struct bpf_verifier_state_list *sl; sl = env->explored_states[insn_idx]; if (!sl) @@ -2193,7 +2503,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) return 0; while (sl != STATE_LIST_MARK) { - if (states_equal(&sl->state, &env->cur_state)) + if (states_equal(env, &sl->state, &env->cur_state)) /* reached equivalent register/stack state, * prune the search */ @@ -2207,7 +2517,7 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) * it will be rejected. Since there are no loops, we won't be * seeing this 'insn_idx' instruction again on the way to bpf_exit */ - new_sl = kmalloc(sizeof(struct verifier_state_list), GFP_USER); + new_sl = kmalloc(sizeof(struct bpf_verifier_state_list), GFP_USER); if (!new_sl) return -ENOMEM; @@ -2218,11 +2528,20 @@ static int is_state_visited(struct verifier_env *env, int insn_idx) return 0; } -static int do_check(struct verifier_env *env) +static int ext_analyzer_insn_hook(struct bpf_verifier_env *env, + int insn_idx, int prev_insn_idx) { - struct verifier_state *state = &env->cur_state; + if (!env->analyzer_ops || !env->analyzer_ops->insn_hook) + return 0; + + return env->analyzer_ops->insn_hook(env, insn_idx, prev_insn_idx); +} + +static int do_check(struct bpf_verifier_env *env) +{ + struct bpf_verifier_state *state = &env->cur_state; struct bpf_insn *insns = env->prog->insnsi; - struct reg_state *regs = state->regs; + struct bpf_reg_state *regs = state->regs; int insn_cnt = env->prog->len; int insn_idx, prev_insn_idx = 0; int insn_processed = 0; @@ -2230,6 +2549,7 @@ static int do_check(struct verifier_env *env) init_reg_state(regs); insn_idx = 0; + env->varlen_map_value_access = false; for (;;) { struct bpf_insn *insn; u8 class; @@ -2276,13 +2596,17 @@ static int do_check(struct verifier_env *env) print_bpf_insn(insn); } + err = ext_analyzer_insn_hook(env, insn_idx, prev_insn_idx); + if (err) + return err; + if (class == BPF_ALU || class == BPF_ALU64) { err = check_alu_op(env, insn); if (err) return err; } else if (class == BPF_LDX) { - enum bpf_reg_type src_reg_type; + enum bpf_reg_type *prev_src_type, src_reg_type; /* check for reserved fields is already done */ @@ -2306,21 +2630,25 @@ static int do_check(struct verifier_env *env) if (err) return err; - if (BPF_SIZE(insn->code) != BPF_W) { + reset_reg_range_values(regs, insn->dst_reg); + if (BPF_SIZE(insn->code) != BPF_W && + BPF_SIZE(insn->code) != BPF_DW) { insn_idx++; continue; } - if (insn->imm == 0) { + prev_src_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_src_type == NOT_INIT) { /* saw a valid insn * dst_reg = *(u32 *)(src_reg + off) - * use reserved 'imm' field to mark this insn + * save type to validate intersecting paths */ - insn->imm = src_reg_type; + *prev_src_type = src_reg_type; - } else if (src_reg_type != insn->imm && + } else if (src_reg_type != *prev_src_type && (src_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_src_type == PTR_TO_CTX)) { /* ABuser program is trying to use the same insn * dst_reg = *(u32*) (src_reg + off) * with different pointer types: @@ -2333,7 +2661,7 @@ static int do_check(struct verifier_env *env) } } else if (class == BPF_STX) { - enum bpf_reg_type dst_reg_type; + enum bpf_reg_type *prev_dst_type, dst_reg_type; if (BPF_MODE(insn->code) == BPF_XADD) { err = check_xadd(env, insn); @@ -2361,11 +2689,13 @@ static int do_check(struct verifier_env *env) if (err) return err; - if (insn->imm == 0) { - insn->imm = dst_reg_type; - } else if (dst_reg_type != insn->imm && + prev_dst_type = &env->insn_aux_data[insn_idx].ptr_type; + + if (*prev_dst_type == NOT_INIT) { + *prev_dst_type = dst_reg_type; + } else if (dst_reg_type != *prev_dst_type && (dst_reg_type == PTR_TO_CTX || - insn->imm == PTR_TO_CTX)) { + *prev_dst_type == PTR_TO_CTX)) { verbose("same insn cannot be used with different pointers\n"); return -EINVAL; } @@ -2471,6 +2801,7 @@ process_bpf_exit: verbose("invalid BPF_LD mode\n"); return -EINVAL; } + reset_reg_range_values(regs, insn->dst_reg); } else { verbose("unknown insn class %d\n", class); return -EINVAL; @@ -2483,14 +2814,28 @@ process_bpf_exit: return 0; } +static int check_map_prog_compatibility(struct bpf_map *map, + struct bpf_prog *prog) + +{ + if (prog->type == BPF_PROG_TYPE_PERF_EVENT && + (map->map_type == BPF_MAP_TYPE_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_HASH) && + (map->map_flags & BPF_F_NO_PREALLOC)) { + verbose("perf_event programs can only use preallocated hash map\n"); + return -EINVAL; + } + return 0; +} + /* look for pseudo eBPF instructions that access map FDs and * replace them with actual map pointers */ -static int replace_map_fd_with_map_ptr(struct verifier_env *env) +static int replace_map_fd_with_map_ptr(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; - int i, j; + int i, j, err; for (i = 0; i < insn_cnt; i++, insn++) { if (BPF_CLASS(insn->code) == BPF_LDX && @@ -2534,6 +2879,12 @@ static int replace_map_fd_with_map_ptr(struct verifier_env *env) return PTR_ERR(map); } + err = check_map_prog_compatibility(map, env->prog); + if (err) { + fdput(f); + return err; + } + /* store map pointer inside BPF_LD_IMM64 instruction */ insn[0].imm = (u32) (unsigned long) map; insn[1].imm = ((u64) (unsigned long) map) >> 32; @@ -2577,7 +2928,7 @@ next_insn: } /* drop refcnt of maps used by the rejected program */ -static void release_maps(struct verifier_env *env) +static void release_maps(struct bpf_verifier_env *env) { int i; @@ -2586,7 +2937,7 @@ static void release_maps(struct verifier_env *env) } /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */ -static void convert_pseudo_ld_imm64(struct verifier_env *env) +static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env) { struct bpf_insn *insn = env->prog->insnsi; int insn_cnt = env->prog->len; @@ -2600,62 +2951,74 @@ static void convert_pseudo_ld_imm64(struct verifier_env *env) /* convert load instructions that access fields of 'struct __sk_buff' * into sequence of instructions that access fields of 'struct sk_buff' */ -static int convert_ctx_accesses(struct verifier_env *env) +static int convert_ctx_accesses(struct bpf_verifier_env *env) { - struct bpf_insn *insn = env->prog->insnsi; - int insn_cnt = env->prog->len; - struct bpf_insn insn_buf[16]; + const struct bpf_verifier_ops *ops = env->prog->aux->ops; + const int insn_cnt = env->prog->len; + struct bpf_insn insn_buf[16], *insn; struct bpf_prog *new_prog; enum bpf_access_type type; - int i; + int i, cnt, delta = 0; - if (!env->prog->aux->ops->convert_ctx_access) + if (ops->gen_prologue) { + cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, + env->prog); + if (cnt >= ARRAY_SIZE(insn_buf)) { + verbose("bpf verifier is misconfigured\n"); + return -EINVAL; + } else if (cnt) { + new_prog = bpf_patch_insn_single(env->prog, 0, + insn_buf, cnt); + if (!new_prog) + return -ENOMEM; + env->prog = new_prog; + delta += cnt - 1; + } + } + + if (!ops->convert_ctx_access) return 0; - for (i = 0; i < insn_cnt; i++, insn++) { - u32 insn_delta, cnt; + insn = env->prog->insnsi + delta; - if (insn->code == (BPF_LDX | BPF_MEM | BPF_W)) + for (i = 0; i < insn_cnt; i++, insn++) { + if (insn->code == (BPF_LDX | BPF_MEM | BPF_W) || + insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) type = BPF_READ; - else if (insn->code == (BPF_STX | BPF_MEM | BPF_W)) + else if (insn->code == (BPF_STX | BPF_MEM | BPF_W) || + insn->code == (BPF_STX | BPF_MEM | BPF_DW)) type = BPF_WRITE; else continue; - if (insn->imm != PTR_TO_CTX) { - /* clear internal mark */ - insn->imm = 0; + if (env->insn_aux_data[i].ptr_type != PTR_TO_CTX) continue; - } - cnt = env->prog->aux->ops-> - convert_ctx_access(type, insn->dst_reg, insn->src_reg, - insn->off, insn_buf, env->prog); + cnt = ops->convert_ctx_access(type, insn->dst_reg, insn->src_reg, + insn->off, insn_buf, env->prog); if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) { verbose("bpf verifier is misconfigured\n"); return -EINVAL; } - new_prog = bpf_patch_insn_single(env->prog, i, insn_buf, cnt); + new_prog = bpf_patch_insn_single(env->prog, i + delta, insn_buf, + cnt); if (!new_prog) return -ENOMEM; - insn_delta = cnt - 1; + delta += cnt - 1; /* keep walking new program and skip insns we just inserted */ env->prog = new_prog; - insn = new_prog->insnsi + i + insn_delta; - - insn_cnt += insn_delta; - i += insn_delta; + insn = new_prog->insnsi + i + delta; } return 0; } -static void free_states(struct verifier_env *env) +static void free_states(struct bpf_verifier_env *env) { - struct verifier_state_list *sl, *sln; + struct bpf_verifier_state_list *sl, *sln; int i; if (!env->explored_states) @@ -2678,19 +3041,24 @@ static void free_states(struct verifier_env *env) int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) { char __user *log_ubuf = NULL; - struct verifier_env *env; + struct bpf_verifier_env *env; int ret = -EINVAL; if ((*prog)->len <= 0 || (*prog)->len > BPF_MAXINSNS) return -E2BIG; - /* 'struct verifier_env' can be global, but since it's not small, + /* 'struct bpf_verifier_env' can be global, but since it's not small, * allocate/free it every time bpf_check() is called */ - env = kzalloc(sizeof(struct verifier_env), GFP_KERNEL); + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); if (!env) return -ENOMEM; + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + (*prog)->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; env->prog = *prog; /* grab the mutex to protect few globals used by verifier */ @@ -2709,12 +3077,12 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) /* log_* values have to be sane */ if (log_size < 128 || log_size > UINT_MAX >> 8 || log_level == 0 || log_ubuf == NULL) - goto free_env; + goto err_unlock; ret = -ENOMEM; log_buf = vmalloc(log_size); if (!log_buf) - goto free_env; + goto err_unlock; } else { log_level = 0; } @@ -2724,7 +3092,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) goto skip_full_check; env->explored_states = kcalloc(env->prog->len, - sizeof(struct verifier_state_list *), + sizeof(struct bpf_verifier_state_list *), GFP_USER); ret = -ENOMEM; if (!env->explored_states) @@ -2783,14 +3151,67 @@ skip_full_check: free_log_buf: if (log_level) vfree(log_buf); -free_env: if (!env->prog->aux->used_maps) /* if we didn't copy map pointers into bpf_prog_info, release * them now. Otherwise free_bpf_prog_info() will release them. */ release_maps(env); *prog = env->prog; - kfree(env); +err_unlock: mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); return ret; } + +int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, + void *priv) +{ + struct bpf_verifier_env *env; + int ret; + + env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL); + if (!env) + return -ENOMEM; + + env->insn_aux_data = vzalloc(sizeof(struct bpf_insn_aux_data) * + prog->len); + ret = -ENOMEM; + if (!env->insn_aux_data) + goto err_free_env; + env->prog = prog; + env->analyzer_ops = ops; + env->analyzer_priv = priv; + + /* grab the mutex to protect few globals used by verifier */ + mutex_lock(&bpf_verifier_lock); + + log_level = 0; + + env->explored_states = kcalloc(env->prog->len, + sizeof(struct bpf_verifier_state_list *), + GFP_KERNEL); + ret = -ENOMEM; + if (!env->explored_states) + goto skip_full_check; + + ret = check_cfg(env); + if (ret < 0) + goto skip_full_check; + + env->allow_ptr_leaks = capable(CAP_SYS_ADMIN); + + ret = do_check(env); + +skip_full_check: + while (pop_stack(env, NULL) >= 0); + free_states(env); + + mutex_unlock(&bpf_verifier_lock); + vfree(env->insn_aux_data); +err_free_env: + kfree(env); + return ret; +} +EXPORT_SYMBOL_GPL(bpf_analyzer); diff --git a/kernel/events/core.c b/kernel/events/core.c index 7c0d263f6bc5..c6e47e97b33f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7079,7 +7079,7 @@ static int __perf_event_overflow(struct perf_event *event, irq_work_queue(&event->pending); } - event->overflow_handler(event, data, regs); + READ_ONCE(event->overflow_handler)(event, data, regs); if (*perf_event_fasync(event) && event->pending_kill) { event->pending_wakeup = 1; @@ -7694,11 +7694,83 @@ static void perf_event_free_filter(struct perf_event *event) ftrace_profile_free_filter(event); } +#ifdef CONFIG_BPF_SYSCALL +static void bpf_overflow_handler(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct bpf_perf_event_data_kern ctx = { + .data = data, + .regs = regs, + }; + int ret = 0; + + preempt_disable(); + if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) + goto out; + rcu_read_lock(); + ret = BPF_PROG_RUN(event->prog, (void *)&ctx); + rcu_read_unlock(); +out: + __this_cpu_dec(bpf_prog_active); + preempt_enable(); + if (!ret) + return; + + event->orig_overflow_handler(event, data, regs); +} + +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd) +{ + struct bpf_prog *prog; + + if (event->overflow_handler_context) + /* hw breakpoint or kernel counter */ + return -EINVAL; + + if (event->prog) + return -EEXIST; + + prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + event->prog = prog; + event->orig_overflow_handler = READ_ONCE(event->overflow_handler); + WRITE_ONCE(event->overflow_handler, bpf_overflow_handler); + return 0; +} + +static void perf_event_free_bpf_handler(struct perf_event *event) +{ + struct bpf_prog *prog = event->prog; + + if (!prog) + return; + + WRITE_ONCE(event->overflow_handler, event->orig_overflow_handler); + event->prog = NULL; + bpf_prog_put(prog); +} +#else +static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd) +{ + return -EOPNOTSUPP; +} +static void perf_event_free_bpf_handler(struct perf_event *event) +{ +} +#endif + static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) { bool is_kprobe, is_tracepoint; struct bpf_prog *prog; + if (event->attr.type == PERF_TYPE_HARDWARE || + event->attr.type == PERF_TYPE_SOFTWARE) + return perf_event_set_bpf_handler(event, prog_fd); + if (event->attr.type != PERF_TYPE_TRACEPOINT) return -EINVAL; @@ -7739,6 +7811,8 @@ static void perf_event_free_bpf_prog(struct perf_event *event) { struct bpf_prog *prog; + perf_event_free_bpf_handler(event); + if (!event->tp_event) return; @@ -9055,6 +9129,19 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu, if (!overflow_handler && parent_event) { overflow_handler = parent_event->overflow_handler; context = parent_event->overflow_handler_context; +#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING) + if (overflow_handler == bpf_overflow_handler) { + struct bpf_prog *prog = bpf_prog_inc(parent_event->prog); + + if (IS_ERR(prog)) { + err = PTR_ERR(prog); + goto err_ns; + } + event->prog = prog; + event->orig_overflow_handler = + parent_event->orig_overflow_handler; + } +#endif } if (overflow_handler) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index b20438fdb029..5dcb99281259 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1,4 +1,5 @@ /* Copyright (c) 2011-2015 PLUMgrid, http://plumgrid.com + * Copyright (c) 2016 Facebook * * This program is free software; you can redistribute it and/or * modify it under the terms of version 2 of the GNU General Public @@ -8,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -59,11 +61,9 @@ unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) } EXPORT_SYMBOL_GPL(trace_call_bpf); -static u64 bpf_probe_read(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr) { - void *dst = (void *) (long) r1; - int ret, size = (int) r2; - void *unsafe_ptr = (void *) (long) r3; + int ret; ret = probe_kernel_read(dst, unsafe_ptr, size); if (unlikely(ret < 0)) @@ -81,12 +81,9 @@ static const struct bpf_func_proto bpf_probe_read_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_probe_write_user(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_probe_write_user, void *, unsafe_ptr, const void *, src, + u32, size) { - void *unsafe_ptr = (void *) (long) r1; - void *src = (void *) (long) r2; - int size = (int) r3; - /* * Ensure we're in user context which is safe for the helper to * run. This helper has no business in a kthread. @@ -128,9 +125,9 @@ static const struct bpf_func_proto *bpf_get_probe_write_proto(void) * limited trace_printk() * only %d %u %x %ld %lu %lx %lld %llu %llx %p %s conversion specifiers allowed */ -static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) +BPF_CALL_5(bpf_trace_printk, char *, fmt, u32, fmt_size, u64, arg1, + u64, arg2, u64, arg3) { - char *fmt = (char *) (long) r1; bool str_seen = false; int mod[3] = {}; int fmt_cnt = 0; @@ -176,16 +173,16 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) switch (fmt_cnt) { case 1: - unsafe_addr = r3; - r3 = (long) buf; + unsafe_addr = arg1; + arg1 = (long) buf; break; case 2: - unsafe_addr = r4; - r4 = (long) buf; + unsafe_addr = arg2; + arg2 = (long) buf; break; case 3: - unsafe_addr = r5; - r5 = (long) buf; + unsafe_addr = arg3; + arg3 = (long) buf; break; } buf[0] = 0; @@ -207,9 +204,9 @@ static u64 bpf_trace_printk(u64 r1, u64 fmt_size, u64 r3, u64 r4, u64 r5) } return __trace_printk(1/* fake ip will not be printed */, fmt, - mod[0] == 2 ? r3 : mod[0] == 1 ? (long) r3 : (u32) r3, - mod[1] == 2 ? r4 : mod[1] == 1 ? (long) r4 : (u32) r4, - mod[2] == 2 ? r5 : mod[2] == 1 ? (long) r5 : (u32) r5); + mod[0] == 2 ? arg1 : mod[0] == 1 ? (long) arg1 : (u32) arg1, + mod[1] == 2 ? arg2 : mod[1] == 1 ? (long) arg2 : (u32) arg2, + mod[2] == 2 ? arg3 : mod[2] == 1 ? (long) arg3 : (u32) arg3); } static const struct bpf_func_proto bpf_trace_printk_proto = { @@ -231,9 +228,8 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void) return &bpf_trace_printk_proto; } -static u64 bpf_perf_event_read(u64 r1, u64 flags, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_perf_event_read, struct bpf_map *, map, u64, flags) { - struct bpf_map *map = (struct bpf_map *) (unsigned long) r1; struct bpf_array *array = container_of(map, struct bpf_array, map); unsigned int cpu = smp_processor_id(); u64 index = flags & BPF_F_INDEX_MASK; @@ -310,11 +306,9 @@ __bpf_perf_event_output(struct pt_regs *regs, struct bpf_map *map, return 0; } -static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { - struct pt_regs *regs = (struct pt_regs *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; - void *data = (void *)(long) r4; struct perf_raw_record raw = { .frag = { .size = size, @@ -365,7 +359,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, return __bpf_perf_event_output(regs, map, flags, &raw); } -static u64 bpf_get_current_task(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_0(bpf_get_current_task) { return (long) current; } @@ -376,6 +370,31 @@ static const struct bpf_func_proto bpf_get_current_task_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_2(bpf_current_task_under_cgroup, struct bpf_map *, map, u32, idx) +{ + struct bpf_array *array = container_of(map, struct bpf_array, map); + struct cgroup *cgrp; + + if (unlikely(in_interrupt())) + return -EINVAL; + if (unlikely(idx >= array->map.max_entries)) + return -E2BIG; + + cgrp = READ_ONCE(array->ptrs[idx]); + if (unlikely(!cgrp)) + return -EAGAIN; + + return task_under_cgroup_hierarchy(current, cgrp); +} + +static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = { + .func = bpf_current_task_under_cgroup, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -407,6 +426,10 @@ static const struct bpf_func_proto *tracing_func_proto(enum bpf_func_id func_id) return &bpf_perf_event_read_proto; case BPF_FUNC_probe_write_user: return bpf_get_probe_write_proto(); + case BPF_FUNC_current_task_under_cgroup: + return &bpf_current_task_under_cgroup_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; default: return NULL; } @@ -447,16 +470,17 @@ static struct bpf_prog_type_list kprobe_tl = { .type = BPF_PROG_TYPE_KPROBE, }; -static u64 bpf_perf_event_output_tp(u64 r1, u64 r2, u64 index, u64 r4, u64 size) +BPF_CALL_5(bpf_perf_event_output_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags, void *, data, u64, size) { + struct pt_regs *regs = *(struct pt_regs **)tp_buff; + /* * r1 points to perf tracepoint buffer where first 8 bytes are hidden * from bpf program and contain a pointer to 'struct pt_regs'. Fetch it - * from there and call the same bpf_perf_event_output() helper + * from there and call the same bpf_perf_event_output() helper inline. */ - u64 ctx = *(long *)(uintptr_t)r1; - - return bpf_perf_event_output(ctx, r2, index, r4, size); + return ____bpf_perf_event_output(regs, map, flags, data, size); } static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { @@ -470,11 +494,18 @@ static const struct bpf_func_proto bpf_perf_event_output_proto_tp = { .arg5_type = ARG_CONST_STACK_SIZE, }; -static u64 bpf_get_stackid_tp(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_get_stackid_tp, void *, tp_buff, struct bpf_map *, map, + u64, flags) { - u64 ctx = *(long *)(uintptr_t)r1; + struct pt_regs *regs = *(struct pt_regs **)tp_buff; - return bpf_get_stackid(ctx, r2, r3, r4, r5); + /* + * Same comment as in bpf_perf_event_output_tp(), only that this time + * the other helper's function body cannot be inlined due to being + * external, thus we need to call raw helper function. + */ + return bpf_get_stackid((unsigned long) regs, (unsigned long) map, + flags, 0, 0); } static const struct bpf_func_proto bpf_get_stackid_proto_tp = { @@ -520,10 +551,69 @@ static struct bpf_prog_type_list tracepoint_tl = { .type = BPF_PROG_TYPE_TRACEPOINT, }; +static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) +{ + if (off < 0 || off >= sizeof(struct bpf_perf_event_data)) + return false; + if (type != BPF_READ) + return false; + if (off % size != 0) + return false; + if (off == offsetof(struct bpf_perf_event_data, sample_period)) { + if (size != sizeof(u64)) + return false; + } else { + if (size != sizeof(long)) + return false; + } + return true; +} + +static u32 pe_prog_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct bpf_perf_event_data, sample_period): + BUILD_BUG_ON(FIELD_SIZEOF(struct perf_sample_data, period) != sizeof(u64)); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + data), dst_reg, src_reg, + offsetof(struct bpf_perf_event_data_kern, data)); + *insn++ = BPF_LDX_MEM(BPF_DW, dst_reg, dst_reg, + offsetof(struct perf_sample_data, period)); + break; + default: + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_perf_event_data_kern, + regs), dst_reg, src_reg, + offsetof(struct bpf_perf_event_data_kern, regs)); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(long), dst_reg, dst_reg, ctx_off); + break; + } + + return insn - insn_buf; +} + +static const struct bpf_verifier_ops perf_event_prog_ops = { + .get_func_proto = tp_prog_func_proto, + .is_valid_access = pe_prog_is_valid_access, + .convert_ctx_access = pe_prog_convert_ctx_access, +}; + +static struct bpf_prog_type_list perf_event_tl = { + .ops = &perf_event_prog_ops, + .type = BPF_PROG_TYPE_PERF_EVENT, +}; + static int __init register_kprobe_prog_ops(void) { bpf_register_prog_type(&kprobe_tl); bpf_register_prog_type(&tracepoint_tl); + bpf_register_prog_type(&perf_event_tl); return 0; } late_initcall(register_kprobe_prog_ops); diff --git a/lib/Makefile b/lib/Makefile index 5dc77a8ec297..df747e5eeb7a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -22,7 +22,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o md5.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o + earlycpio.o seq_buf.o nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/random32.c b/lib/random32.c index 69ed593aab07..915982b304bb 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -81,7 +81,7 @@ u32 prandom_u32(void) u32 res; res = prandom_u32_state(state); - put_cpu_var(state); + put_cpu_var(net_rand_state); return res; } @@ -128,7 +128,7 @@ void prandom_bytes(void *buf, size_t bytes) struct rnd_state *state = &get_cpu_var(net_rand_state); prandom_bytes_state(state, buf, bytes); - put_cpu_var(state); + put_cpu_var(net_rand_state); } EXPORT_SYMBOL(prandom_bytes); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 56054e541a0f..32d0ad058380 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -378,22 +378,8 @@ static void rht_deferred_worker(struct work_struct *work) schedule_work(&ht->run_work); } -static bool rhashtable_check_elasticity(struct rhashtable *ht, - struct bucket_table *tbl, - unsigned int hash) -{ - unsigned int elasticity = ht->elasticity; - struct rhash_head *head; - - rht_for_each(head, tbl, hash) - if (!--elasticity) - return true; - - return false; -} - -int rhashtable_insert_rehash(struct rhashtable *ht, - struct bucket_table *tbl) +static int rhashtable_insert_rehash(struct rhashtable *ht, + struct bucket_table *tbl) { struct bucket_table *old_tbl; struct bucket_table *new_tbl; @@ -439,61 +425,172 @@ fail: return err; } -EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); -struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, - const void *key, - struct rhash_head *obj, - struct bucket_table *tbl) +static void *rhashtable_lookup_one(struct rhashtable *ht, + struct bucket_table *tbl, unsigned int hash, + const void *key, struct rhash_head *obj) { + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + struct rhash_head __rcu **pprev; struct rhash_head *head; - unsigned int hash; - int err; + int elasticity; - tbl = rhashtable_last_table(ht, tbl); - hash = head_hashfn(ht, tbl, obj); - spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); + elasticity = ht->elasticity; + pprev = &tbl->buckets[hash]; + rht_for_each(head, tbl, hash) { + struct rhlist_head *list; + struct rhlist_head *plist; - err = -EEXIST; - if (key && rhashtable_lookup_fast(ht, key, ht->p)) - goto exit; + elasticity--; + if (!key || + (ht->p.obj_cmpfn ? + ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head)))) + continue; + + if (!ht->rhlist) + return rht_obj(ht, head); + + list = container_of(obj, struct rhlist_head, rhead); + plist = container_of(head, struct rhlist_head, rhead); + + RCU_INIT_POINTER(list->next, plist); + head = rht_dereference_bucket(head->next, tbl, hash); + RCU_INIT_POINTER(list->rhead.next, head); + rcu_assign_pointer(*pprev, obj); + + return NULL; + } + + if (elasticity <= 0) + return ERR_PTR(-EAGAIN); + + return ERR_PTR(-ENOENT); +} + +static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash, + struct rhash_head *obj, + void *data) +{ + struct bucket_table *new_tbl; + struct rhash_head *head; + + if (!IS_ERR_OR_NULL(data)) + return ERR_PTR(-EEXIST); + + if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); + + new_tbl = rcu_dereference(tbl->future_tbl); + if (new_tbl) + return new_tbl; + + if (PTR_ERR(data) != -ENOENT) + return ERR_CAST(data); - err = -E2BIG; if (unlikely(rht_grow_above_max(ht, tbl))) - goto exit; + return ERR_PTR(-E2BIG); - err = -EAGAIN; - if (rhashtable_check_elasticity(ht, tbl, hash) || - rht_grow_above_100(ht, tbl)) - goto exit; - - err = 0; + if (unlikely(rht_grow_above_100(ht, tbl))) + return ERR_PTR(-EAGAIN); head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); RCU_INIT_POINTER(obj->next, head); + if (ht->rhlist) { + struct rhlist_head *list; + + list = container_of(obj, struct rhlist_head, rhead); + RCU_INIT_POINTER(list->next, NULL); + } rcu_assign_pointer(tbl->buckets[hash], obj); atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); -exit: - spin_unlock(rht_bucket_lock(tbl, hash)); + return NULL; +} - if (err == 0) - return NULL; - else if (err == -EAGAIN) - return tbl; - else - return ERR_PTR(err); +static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int hash; + spinlock_t *lock; + void *data; + + tbl = rcu_dereference(ht->tbl); + + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. + */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rcu_dereference(tbl->future_tbl); + } + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + while (!IS_ERR_OR_NULL(new_tbl)) { + tbl = new_tbl; + hash = rht_head_hashfn(ht, tbl, obj, ht->p); + spin_lock_nested(rht_bucket_lock(tbl, hash), + SINGLE_DEPTH_NESTING); + + data = rhashtable_lookup_one(ht, tbl, hash, key, obj); + new_tbl = rhashtable_insert_one(ht, tbl, hash, obj, data); + if (PTR_ERR(new_tbl) != -EEXIST) + data = ERR_CAST(new_tbl); + + spin_unlock(rht_bucket_lock(tbl, hash)); + } + + spin_unlock_bh(lock); + + if (PTR_ERR(data) == -EAGAIN) + data = ERR_PTR(rhashtable_insert_rehash(ht, tbl) ?: + -EAGAIN); + + return data; +} + +void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj) +{ + void *data; + + do { + rcu_read_lock(); + data = rhashtable_try_insert(ht, key, obj); + rcu_read_unlock(); + } while (PTR_ERR(data) == -EAGAIN); + + return data; } EXPORT_SYMBOL_GPL(rhashtable_insert_slow); /** - * rhashtable_walk_init - Initialise an iterator + * rhashtable_walk_enter - Initialise an iterator * @ht: Table to walk over * @iter: Hash table Iterator - * @gfp: GFP flags for allocations * * This function prepares a hash table walk. * @@ -508,30 +605,22 @@ EXPORT_SYMBOL_GPL(rhashtable_insert_slow); * This function may sleep so you must not call it from interrupt * context or with spin locks held. * - * You must call rhashtable_walk_exit if this function returns - * successfully. + * You must call rhashtable_walk_exit after this function returns. */ -int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, - gfp_t gfp) +void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter) { iter->ht = ht; iter->p = NULL; iter->slot = 0; iter->skip = 0; - iter->walker = kmalloc(sizeof(*iter->walker), gfp); - if (!iter->walker) - return -ENOMEM; - spin_lock(&ht->lock); - iter->walker->tbl = + iter->walker.tbl = rcu_dereference_protected(ht->tbl, lockdep_is_held(&ht->lock)); - list_add(&iter->walker->list, &iter->walker->tbl->walkers); + list_add(&iter->walker.list, &iter->walker.tbl->walkers); spin_unlock(&ht->lock); - - return 0; } -EXPORT_SYMBOL_GPL(rhashtable_walk_init); +EXPORT_SYMBOL_GPL(rhashtable_walk_enter); /** * rhashtable_walk_exit - Free an iterator @@ -542,10 +631,9 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); void rhashtable_walk_exit(struct rhashtable_iter *iter) { spin_lock(&iter->ht->lock); - if (iter->walker->tbl) - list_del(&iter->walker->list); + if (iter->walker.tbl) + list_del(&iter->walker.list); spin_unlock(&iter->ht->lock); - kfree(iter->walker); } EXPORT_SYMBOL_GPL(rhashtable_walk_exit); @@ -571,12 +659,12 @@ int rhashtable_walk_start(struct rhashtable_iter *iter) rcu_read_lock(); spin_lock(&ht->lock); - if (iter->walker->tbl) - list_del(&iter->walker->list); + if (iter->walker.tbl) + list_del(&iter->walker.list); spin_unlock(&ht->lock); - if (!iter->walker->tbl) { - iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); + if (!iter->walker.tbl) { + iter->walker.tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } @@ -598,12 +686,17 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); */ void *rhashtable_walk_next(struct rhashtable_iter *iter) { - struct bucket_table *tbl = iter->walker->tbl; + struct bucket_table *tbl = iter->walker.tbl; + struct rhlist_head *list = iter->list; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; + bool rhlist = ht->rhlist; if (p) { - p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + if (!rhlist || !(list = rcu_dereference(list->next))) { + p = rcu_dereference(p->next); + list = container_of(p, struct rhlist_head, rhead); + } goto next; } @@ -611,6 +704,18 @@ void *rhashtable_walk_next(struct rhashtable_iter *iter) int skip = iter->skip; rht_for_each_rcu(p, tbl, iter->slot) { + if (rhlist) { + list = container_of(p, struct rhlist_head, + rhead); + do { + if (!skip) + goto next; + skip--; + list = rcu_dereference(list->next); + } while (list); + + continue; + } if (!skip) break; skip--; @@ -620,7 +725,8 @@ next: if (!rht_is_a_nulls(p)) { iter->skip++; iter->p = p; - return rht_obj(ht, p); + iter->list = list; + return rht_obj(ht, rhlist ? &list->rhead : p); } iter->skip = 0; @@ -631,8 +737,8 @@ next: /* Ensure we see any new tables. */ smp_rmb(); - iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); - if (iter->walker->tbl) { + iter->walker.tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (iter->walker.tbl) { iter->slot = 0; iter->skip = 0; return ERR_PTR(-EAGAIN); @@ -652,7 +758,7 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU) { struct rhashtable *ht; - struct bucket_table *tbl = iter->walker->tbl; + struct bucket_table *tbl = iter->walker.tbl; if (!tbl) goto out; @@ -661,9 +767,9 @@ void rhashtable_walk_stop(struct rhashtable_iter *iter) spin_lock(&ht->lock); if (tbl->rehash < tbl->size) - list_add(&iter->walker->list, &tbl->walkers); + list_add(&iter->walker.list, &tbl->walkers); else - iter->walker->tbl = NULL; + iter->walker.tbl = NULL; spin_unlock(&ht->lock); iter->p = NULL; @@ -808,6 +914,48 @@ int rhashtable_init(struct rhashtable *ht, } EXPORT_SYMBOL_GPL(rhashtable_init); +/** + * rhltable_init - initialize a new hash list table + * @hlt: hash list table to be initialized + * @params: configuration parameters + * + * Initializes a new hash list table. + * + * See documentation for rhashtable_init. + */ +int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) +{ + int err; + + /* No rhlist NULLs marking for now. */ + if (params->nulls_base) + return -EINVAL; + + err = rhashtable_init(&hlt->ht, params); + hlt->ht.rhlist = true; + return err; +} +EXPORT_SYMBOL_GPL(rhltable_init); + +static void rhashtable_free_one(struct rhashtable *ht, struct rhash_head *obj, + void (*free_fn)(void *ptr, void *arg), + void *arg) +{ + struct rhlist_head *list; + + if (!ht->rhlist) { + free_fn(rht_obj(ht, obj), arg); + return; + } + + list = container_of(obj, struct rhlist_head, rhead); + do { + obj = &list->rhead; + list = rht_dereference(list->next, ht); + free_fn(rht_obj(ht, obj), arg); + } while (list); +} + /** * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy @@ -845,7 +993,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, pos = next, next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL) - free_fn(rht_obj(ht, pos), arg); + rhashtable_free_one(ht, pos, free_fn, arg); } } diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 93f45011a59d..94346b4d8984 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5485,6 +5485,7 @@ static struct sk_buff *populate_skb(char *buf, int size) skb->hash = SKB_HASH; skb->queue_mapping = SKB_QUEUE_MAP; skb->vlan_tci = SKB_VLAN_TCI; + skb->vlan_proto = htons(ETH_P_IP); skb->dev = &dev; skb->dev->ifindex = SKB_DEV_IFINDEX; skb->dev->type = SKB_DEV_TYPE; diff --git a/lib/win_minmax.c b/lib/win_minmax.c new file mode 100644 index 000000000000..c8420d404926 --- /dev/null +++ b/lib/win_minmax.c @@ -0,0 +1,98 @@ +/** + * lib/minmax.c: windowed min/max tracker + * + * Kathleen Nichols' algorithm for tracking the minimum (or maximum) + * value of a data stream over some fixed time interval. (E.g., + * the minimum RTT over the past five minutes.) It uses constant + * space and constant time per update yet almost always delivers + * the same minimum as an implementation that has to keep all the + * data in the window. + * + * The algorithm keeps track of the best, 2nd best & 3rd best min + * values, maintaining an invariant that the measurement time of + * the n'th best >= n-1'th best. It also makes sure that the three + * values are widely separated in the time window since that bounds + * the worse case error when that data is monotonically increasing + * over the window. + * + * Upon getting a new min, we can forget everything earlier because + * it has no value - the new min is <= everything else in the window + * by definition and it's the most recent. So we restart fresh on + * every new min and overwrites 2nd & 3rd choices. The same property + * holds for 2nd & 3rd best. + */ +#include +#include + +/* As time advances, update the 1st, 2nd, and 3rd choices. */ +static u32 minmax_subwin_update(struct minmax *m, u32 win, + const struct minmax_sample *val) +{ + u32 dt = val->t - m->s[0].t; + + if (unlikely(dt > win)) { + /* + * Passed entire window without a new val so make 2nd + * choice the new val & 3rd choice the new 2nd choice. + * we may have to iterate this since our 2nd choice + * may also be outside the window (we checked on entry + * that the third choice was in the window). + */ + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + if (unlikely(val->t - m->s[0].t > win)) { + m->s[0] = m->s[1]; + m->s[1] = m->s[2]; + m->s[2] = *val; + } + } else if (unlikely(m->s[1].t == m->s[0].t) && dt > win/4) { + /* + * We've passed a quarter of the window without a new val + * so take a 2nd choice from the 2nd quarter of the window. + */ + m->s[2] = m->s[1] = *val; + } else if (unlikely(m->s[2].t == m->s[1].t) && dt > win/2) { + /* + * We've passed half the window without finding a new val + * so take a 3rd choice from the last half of the window + */ + m->s[2] = *val; + } + return m->s[0].v; +} + +/* Check if new measurement updates the 1st, 2nd or 3rd choice max. */ +u32 minmax_running_max(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v >= m->s[0].v) || /* found new max? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v >= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v >= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} +EXPORT_SYMBOL(minmax_running_max); + +/* Check if new measurement updates the 1st, 2nd or 3rd choice min. */ +u32 minmax_running_min(struct minmax *m, u32 win, u32 t, u32 meas) +{ + struct minmax_sample val = { .t = t, .v = meas }; + + if (unlikely(val.v <= m->s[0].v) || /* found new min? */ + unlikely(val.t - m->s[2].t > win)) /* nothing left in window? */ + return minmax_reset(m, t, meas); /* forget earlier samples */ + + if (unlikely(val.v <= m->s[1].v)) + m->s[2] = m->s[1] = val; + else if (unlikely(val.v <= m->s[2].v)) + m->s[2] = val; + + return minmax_subwin_update(m, win, &val); +} diff --git a/net/6lowpan/ndisc.c b/net/6lowpan/ndisc.c index 86450b7e2899..941df2fa4448 100644 --- a/net/6lowpan/ndisc.c +++ b/net/6lowpan/ndisc.c @@ -101,8 +101,6 @@ static void lowpan_ndisc_802154_update(struct neighbour *n, u32 flags, ieee802154_be16_to_le16(&neigh->short_addr, lladdr_short); if (!lowpan_802154_is_valid_src_short_addr(neigh->short_addr)) neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); - } else { - neigh->short_addr = cpu_to_le16(IEEE802154_ADDR_SHORT_UNSPEC); } write_unlock_bh(&n->lock); } diff --git a/net/Kconfig b/net/Kconfig index c2cdbce629bd..7b6cd340b72b 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -369,6 +369,7 @@ source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" source "net/kcm/Kconfig" +source "net/strparser/Kconfig" config FIB_RULES bool diff --git a/net/Makefile b/net/Makefile index 9bd20bb86cc6..4cafaa2b4667 100644 --- a/net/Makefile +++ b/net/Makefile @@ -35,6 +35,7 @@ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ obj-$(CONFIG_AF_KCM) += kcm/ +obj-$(CONFIG_STREAM_PARSER) += strparser/ obj-$(CONFIG_ATM) += atm/ obj-$(CONFIG_L2TP) += l2tp/ obj-$(CONFIG_DECNET) += decnet/ diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c index f066781be3c8..10d2bdce686e 100644 --- a/net/appletalk/ddp.c +++ b/net/appletalk/ddp.c @@ -1278,7 +1278,7 @@ out: return err; } -#if defined(CONFIG_IPDDP) || defined(CONFIG_IPDDP_MODULE) +#if IS_ENABLED(CONFIG_IPDDP) static __inline__ int is_ip_over_ddp(struct sk_buff *skb) { return skb->data[12] == 22; diff --git a/net/atm/lec.c b/net/atm/lec.c index e574a7e9db6f..5d2693826afb 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -31,7 +31,7 @@ #include /* Proxy LEC knows about bridging */ -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) #include "../bridge/br_private.h" static unsigned char bridge_ula_lec[] = { 0x01, 0x80, 0xc2, 0x00, 0x00 }; @@ -121,7 +121,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) { char *buff; @@ -155,7 +155,7 @@ static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) sk->sk_data_ready(sk); } } -#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ +#endif /* IS_ENABLED(CONFIG_BRIDGE) */ /* * Open/initialize the netdevice. This is called (in the current kernel) @@ -222,7 +222,7 @@ static netdev_tx_t lec_start_xmit(struct sk_buff *skb, pr_debug("skbuff head:%lx data:%lx tail:%lx end:%lx\n", (long)skb->head, (long)skb->data, (long)skb_tail_pointer(skb), (long)skb_end_pointer(skb)); -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) if (memcmp(skb->data, bridge_ula_lec, sizeof(bridge_ula_lec)) == 0) lec_handle_bridge(skb, dev); #endif @@ -426,7 +426,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) (unsigned short)(0xffff & mesg->content.normal.flag); break; case l_should_bridge: -#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) +#if IS_ENABLED(CONFIG_BRIDGE) { pr_debug("%s: bridge zeppelin asks about %pM\n", dev->name, mesg->content.proxy.mac_addr); @@ -452,7 +452,7 @@ static int lec_atm_send(struct atm_vcc *vcc, struct sk_buff *skb) sk->sk_data_ready(sk); } } -#endif /* defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE) */ +#endif /* IS_ENABLED(CONFIG_BRIDGE) */ break; default: pr_info("%s: Unknown message type %d\n", dev->name, mesg->type); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 0e982222d425..3b3b1a292ec8 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -1007,7 +1007,7 @@ static int mpoa_event_listener(struct notifier_block *mpoa_notifier, if (!net_eq(dev_net(dev), &init_net)) return NOTIFY_DONE; - if (dev->name == NULL || strncmp(dev->name, "lec", 3)) + if (strncmp(dev->name, "lec", 3)) return NOTIFY_DONE; /* we are only interested in lec:s */ switch (event) { diff --git a/net/batman-adv/Kconfig b/net/batman-adv/Kconfig index 833bb145ba3c..f20742cbae6d 100644 --- a/net/batman-adv/Kconfig +++ b/net/batman-adv/Kconfig @@ -73,10 +73,21 @@ config BATMAN_ADV_MCAST reduce the air overhead while improving the reliability of multicast messages. -config BATMAN_ADV_DEBUG - bool "B.A.T.M.A.N. debugging" +config BATMAN_ADV_DEBUGFS + bool "batman-adv debugfs entries" depends on BATMAN_ADV depends on DEBUG_FS + default y + help + Enable this to export routing related debug tables via debugfs. + The information for each soft-interface and used hard-interface can be + found under batman_adv/ + + If unsure, say Y. + +config BATMAN_ADV_DEBUG + bool "B.A.T.M.A.N. debugging" + depends on BATMAN_ADV_DEBUGFS help This is an option for use by developers; most people should say N here. This enables compilation of support for diff --git a/net/batman-adv/Makefile b/net/batman-adv/Makefile index a83fc6c58d19..f724d3c98a81 100644 --- a/net/batman-adv/Makefile +++ b/net/batman-adv/Makefile @@ -24,14 +24,14 @@ batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_elp.o batman-adv-$(CONFIG_BATMAN_ADV_BATMAN_V) += bat_v_ogm.o batman-adv-y += bitarray.o batman-adv-$(CONFIG_BATMAN_ADV_BLA) += bridge_loop_avoidance.o -batman-adv-$(CONFIG_DEBUG_FS) += debugfs.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += debugfs.o batman-adv-$(CONFIG_BATMAN_ADV_DAT) += distributed-arp-table.o batman-adv-y += fragmentation.o batman-adv-y += gateway_client.o batman-adv-y += gateway_common.o batman-adv-y += hard-interface.o batman-adv-y += hash.o -batman-adv-y += icmp_socket.o +batman-adv-$(CONFIG_BATMAN_ADV_DEBUGFS) += icmp_socket.o batman-adv-$(CONFIG_BATMAN_ADV_DEBUG) += log.o batman-adv-y += main.o batman-adv-$(CONFIG_BATMAN_ADV_MCAST) += multicast.o diff --git a/net/batman-adv/bat_algo.c b/net/batman-adv/bat_algo.c index 81dbbf569bd4..623d04302aa2 100644 --- a/net/batman-adv/bat_algo.c +++ b/net/batman-adv/bat_algo.c @@ -20,12 +20,18 @@ #include #include #include +#include #include #include +#include #include #include +#include +#include +#include #include "bat_algo.h" +#include "netlink.h" char batadv_routing_algo[20] = "BATMAN_IV"; static struct hlist_head batadv_algo_list; @@ -95,6 +101,7 @@ int batadv_algo_select(struct batadv_priv *bat_priv, char *name) return 0; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) { struct batadv_algo_ops *bat_algo_ops; @@ -107,6 +114,7 @@ int batadv_algo_seq_print_text(struct seq_file *seq, void *offset) return 0; } +#endif static int batadv_param_set_ra(const char *val, const struct kernel_param *kp) { @@ -138,3 +146,65 @@ static struct kparam_string batadv_param_string_ra = { module_param_cb(routing_algo, &batadv_param_ops_ra, &batadv_param_string_ra, 0644); + +/** + * batadv_algo_dump_entry - fill in information about one supported routing + * algorithm + * @msg: netlink message to be sent back + * @portid: Port to reply to + * @seq: Sequence number of message + * @bat_algo_ops: Algorithm to be dumped + * + * Return: Error number, or 0 on success + */ +static int batadv_algo_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_algo_ops *bat_algo_ops) +{ + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ROUTING_ALGOS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_string(msg, BATADV_ATTR_ALGO_NAME, bat_algo_ops->name)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_algo_dump - fill in information about supported routing + * algorithms + * @msg: netlink message to be sent back + * @cb: Parameters to the netlink request + * + * Return: Length of reply message. + */ +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_algo_ops *bat_algo_ops; + int skip = cb->args[0]; + int i = 0; + + hlist_for_each_entry(bat_algo_ops, &batadv_algo_list, list) { + if (i++ < skip) + continue; + + if (batadv_algo_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_algo_ops)) { + i--; + break; + } + } + + cb->args[0] = i; + + return msg->len; +} diff --git a/net/batman-adv/bat_algo.h b/net/batman-adv/bat_algo.h index 860d773dd8fa..3b5b69cdd12b 100644 --- a/net/batman-adv/bat_algo.h +++ b/net/batman-adv/bat_algo.h @@ -22,7 +22,9 @@ #include +struct netlink_callback; struct seq_file; +struct sk_buff; extern char batadv_routing_algo[]; extern struct list_head batadv_hardif_list; @@ -31,5 +33,6 @@ void batadv_algo_init(void); int batadv_algo_register(struct batadv_algo_ops *bat_algo_ops); int batadv_algo_select(struct batadv_priv *bat_priv, char *name); int batadv_algo_seq_print_text(struct seq_file *seq, void *offset); +int batadv_algo_dump(struct sk_buff *msg, struct netlink_callback *cb); #endif /* _NET_BATMAN_ADV_BAT_ALGO_H_ */ diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index 19b0abd6c640..e2d18d0b1f06 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -48,12 +49,17 @@ #include #include #include +#include +#include +#include #include "bat_algo.h" #include "bitarray.h" +#include "gateway_client.h" #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "network-coding.h" #include "originator.h" #include "packet.h" @@ -318,17 +324,18 @@ batadv_iv_ogm_orig_get(struct batadv_priv *bat_priv, const u8 *addr) if (!orig_node->bat_iv.bcast_own_sum) goto free_orig_node; + kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) - goto free_orig_node; + goto free_orig_node_hash; return orig_node; -free_orig_node: - /* free twice, as batadv_orig_node_new sets refcount to 2 */ +free_orig_node_hash: batadv_orig_node_put(orig_node); +free_orig_node: batadv_orig_node_put(orig_node); return NULL; @@ -528,36 +535,25 @@ static void batadv_iv_ogm_send_to_if(struct batadv_forw_packet *forw_packet, static void batadv_iv_ogm_emit(struct batadv_forw_packet *forw_packet) { struct net_device *soft_iface; - struct batadv_priv *bat_priv; - struct batadv_hard_iface *primary_if = NULL; if (!forw_packet->if_incoming) { pr_err("Error - can't forward packet: incoming iface not specified\n"); - goto out; + return; } soft_iface = forw_packet->if_incoming->soft_iface; - bat_priv = netdev_priv(soft_iface); if (WARN_ON(!forw_packet->if_outgoing)) - goto out; + return; if (WARN_ON(forw_packet->if_outgoing->soft_iface != soft_iface)) - goto out; + return; if (forw_packet->if_incoming->if_status != BATADV_IF_ACTIVE) - goto out; - - primary_if = batadv_primary_if_get_selected(bat_priv); - if (!primary_if) - goto out; + return; /* only for one specific outgoing interface */ batadv_iv_ogm_send_to_if(forw_packet, forw_packet->if_outgoing); - -out: - if (primary_if) - batadv_hardif_put(primary_if); } /** @@ -685,19 +681,12 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, struct batadv_forw_packet *forw_packet_aggr; unsigned char *skb_buff; unsigned int skb_size; + atomic_t *queue_left = own_packet ? NULL : &bat_priv->batman_queue_left; - /* own packet should always be scheduled */ - if (!own_packet) { - if (!batadv_atomic_dec_not_zero(&bat_priv->batman_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "batman packet queue full\n"); - return; - } - } - - forw_packet_aggr = kmalloc(sizeof(*forw_packet_aggr), GFP_ATOMIC); + forw_packet_aggr = batadv_forw_packet_alloc(if_incoming, if_outgoing, + queue_left, bat_priv); if (!forw_packet_aggr) - goto out_nomem; + return; if (atomic_read(&bat_priv->aggregated_ogms) && packet_len < BATADV_MAX_AGGREGATION_BYTES) @@ -708,8 +697,11 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, skb_size += ETH_HLEN; forw_packet_aggr->skb = netdev_alloc_skb_ip_align(NULL, skb_size); - if (!forw_packet_aggr->skb) - goto out_free_forw_packet; + if (!forw_packet_aggr->skb) { + batadv_forw_packet_free(forw_packet_aggr); + return; + } + forw_packet_aggr->skb->priority = TC_PRIO_CONTROL; skb_reserve(forw_packet_aggr->skb, ETH_HLEN); @@ -717,12 +709,7 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, forw_packet_aggr->packet_len = packet_len; memcpy(skb_buff, packet_buff, packet_len); - kref_get(&if_incoming->refcount); - kref_get(&if_outgoing->refcount); forw_packet_aggr->own = own_packet; - forw_packet_aggr->if_incoming = if_incoming; - forw_packet_aggr->if_outgoing = if_outgoing; - forw_packet_aggr->num_packets = 0; forw_packet_aggr->direct_link_flags = BATADV_NO_FLAGS; forw_packet_aggr->send_time = send_time; @@ -741,13 +728,6 @@ static void batadv_iv_ogm_aggregate_new(const unsigned char *packet_buff, queue_delayed_work(batadv_event_workqueue, &forw_packet_aggr->delayed_work, send_time - jiffies); - - return; -out_free_forw_packet: - kfree(forw_packet_aggr); -out_nomem: - if (!own_packet) - atomic_inc(&bat_priv->batman_queue_left); } /* aggregate a new packet into the existing ogm packet */ @@ -1830,10 +1810,6 @@ static void batadv_iv_send_outstanding_bat_ogm_packet(struct work_struct *work) batadv_iv_ogm_schedule(forw_packet->if_incoming); out: - /* don't count own packet */ - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } @@ -1879,6 +1855,7 @@ static int batadv_iv_ogm_receive(struct sk_buff *skb, return NET_RX_SUCCESS; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_iv_ogm_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -1976,7 +1953,238 @@ next: if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif +/** + * batadv_iv_ogm_neigh_get_tq_avg - Get the TQ average for a neighbour on a + * given outgoing interface. + * @neigh_node: Neighbour of interest + * @if_outgoing: Outgoing interface of interest + * @tq_avg: Pointer of where to store the TQ average + * + * Return: False if no average TQ available, otherwise true. + */ +static bool +batadv_iv_ogm_neigh_get_tq_avg(struct batadv_neigh_node *neigh_node, + struct batadv_hard_iface *if_outgoing, + u8 *tq_avg) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return false; + + *tq_avg = n_ifinfo->bat_iv.tq_avg; + batadv_neigh_ifinfo_put(n_ifinfo); + + return true; +} + +/** + * batadv_iv_ogm_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + void *hdr; + u8 tq_avg; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node, if_outgoing, &tq_avg)) + return 0; + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig_node->orig) || + nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u8(msg, BATADV_ATTR_TQ, tq_avg) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + u8 tq_avg_best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + if (!batadv_iv_ogm_neigh_get_tq_avg(neigh_node_best, if_outgoing, + &tq_avg_best)) + goto out; + + if (tq_avg_best == 0) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_iv_ogm_orig_dump_subentry(msg, portid, seq, + bat_priv, if_outgoing, + orig_node, neigh_node, + best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} + +/** + * batadv_iv_ogm_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_iv_ogm_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_iv_ogm_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_iv_ogm_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, + &idx, &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_iv_hardif_neigh_print - print a single hop neighbour node * @seq: neighbour table seq_file struct @@ -2027,6 +2235,182 @@ static void batadv_iv_neigh_print(struct batadv_priv *bat_priv, if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif + +/** + * batadv_iv_ogm_neigh_diff - calculate tq difference of two neighbors + * @neigh1: the first neighbor object of the comparison + * @if_outgoing1: outgoing interface for the first neighbor + * @neigh2: the second neighbor object of the comparison + * @if_outgoing2: outgoing interface for the second neighbor + * @diff: pointer to integer receiving the calculated difference + * + * The content of *@diff is only valid when this function returns true. + * It is less, equal to or greater than 0 if the metric via neigh1 is lower, + * the same as or higher than the metric via neigh2 + * + * Return: true when the difference could be calculated, false otherwise + */ +static bool batadv_iv_ogm_neigh_diff(struct batadv_neigh_node *neigh1, + struct batadv_hard_iface *if_outgoing1, + struct batadv_neigh_node *neigh2, + struct batadv_hard_iface *if_outgoing2, + int *diff) +{ + struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; + u8 tq1, tq2; + bool ret = true; + + neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); + neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); + + if (!neigh1_ifinfo || !neigh2_ifinfo) { + ret = false; + goto out; + } + + tq1 = neigh1_ifinfo->bat_iv.tq_avg; + tq2 = neigh2_ifinfo->bat_iv.tq_avg; + *diff = (int)tq1 - (int)tq2; + +out: + if (neigh1_ifinfo) + batadv_neigh_ifinfo_put(neigh1_ifinfo); + if (neigh2_ifinfo) + batadv_neigh_ifinfo_put(neigh2_ifinfo); + + return ret; +} + +/** + * batadv_iv_ogm_neigh_dump_neigh - Dump a neighbour into a netlink message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to be dumped + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_iv_ogm_neigh_dump_hardif - Dump the neighbours of a hard interface + * into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: Hard interface to dump the neighbours for + * @idx_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_iv_ogm_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_iv_ogm_neigh_dump_neigh(msg, portid, seq, + hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_iv_ogm_neigh_dump - Dump the neighbours into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dump to this hard interfaace + */ +static void +batadv_iv_ogm_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, + list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_iv_ogm_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, + hard_iface, &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} /** * batadv_iv_ogm_neigh_cmp - compare the metrics of two neighbors @@ -2043,27 +2427,13 @@ static int batadv_iv_ogm_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { - struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - u8 tq1, tq2; + bool ret; int diff; - neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - - if (!neigh1_ifinfo || !neigh2_ifinfo) { - diff = 0; - goto out; - } - - tq1 = neigh1_ifinfo->bat_iv.tq_avg; - tq2 = neigh2_ifinfo->bat_iv.tq_avg; - diff = tq1 - tq2; - -out: - if (neigh1_ifinfo) - batadv_neigh_ifinfo_put(neigh1_ifinfo); - if (neigh2_ifinfo) - batadv_neigh_ifinfo_put(neigh2_ifinfo); + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return 0; return diff; } @@ -2085,29 +2455,15 @@ batadv_iv_ogm_neigh_is_sob(struct batadv_neigh_node *neigh1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2) { - struct batadv_neigh_ifinfo *neigh1_ifinfo, *neigh2_ifinfo; - u8 tq1, tq2; bool ret; + int diff; - neigh1_ifinfo = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - neigh2_ifinfo = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - - /* we can't say that the metric is better */ - if (!neigh1_ifinfo || !neigh2_ifinfo) { - ret = false; - goto out; - } - - tq1 = neigh1_ifinfo->bat_iv.tq_avg; - tq2 = neigh2_ifinfo->bat_iv.tq_avg; - ret = (tq1 - tq2) > -BATADV_TQ_SIMILARITY_THRESHOLD; - -out: - if (neigh1_ifinfo) - batadv_neigh_ifinfo_put(neigh1_ifinfo); - if (neigh2_ifinfo) - batadv_neigh_ifinfo_put(neigh2_ifinfo); + ret = batadv_iv_ogm_neigh_diff(neigh1, if_outgoing1, neigh2, + if_outgoing2, &diff); + if (!ret) + return false; + ret = diff > -BATADV_TQ_SIMILARITY_THRESHOLD; return ret; } @@ -2117,6 +2473,325 @@ static void batadv_iv_iface_activate(struct batadv_hard_iface *hard_iface) batadv_iv_ogm_schedule(hard_iface); } +static struct batadv_gw_node * +batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{ + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo; + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u64 max_gw_factor = 0; + u64 tmp_gw_factor = 0; + u8 max_tq = 0; + u8 tq_avg; + struct batadv_orig_node *orig_node; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + continue; + + router_ifinfo = batadv_neigh_ifinfo_get(router, + BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto next; + + if (!kref_get_unless_zero(&gw_node->refcount)) + goto next; + + tq_avg = router_ifinfo->bat_iv.tq_avg; + + switch (atomic_read(&bat_priv->gw.sel_class)) { + case 1: /* fast connection */ + tmp_gw_factor = tq_avg * tq_avg; + tmp_gw_factor *= gw_node->bandwidth_down; + tmp_gw_factor *= 100 * 100; + tmp_gw_factor >>= 18; + + if ((tmp_gw_factor > max_gw_factor) || + ((tmp_gw_factor == max_gw_factor) && + (tq_avg > max_tq))) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + + default: /* 2: stable connection (use best statistic) + * 3: fast-switch (use best statistic but change as + * soon as a better gateway appears) + * XX: late-switch (use best statistic but change as + * soon as a better gateway appears which has + * $routing_class more tq points) + */ + if (tq_avg > max_tq) { + if (curr_gw) + batadv_gw_node_put(curr_gw); + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + } + break; + } + + if (tq_avg > max_tq) + max_tq = tq_avg; + + if (tmp_gw_factor > max_gw_factor) + max_gw_factor = tmp_gw_factor; + + batadv_gw_node_put(gw_node); + +next: + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + } + rcu_read_unlock(); + + return curr_gw; +} + +static bool batadv_iv_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_neigh_ifinfo *router_orig_ifinfo = NULL; + struct batadv_neigh_ifinfo *router_gw_ifinfo = NULL; + struct batadv_neigh_node *router_gw = NULL; + struct batadv_neigh_node *router_orig = NULL; + u8 gw_tq_avg, orig_tq_avg; + bool ret = false; + + /* dynamic re-election is performed only on fast or late switch */ + if (atomic_read(&bat_priv->gw.sel_class) <= 2) + return false; + + router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); + if (!router_gw) { + ret = true; + goto out; + } + + router_gw_ifinfo = batadv_neigh_ifinfo_get(router_gw, + BATADV_IF_DEFAULT); + if (!router_gw_ifinfo) { + ret = true; + goto out; + } + + router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router_orig) + goto out; + + router_orig_ifinfo = batadv_neigh_ifinfo_get(router_orig, + BATADV_IF_DEFAULT); + if (!router_orig_ifinfo) + goto out; + + gw_tq_avg = router_gw_ifinfo->bat_iv.tq_avg; + orig_tq_avg = router_orig_ifinfo->bat_iv.tq_avg; + + /* the TQ value has to be better */ + if (orig_tq_avg < gw_tq_avg) + goto out; + + /* if the routing class is greater than 3 the value tells us how much + * greater the TQ value of the new gateway must be + */ + if ((atomic_read(&bat_priv->gw.sel_class) > 3) && + (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", + gw_tq_avg, orig_tq_avg); + + ret = true; +out: + if (router_gw_ifinfo) + batadv_neigh_ifinfo_put(router_gw_ifinfo); + if (router_orig_ifinfo) + batadv_neigh_ifinfo_put(router_orig_ifinfo); + if (router_gw) + batadv_neigh_node_put(router_gw); + if (router_orig) + batadv_neigh_node_put(router_orig); + + return ret; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +/* fails if orig_node has no router */ +static int batadv_iv_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) +{ + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_iv.tq_avg, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +static void batadv_iv_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_iv_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} +#endif + +/** + * batadv_iv_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_iv_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TQ, router_ifinfo->bat_iv.tq_avg) || + nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, + router->addr) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down) || + nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, + gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_iv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + */ +static void batadv_iv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_iv_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; +} + static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .name = "BATMAN_IV", .iface = { @@ -2129,14 +2804,28 @@ static struct batadv_algo_ops batadv_batman_iv __read_mostly = { .neigh = { .cmp = batadv_iv_ogm_neigh_cmp, .is_similar_or_better = batadv_iv_ogm_neigh_is_sob, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_iv_neigh_print, +#endif + .dump = batadv_iv_ogm_neigh_dump, }, .orig = { +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_iv_ogm_orig_print, +#endif + .dump = batadv_iv_ogm_orig_dump, .free = batadv_iv_ogm_orig_free, .add_if = batadv_iv_ogm_orig_add_if, .del_if = batadv_iv_ogm_orig_del_if, }, + .gw = { + .get_best_gw_node = batadv_iv_gw_get_best_gw_node, + .is_eligible = batadv_iv_gw_is_eligible, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + .print = batadv_iv_gw_print, +#endif + .dump = batadv_iv_gw_dump, + }, }; int __init batadv_iv_init(void) diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 0366cbf5e444..e79f6f01182e 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -21,24 +21,38 @@ #include #include #include +#include +#include #include #include +#include +#include #include +#include #include #include #include #include #include #include +#include +#include +#include #include "bat_algo.h" #include "bat_v_elp.h" #include "bat_v_ogm.h" +#include "gateway_client.h" +#include "gateway_common.h" #include "hard-interface.h" #include "hash.h" +#include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +struct sk_buff; + static void batadv_v_iface_activate(struct batadv_hard_iface *hard_iface) { struct batadv_priv *bat_priv = netdev_priv(hard_iface->soft_iface); @@ -115,6 +129,7 @@ batadv_v_hardif_neigh_init(struct batadv_hardif_neigh_node *hardif_neigh) batadv_v_elp_throughput_metric_update); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_v_orig_print_neigh - print neighbors for the originator table * @orig_node: the orig_node for which the neighbors are printed @@ -198,7 +213,141 @@ static void batadv_v_neigh_print(struct batadv_priv *bat_priv, if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif +/** + * batadv_v_neigh_dump_neigh - Dump a neighbour into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hardif_neigh: Neighbour to dump + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_neigh(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hardif_neigh_node *hardif_neigh) +{ + void *hdr; + unsigned int last_seen_msecs; + u32 throughput; + + last_seen_msecs = jiffies_to_msecs(jiffies - hardif_neigh->last_seen); + throughput = ewma_throughput_read(&hardif_neigh->bat_v.throughput); + throughput = throughput * 100; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_NEIGHBORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + hardif_neigh->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + hardif_neigh->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_v_neigh_dump_hardif - Dump the neighbours of a hard interface into + * a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @hard_iface: The hard interface to be dumped + * @idx_s: Entries to be skipped + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_neigh_dump_hardif(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *hard_iface, + int *idx_s) +{ + struct batadv_hardif_neigh_node *hardif_neigh; + int idx = 0; + + hlist_for_each_entry_rcu(hardif_neigh, + &hard_iface->neigh_list, list) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_neigh_dump_neigh(msg, portid, seq, hardif_neigh)) { + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + + *idx_s = 0; + return 0; +} + +/** + * batadv_v_neigh_dump - Dump the neighbours of a hard interface into a + * message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @single_hardif: Limit dumping to this hard interface + */ +static void +batadv_v_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *single_hardif) +{ + struct batadv_hard_iface *hard_iface; + int i_hardif = 0; + int i_hardif_s = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + rcu_read_lock(); + if (single_hardif) { + if (i_hardif_s == 0) { + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, single_hardif, + &idx) == 0) + i_hardif++; + } + } else { + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != bat_priv->soft_iface) + continue; + + if (i_hardif++ < i_hardif_s) + continue; + + if (batadv_v_neigh_dump_hardif(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, hard_iface, + &idx)) { + i_hardif--; + break; + } + } + } + rcu_read_unlock(); + + cb->args[0] = i_hardif; + cb->args[1] = idx; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_v_orig_print - print the originator table * @bat_priv: the bat priv with all the soft interface information @@ -265,6 +414,205 @@ next: if (batman_count == 0) seq_puts(seq, "No batman nodes in range ...\n"); } +#endif + +/** + * batadv_v_orig_dump_subentry - Dump an originator subentry into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @neigh_node: Single hops neighbour + * @best: Is the best originator + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, + struct batadv_neigh_node *neigh_node, + bool best) +{ + struct batadv_neigh_ifinfo *n_ifinfo; + unsigned int last_seen_msecs; + u32 throughput; + void *hdr; + + n_ifinfo = batadv_neigh_ifinfo_get(neigh_node, if_outgoing); + if (!n_ifinfo) + return 0; + + throughput = n_ifinfo->bat_v.throughput * 100; + + batadv_neigh_ifinfo_put(n_ifinfo); + + last_seen_msecs = jiffies_to_msecs(jiffies - orig_node->last_seen); + + if (if_outgoing != BATADV_IF_DEFAULT && + if_outgoing != neigh_node->if_incoming) + return 0; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_ORIGINATORS); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, orig_node->orig) || + nla_put(msg, BATADV_ATTR_NEIGH_ADDRESS, ETH_ALEN, + neigh_node->addr) || + nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + neigh_node->if_incoming->net_dev->ifindex) || + nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, throughput) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, + last_seen_msecs)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_v_orig_dump_entry - Dump an originator entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @orig_node: Originator to dump + * @sub_s: Number of sub entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct batadv_orig_node *orig_node, int *sub_s) +{ + struct batadv_neigh_node *neigh_node_best; + struct batadv_neigh_node *neigh_node; + int sub = 0; + bool best; + + neigh_node_best = batadv_orig_router_get(orig_node, if_outgoing); + if (!neigh_node_best) + goto out; + + hlist_for_each_entry_rcu(neigh_node, &orig_node->neigh_list, list) { + if (sub++ < *sub_s) + continue; + + best = (neigh_node == neigh_node_best); + + if (batadv_v_orig_dump_subentry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, + neigh_node, best)) { + batadv_neigh_node_put(neigh_node_best); + + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + out: + if (neigh_node_best) + batadv_neigh_node_put(neigh_node_best); + + *sub_s = 0; + return 0; +} + +/** + * batadv_v_orig_dump_bucket - Dump an originator bucket into a + * message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + * @head: Bucket to be dumped + * @idx_s: Number of entries to be skipped + * @sub: Number of sub entries to be skipped + * + * Return: Error code, or 0 on success + */ +static int +batadv_v_orig_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_orig_node *orig_node; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(orig_node, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_v_orig_dump_entry(msg, portid, seq, bat_priv, + if_outgoing, orig_node, sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_v_orig_dump - Dump the originators into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + * @if_outgoing: Limit dump to entries with this outgoing interface + */ +static void +batadv_v_orig_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv, + struct batadv_hard_iface *if_outgoing) +{ + struct batadv_hashtable *hash = bat_priv->orig_hash; + struct hlist_head *head; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_v_orig_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + bat_priv, if_outgoing, head, &idx, + &sub)) + break; + + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; +} static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, struct batadv_hard_iface *if_outgoing1, @@ -320,6 +668,365 @@ err_ifinfo1: return ret; } +static ssize_t batadv_v_store_sel_class(struct batadv_priv *bat_priv, + char *buff, size_t count) +{ + u32 old_class, class; + + if (!batadv_parse_throughput(bat_priv->soft_iface, buff, + "B.A.T.M.A.N. V GW selection class", + &class)) + return -EINVAL; + + old_class = atomic_read(&bat_priv->gw.sel_class); + atomic_set(&bat_priv->gw.sel_class, class); + + if (old_class != class) + batadv_gw_reselect(bat_priv); + + return count; +} + +static ssize_t batadv_v_show_sel_class(struct batadv_priv *bat_priv, char *buff) +{ + u32 class = atomic_read(&bat_priv->gw.sel_class); + + return sprintf(buff, "%u.%u MBit\n", class / 10, class % 10); +} + +/** + * batadv_v_gw_throughput_get - retrieve the GW-bandwidth for a given GW + * @gw_node: the GW to retrieve the metric for + * @bw: the pointer where the metric will be stored. The metric is computed as + * the minimum between the GW advertised throughput and the path throughput to + * it in the mesh + * + * Return: 0 on success, -1 on failure + */ +static int batadv_v_gw_throughput_get(struct batadv_gw_node *gw_node, u32 *bw) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_orig_node *orig_node; + struct batadv_neigh_node *router; + int ret = -1; + + orig_node = gw_node->orig_node; + router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + /* the GW metric is computed as the minimum between the path throughput + * to reach the GW itself and the advertised bandwidth. + * This gives us an approximation of the effective throughput that the + * client can expect via this particular GW node + */ + *bw = router_ifinfo->bat_v.throughput; + *bw = min_t(u32, *bw, gw_node->bandwidth_down); + + ret = 0; +out: + if (router) + batadv_neigh_node_put(router); + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + + return ret; +} + +/** + * batadv_v_gw_get_best_gw_node - retrieve the best GW node + * @bat_priv: the bat priv with all the soft interface information + * + * Return: the GW node having the best GW-metric, NULL if no GW is known + */ +static struct batadv_gw_node * +batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) +{ + struct batadv_gw_node *gw_node, *curr_gw = NULL; + u32 max_bw = 0, bw; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (!kref_get_unless_zero(&gw_node->refcount)) + continue; + + if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) + goto next; + + if (curr_gw && (bw <= max_bw)) + goto next; + + if (curr_gw) + batadv_gw_node_put(curr_gw); + + curr_gw = gw_node; + kref_get(&curr_gw->refcount); + max_bw = bw; + +next: + batadv_gw_node_put(gw_node); + } + rcu_read_unlock(); + + return curr_gw; +} + +/** + * batadv_v_gw_is_eligible - check if a originator would be selected as GW + * @bat_priv: the bat priv with all the soft interface information + * @curr_gw_orig: originator representing the currently selected GW + * @orig_node: the originator representing the new candidate + * + * Return: true if orig_node can be selected as current GW, false otherwise + */ +static bool batadv_v_gw_is_eligible(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node) +{ + struct batadv_gw_node *curr_gw = NULL, *orig_gw = NULL; + u32 gw_throughput, orig_throughput, threshold; + bool ret = false; + + threshold = atomic_read(&bat_priv->gw.sel_class); + + curr_gw = batadv_gw_node_get(bat_priv, curr_gw_orig); + if (!curr_gw) { + ret = true; + goto out; + } + + if (batadv_v_gw_throughput_get(curr_gw, &gw_throughput) < 0) { + ret = true; + goto out; + } + + orig_gw = batadv_gw_node_get(bat_priv, orig_node); + if (!orig_node) + goto out; + + if (batadv_v_gw_throughput_get(orig_gw, &orig_throughput) < 0) + goto out; + + if (orig_throughput < gw_throughput) + goto out; + + if ((orig_throughput - gw_throughput) < threshold) + goto out; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "Restarting gateway selection: better gateway found (throughput curr: %u, throughput new: %u)\n", + gw_throughput, orig_throughput); + + ret = true; +out: + if (curr_gw) + batadv_gw_node_put(curr_gw); + if (orig_gw) + batadv_gw_node_put(orig_gw); + + return ret; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS +/* fails if orig_node has no router */ +static int batadv_v_gw_write_buffer_text(struct batadv_priv *bat_priv, + struct seq_file *seq, + const struct batadv_gw_node *gw_node) +{ + struct batadv_gw_node *curr_gw; + struct batadv_neigh_node *router; + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + int ret = -1; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + seq_printf(seq, "%s %pM (%9u.%1u) %pM [%10s]: %u.%u/%u.%u MBit\n", + (curr_gw == gw_node ? "=>" : " "), + gw_node->orig_node->orig, + router_ifinfo->bat_v.throughput / 10, + router_ifinfo->bat_v.throughput % 10, router->addr, + router->if_incoming->net_dev->name, + gw_node->bandwidth_down / 10, + gw_node->bandwidth_down % 10, + gw_node->bandwidth_up / 10, + gw_node->bandwidth_up % 10); + ret = seq_has_overflowed(seq) ? -1 : 0; + + if (curr_gw) + batadv_gw_node_put(curr_gw); +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_print - print the gateway list + * @bat_priv: the bat priv with all the soft interface information + * @seq: gateway table seq_file struct + */ +static void batadv_v_gw_print(struct batadv_priv *bat_priv, + struct seq_file *seq) +{ + struct batadv_gw_node *gw_node; + int gw_count = 0; + + seq_puts(seq, + " Gateway ( throughput) Nexthop [outgoingIF]: advertised uplink bandwidth\n"); + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + /* fails if orig_node has no router */ + if (batadv_v_gw_write_buffer_text(bat_priv, seq, gw_node) < 0) + continue; + + gw_count++; + } + rcu_read_unlock(); + + if (gw_count == 0) + seq_puts(seq, "No gateways in range ...\n"); +} +#endif + +/** + * batadv_v_gw_dump_entry - Dump a gateway into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @gw_node: Gateway to be dumped + * + * Return: Error code, or 0 on success + */ +static int batadv_v_gw_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_gw_node *gw_node) +{ + struct batadv_neigh_ifinfo *router_ifinfo = NULL; + struct batadv_neigh_node *router; + struct batadv_gw_node *curr_gw; + int ret = -EINVAL; + void *hdr; + + router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); + if (!router) + goto out; + + router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); + if (!router_ifinfo) + goto out; + + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_GATEWAYS); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + ret = -EMSGSIZE; + + if (curr_gw == gw_node) { + if (nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) { + genlmsg_cancel(msg, hdr); + goto out; + } + } + + if (nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + gw_node->orig_node->orig)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_THROUGHPUT, + router_ifinfo->bat_v.throughput)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_ROUTER, ETH_ALEN, router->addr)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + router->if_incoming->net_dev->name)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_DOWN, + gw_node->bandwidth_down)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put_u32(msg, BATADV_ATTR_BANDWIDTH_UP, gw_node->bandwidth_up)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + if (router_ifinfo) + batadv_neigh_ifinfo_put(router_ifinfo); + if (router) + batadv_neigh_node_put(router); + return ret; +} + +/** + * batadv_v_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * @bat_priv: The bat priv with all the soft interface information + */ +static void batadv_v_gw_dump(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *bat_priv) +{ + int portid = NETLINK_CB(cb->skb).portid; + struct batadv_gw_node *gw_node; + int idx_skip = cb->args[0]; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { + if (idx++ < idx_skip) + continue; + + if (batadv_v_gw_dump_entry(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, gw_node)) { + idx_skip = idx - 1; + goto unlock; + } + } + + idx_skip = idx; +unlock: + rcu_read_unlock(); + + cb->args[0] = idx_skip; +} + static struct batadv_algo_ops batadv_batman_v __read_mostly = { .name = "BATMAN_V", .iface = { @@ -333,10 +1040,26 @@ static struct batadv_algo_ops batadv_batman_v __read_mostly = { .hardif_init = batadv_v_hardif_neigh_init, .cmp = batadv_v_neigh_cmp, .is_similar_or_better = batadv_v_neigh_is_sob, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_v_neigh_print, +#endif + .dump = batadv_v_neigh_dump, }, .orig = { +#ifdef CONFIG_BATMAN_ADV_DEBUGFS .print = batadv_v_orig_print, +#endif + .dump = batadv_v_orig_dump, + }, + .gw = { + .store_sel_class = batadv_v_store_sel_class, + .show_sel_class = batadv_v_show_sel_class, + .get_best_gw_node = batadv_v_gw_get_best_gw_node, + .is_eligible = batadv_v_gw_is_eligible, +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + .print = batadv_v_gw_print, +#endif + .dump = batadv_v_gw_dump, }, }; @@ -363,7 +1086,16 @@ void batadv_v_hardif_init(struct batadv_hard_iface *hard_iface) */ int batadv_v_mesh_init(struct batadv_priv *bat_priv) { - return batadv_v_ogm_init(bat_priv); + int ret = 0; + + ret = batadv_v_ogm_init(bat_priv); + if (ret < 0) + return ret; + + /* set default throughput difference threshold to 5Mbps */ + atomic_set(&bat_priv->gw.sel_class, 50); + + return 0; } /** diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 6fbba4eb0617..1aeeadca620c 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -73,13 +73,12 @@ struct batadv_orig_node *batadv_v_ogm_orig_get(struct batadv_priv *bat_priv, if (!orig_node) return NULL; + kref_get(&orig_node->refcount); hash_added = batadv_hash_add(bat_priv->orig_hash, batadv_compare_orig, batadv_choose_orig, orig_node, &orig_node->hash_entry); if (hash_added != 0) { - /* orig_node->refcounter is initialised to 2 by - * batadv_orig_node_new() - */ + /* remove refcnt for newly created orig_node and hash entry */ batadv_orig_node_put(orig_node); batadv_orig_node_put(orig_node); orig_node = NULL; diff --git a/net/batman-adv/bridge_loop_avoidance.c b/net/batman-adv/bridge_loop_avoidance.c index ad2ffe16d29f..e7f690b571ea 100644 --- a/net/batman-adv/bridge_loop_avoidance.c +++ b/net/batman-adv/bridge_loop_avoidance.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -45,12 +46,18 @@ #include #include #include +#include +#include +#include +#include #include "hard-interface.h" #include "hash.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" +#include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -519,11 +526,9 @@ batadv_bla_get_backbone_gw(struct batadv_priv *bat_priv, u8 *orig, atomic_set(&entry->wait_periods, 0); ether_addr_copy(entry->orig, orig); INIT_WORK(&entry->report_work, batadv_bla_loopdetect_report); - - /* one for the hash, one for returning */ kref_init(&entry->refcount); - kref_get(&entry->refcount); + kref_get(&entry->refcount); hash_added = batadv_hash_add(bat_priv->bla.backbone_hash, batadv_compare_backbone_gw, batadv_choose_backbone_gw, entry, @@ -711,12 +716,13 @@ static void batadv_bla_add_claim(struct batadv_priv *bat_priv, claim->lasttime = jiffies; kref_get(&backbone_gw->refcount); claim->backbone_gw = backbone_gw; - kref_init(&claim->refcount); - kref_get(&claim->refcount); + batadv_dbg(BATADV_DBG_BLA, bat_priv, "bla_add_claim(): adding new entry %pM, vid %d to hash ...\n", mac, BATADV_PRINT_VID(vid)); + + kref_get(&claim->refcount); hash_added = batadv_hash_add(bat_priv->bla.claim_hash, batadv_compare_claim, batadv_choose_claim, claim, @@ -1148,7 +1154,7 @@ static bool batadv_bla_process_claim(struct batadv_priv *bat_priv, /* Let the loopdetect frames on the mesh in any case. */ if (bla_dst->type == BATADV_CLAIM_TYPE_LOOPDETECT) - return 0; + return false; /* check if it is a claim frame. */ ret = batadv_check_claim_group(bat_priv, primary_if, hw_src, hw_dst, @@ -1990,6 +1996,7 @@ out: return ret; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_bla_claim_table_seq_print_text - print the claim table in a seq file * @seq: seq file to print on @@ -2050,7 +2057,171 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif +/** + * batadv_bla_claim_dump_entry - dump one entry of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @claim: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_bla_claim_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_claim *claim) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_CLAIM); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(claim->backbone_gw->orig, + primary_addr); + + spin_lock_bh(&claim->backbone_gw->crc_lock); + backbone_crc = claim->backbone_gw->crc; + spin_unlock_bh(&claim->backbone_gw->crc_lock); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_ADDRESS, ETH_ALEN, claim->addr) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, claim->vid) || + nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + claim->backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_claim_dump_bucket - dump one bucket of the claim table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. + */ +static int +batadv_bla_claim_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_claim *claim; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(claim, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_claim_dump_entry(msg, portid, seq, + primary_if, claim)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} + +/** + * batadv_bla_claim_dump - dump claim table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.claim_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_claim_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_bla_backbone_table_seq_print_text - print the backbone table in a seq * file @@ -2114,3 +2285,168 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif + +/** + * batadv_bla_backbone_dump_entry - dump one entry of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @backbone_gw: entry to dump + * + * Return: 0 or error code. + */ +static int +batadv_bla_backbone_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct batadv_bla_backbone_gw *backbone_gw) +{ + u8 *primary_addr = primary_if->net_dev->dev_addr; + u16 backbone_crc; + bool is_own; + int msecs; + void *hdr; + int ret = -EINVAL; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, BATADV_CMD_GET_BLA_BACKBONE); + if (!hdr) { + ret = -ENOBUFS; + goto out; + } + + is_own = batadv_compare_eth(backbone_gw->orig, primary_addr); + + spin_lock_bh(&backbone_gw->crc_lock); + backbone_crc = backbone_gw->crc; + spin_unlock_bh(&backbone_gw->crc_lock); + + msecs = jiffies_to_msecs(jiffies - backbone_gw->lasttime); + + if (is_own) + if (nla_put_flag(msg, BATADV_ATTR_BLA_OWN)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + if (nla_put(msg, BATADV_ATTR_BLA_BACKBONE, ETH_ALEN, + backbone_gw->orig) || + nla_put_u16(msg, BATADV_ATTR_BLA_VID, backbone_gw->vid) || + nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + backbone_crc) || + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, msecs)) { + genlmsg_cancel(msg, hdr); + goto out; + } + + genlmsg_end(msg, hdr); + ret = 0; + +out: + return ret; +} + +/** + * batadv_bla_backbone_dump_bucket - dump one bucket of the backbone table + * to a netlink socket + * @msg: buffer for the message + * @portid: netlink port + * @seq: Sequence number of netlink message + * @primary_if: primary interface + * @head: bucket to dump + * @idx_skip: How many entries to skip + * + * Return: always 0. + */ +static int +batadv_bla_backbone_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *primary_if, + struct hlist_head *head, int *idx_skip) +{ + struct batadv_bla_backbone_gw *backbone_gw; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(backbone_gw, head, hash_entry) { + if (idx++ < *idx_skip) + continue; + if (batadv_bla_backbone_dump_entry(msg, portid, seq, + primary_if, backbone_gw)) { + *idx_skip = idx - 1; + goto unlock; + } + } + + *idx_skip = idx; +unlock: + rcu_read_unlock(); + return 0; +} + +/** + * batadv_bla_backbone_dump - dump backbone table to a netlink socket + * @msg: buffer for the message + * @cb: callback structure containing arguments + * + * Return: message length. + */ +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + int portid = NETLINK_CB(cb->skb).portid; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hashtable *hash; + struct batadv_priv *bat_priv; + int bucket = cb->args[0]; + struct hlist_head *head; + int idx = cb->args[1]; + int ifindex; + int ret = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + hash = bat_priv->bla.backbone_hash; + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_bla_backbone_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, + primary_if, head, &idx)) + break; + bucket++; + } + + cb->args[0] = bucket; + cb->args[1] = idx; + + ret = msg->len; + +out: + if (primary_if) + batadv_hardif_put(primary_if); + + if (soft_iface) + dev_put(soft_iface); + + return ret; +} diff --git a/net/batman-adv/bridge_loop_avoidance.h b/net/batman-adv/bridge_loop_avoidance.h index 0f01daeb359e..1ae93e46fb98 100644 --- a/net/batman-adv/bridge_loop_avoidance.h +++ b/net/batman-adv/bridge_loop_avoidance.h @@ -23,6 +23,7 @@ #include struct net_device; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -35,8 +36,10 @@ bool batadv_bla_is_backbone_gw(struct sk_buff *skb, struct batadv_orig_node *orig_node, int hdr_size); int batadv_bla_claim_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_bla_backbone_table_seq_print_text(struct seq_file *seq, void *offset); +int batadv_bla_backbone_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_bla_is_backbone_gw_orig(struct batadv_priv *bat_priv, u8 *orig, unsigned short vid); bool batadv_bla_check_bcast_duplist(struct batadv_priv *bat_priv, @@ -47,7 +50,7 @@ void batadv_bla_update_orig_address(struct batadv_priv *bat_priv, void batadv_bla_status_update(struct net_device *net_dev); int batadv_bla_init(struct batadv_priv *bat_priv); void batadv_bla_free(struct batadv_priv *bat_priv); - +int batadv_bla_claim_dump(struct sk_buff *msg, struct netlink_callback *cb); #define BATADV_BLA_CRC_INIT 0 #else /* ifdef CONFIG_BATMAN_ADV_BLA */ @@ -112,6 +115,18 @@ static inline void batadv_bla_free(struct batadv_priv *bat_priv) { } +static inline int batadv_bla_claim_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + +static inline int batadv_bla_backbone_dump(struct sk_buff *msg, + struct netlink_callback *cb) +{ + return -EOPNOTSUPP; +} + #endif /* ifdef CONFIG_BATMAN_ADV_BLA */ #endif /* ifndef _NET_BATMAN_ADV_BLA_H_ */ diff --git a/net/batman-adv/debugfs.c b/net/batman-adv/debugfs.c index 1d68b6e63b96..b4ffba7dd583 100644 --- a/net/batman-adv/debugfs.c +++ b/net/batman-adv/debugfs.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -305,12 +306,16 @@ void batadv_debugfs_destroy(void) */ int batadv_debugfs_add_hardif(struct batadv_hard_iface *hard_iface) { + struct net *net = dev_net(hard_iface->net_dev); struct batadv_debuginfo **bat_debug; struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + hard_iface->debug_dir = debugfs_create_dir(hard_iface->net_dev->name, batadv_debugfs); if (!hard_iface->debug_dir) @@ -341,6 +346,11 @@ out: */ void batadv_debugfs_del_hardif(struct batadv_hard_iface *hard_iface) { + struct net *net = dev_net(hard_iface->net_dev); + + if (net != &init_net) + return; + if (batadv_debugfs) { debugfs_remove_recursive(hard_iface->debug_dir); hard_iface->debug_dir = NULL; @@ -351,11 +361,15 @@ int batadv_debugfs_add_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); struct batadv_debuginfo **bat_debug; + struct net *net = dev_net(dev); struct dentry *file; if (!batadv_debugfs) goto out; + if (net != &init_net) + return 0; + bat_priv->debug_dir = debugfs_create_dir(dev->name, batadv_debugfs); if (!bat_priv->debug_dir) goto out; @@ -392,6 +406,10 @@ out: void batadv_debugfs_del_meshif(struct net_device *dev) { struct batadv_priv *bat_priv = netdev_priv(dev); + struct net *net = dev_net(dev); + + if (net != &init_net) + return; batadv_debug_log_cleanup(bat_priv); diff --git a/net/batman-adv/debugfs.h b/net/batman-adv/debugfs.h index 1ab4e2e63afc..c68ff3dcb926 100644 --- a/net/batman-adv/debugfs.h +++ b/net/batman-adv/debugfs.h @@ -26,7 +26,7 @@ struct net_device; #define BATADV_DEBUGFS_SUBDIR "batman_adv" -#if IS_ENABLED(CONFIG_DEBUG_FS) +#if IS_ENABLED(CONFIG_BATMAN_ADV_DEBUGFS) void batadv_debugfs_init(void); void batadv_debugfs_destroy(void); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index b1cc8bfe11ac..e257efdc5d03 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -343,8 +343,8 @@ static void batadv_dat_entry_add(struct batadv_priv *bat_priv, __be32 ip, ether_addr_copy(dat_entry->mac_addr, mac_addr); dat_entry->last_update = jiffies; kref_init(&dat_entry->refcount); - kref_get(&dat_entry->refcount); + kref_get(&dat_entry->refcount); hash_added = batadv_hash_add(bat_priv->dat.hash, batadv_compare_dat, batadv_hash_dat, dat_entry, &dat_entry->hash_entry); @@ -795,6 +795,7 @@ void batadv_dat_free(struct batadv_priv *bat_priv) batadv_dat_hash_free(bat_priv); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_dat_cache_seq_print_text - print the local DAT hash table * @seq: seq file to print on @@ -846,6 +847,7 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif /** * batadv_arp_get_type - parse an ARP packet and gets the type diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index 63a805d3f96e..de055d64debe 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include @@ -31,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -39,13 +41,17 @@ #include #include #include +#include +#include #include "gateway_common.h" #include "hard-interface.h" #include "log.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "routing.h" +#include "soft-interface.h" #include "sysfs.h" #include "translation-table.h" @@ -80,12 +86,12 @@ static void batadv_gw_node_release(struct kref *ref) * batadv_gw_node_put - decrement the gw_node refcounter and possibly release it * @gw_node: gateway node to free */ -static void batadv_gw_node_put(struct batadv_gw_node *gw_node) +void batadv_gw_node_put(struct batadv_gw_node *gw_node) { kref_put(&gw_node->refcount, batadv_gw_node_release); } -static struct batadv_gw_node * +struct batadv_gw_node * batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv) { struct batadv_gw_node *gw_node; @@ -164,86 +170,6 @@ void batadv_gw_reselect(struct batadv_priv *bat_priv) atomic_set(&bat_priv->gw.reselect, 1); } -static struct batadv_gw_node * -batadv_gw_get_best_gw_node(struct batadv_priv *bat_priv) -{ - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo; - struct batadv_gw_node *gw_node, *curr_gw = NULL; - u64 max_gw_factor = 0; - u64 tmp_gw_factor = 0; - u8 max_tq = 0; - u8 tq_avg; - struct batadv_orig_node *orig_node; - - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - orig_node = gw_node->orig_node; - router = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router) - continue; - - router_ifinfo = batadv_neigh_ifinfo_get(router, - BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto next; - - if (!kref_get_unless_zero(&gw_node->refcount)) - goto next; - - tq_avg = router_ifinfo->bat_iv.tq_avg; - - switch (atomic_read(&bat_priv->gw.sel_class)) { - case 1: /* fast connection */ - tmp_gw_factor = tq_avg * tq_avg; - tmp_gw_factor *= gw_node->bandwidth_down; - tmp_gw_factor *= 100 * 100; - tmp_gw_factor >>= 18; - - if ((tmp_gw_factor > max_gw_factor) || - ((tmp_gw_factor == max_gw_factor) && - (tq_avg > max_tq))) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - - default: /* 2: stable connection (use best statistic) - * 3: fast-switch (use best statistic but change as - * soon as a better gateway appears) - * XX: late-switch (use best statistic but change as - * soon as a better gateway appears which has - * $routing_class more tq points) - */ - if (tq_avg > max_tq) { - if (curr_gw) - batadv_gw_node_put(curr_gw); - curr_gw = gw_node; - kref_get(&curr_gw->refcount); - } - break; - } - - if (tq_avg > max_tq) - max_tq = tq_avg; - - if (tmp_gw_factor > max_gw_factor) - max_gw_factor = tmp_gw_factor; - - batadv_gw_node_put(gw_node); - -next: - batadv_neigh_node_put(router); - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - } - rcu_read_unlock(); - - return curr_gw; -} - /** * batadv_gw_check_client_stop - check if client mode has been switched off * @bat_priv: the bat priv with all the soft interface information @@ -287,12 +213,19 @@ void batadv_gw_election(struct batadv_priv *bat_priv) if (atomic_read(&bat_priv->gw.mode) != BATADV_GW_MODE_CLIENT) goto out; + if (!bat_priv->algo_ops->gw.get_best_gw_node) + goto out; + curr_gw = batadv_gw_get_selected_gw_node(bat_priv); if (!batadv_atomic_dec_not_zero(&bat_priv->gw.reselect) && curr_gw) goto out; - next_gw = batadv_gw_get_best_gw_node(bat_priv); + /* if gw.reselect is set to 1 it means that a previous call to + * gw.is_eligible() said that we have a new best GW, therefore it can + * now be picked from the list and selected + */ + next_gw = bat_priv->algo_ops->gw.get_best_gw_node(bat_priv); if (curr_gw == next_gw) goto out; @@ -360,70 +293,31 @@ out: void batadv_gw_check_election(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node) { - struct batadv_neigh_ifinfo *router_orig_tq = NULL; - struct batadv_neigh_ifinfo *router_gw_tq = NULL; struct batadv_orig_node *curr_gw_orig; - struct batadv_neigh_node *router_gw = NULL; - struct batadv_neigh_node *router_orig = NULL; - u8 gw_tq_avg, orig_tq_avg; + + /* abort immediately if the routing algorithm does not support gateway + * election + */ + if (!bat_priv->algo_ops->gw.is_eligible) + return; curr_gw_orig = batadv_gw_get_selected_orig(bat_priv); if (!curr_gw_orig) goto reselect; - router_gw = batadv_orig_router_get(curr_gw_orig, BATADV_IF_DEFAULT); - if (!router_gw) - goto reselect; - - router_gw_tq = batadv_neigh_ifinfo_get(router_gw, - BATADV_IF_DEFAULT); - if (!router_gw_tq) - goto reselect; - /* this node already is the gateway */ if (curr_gw_orig == orig_node) goto out; - router_orig = batadv_orig_router_get(orig_node, BATADV_IF_DEFAULT); - if (!router_orig) + if (!bat_priv->algo_ops->gw.is_eligible(bat_priv, curr_gw_orig, + orig_node)) goto out; - router_orig_tq = batadv_neigh_ifinfo_get(router_orig, - BATADV_IF_DEFAULT); - if (!router_orig_tq) - goto out; - - gw_tq_avg = router_gw_tq->bat_iv.tq_avg; - orig_tq_avg = router_orig_tq->bat_iv.tq_avg; - - /* the TQ value has to be better */ - if (orig_tq_avg < gw_tq_avg) - goto out; - - /* if the routing class is greater than 3 the value tells us how much - * greater the TQ value of the new gateway must be - */ - if ((atomic_read(&bat_priv->gw.sel_class) > 3) && - (orig_tq_avg - gw_tq_avg < atomic_read(&bat_priv->gw.sel_class))) - goto out; - - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Restarting gateway selection: better gateway found (tq curr: %i, tq new: %i)\n", - gw_tq_avg, orig_tq_avg); - reselect: batadv_gw_reselect(bat_priv); out: if (curr_gw_orig) batadv_orig_node_put(curr_gw_orig); - if (router_gw) - batadv_neigh_node_put(router_gw); - if (router_orig) - batadv_neigh_node_put(router_orig); - if (router_gw_tq) - batadv_neigh_ifinfo_put(router_gw_tq); - if (router_orig_tq) - batadv_neigh_ifinfo_put(router_orig_tq); } /** @@ -445,14 +339,15 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, if (!gw_node) return; - kref_get(&orig_node->refcount); + kref_init(&gw_node->refcount); INIT_HLIST_NODE(&gw_node->list); + kref_get(&orig_node->refcount); gw_node->orig_node = orig_node; gw_node->bandwidth_down = ntohl(gateway->bandwidth_down); gw_node->bandwidth_up = ntohl(gateway->bandwidth_up); - kref_init(&gw_node->refcount); spin_lock_bh(&bat_priv->gw.list_lock); + kref_get(&gw_node->refcount); hlist_add_head_rcu(&gw_node->list, &bat_priv->gw.list); spin_unlock_bh(&bat_priv->gw.list_lock); @@ -463,6 +358,9 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, ntohl(gateway->bandwidth_down) % 10, ntohl(gateway->bandwidth_up) / 10, ntohl(gateway->bandwidth_up) % 10); + + /* don't return reference to new gw_node */ + batadv_gw_node_put(gw_node); } /** @@ -472,9 +370,8 @@ static void batadv_gw_node_add(struct batadv_priv *bat_priv, * * Return: gateway node if found or NULL otherwise. */ -static struct batadv_gw_node * -batadv_gw_node_get(struct batadv_priv *bat_priv, - struct batadv_orig_node *orig_node) +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node) { struct batadv_gw_node *gw_node_tmp, *gw_node = NULL; @@ -585,81 +482,87 @@ void batadv_gw_node_free(struct batadv_priv *bat_priv) spin_unlock_bh(&bat_priv->gw.list_lock); } -/* fails if orig_node has no router */ -static int batadv_write_buffer_text(struct batadv_priv *bat_priv, - struct seq_file *seq, - const struct batadv_gw_node *gw_node) -{ - struct batadv_gw_node *curr_gw; - struct batadv_neigh_node *router; - struct batadv_neigh_ifinfo *router_ifinfo = NULL; - int ret = -1; - - router = batadv_orig_router_get(gw_node->orig_node, BATADV_IF_DEFAULT); - if (!router) - goto out; - - router_ifinfo = batadv_neigh_ifinfo_get(router, BATADV_IF_DEFAULT); - if (!router_ifinfo) - goto out; - - curr_gw = batadv_gw_get_selected_gw_node(bat_priv); - - seq_printf(seq, "%s %pM (%3i) %pM [%10s]: %u.%u/%u.%u MBit\n", - (curr_gw == gw_node ? "=>" : " "), - gw_node->orig_node->orig, - router_ifinfo->bat_iv.tq_avg, router->addr, - router->if_incoming->net_dev->name, - gw_node->bandwidth_down / 10, - gw_node->bandwidth_down % 10, - gw_node->bandwidth_up / 10, - gw_node->bandwidth_up % 10); - ret = seq_has_overflowed(seq) ? -1 : 0; - - if (curr_gw) - batadv_gw_node_put(curr_gw); -out: - if (router_ifinfo) - batadv_neigh_ifinfo_put(router_ifinfo); - if (router) - batadv_neigh_node_put(router); - return ret; -} - +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; struct batadv_priv *bat_priv = netdev_priv(net_dev); struct batadv_hard_iface *primary_if; - struct batadv_gw_node *gw_node; - int gw_count = 0; primary_if = batadv_seq_print_text_primary_if_get(seq); if (!primary_if) - goto out; + return 0; - seq_printf(seq, - " Gateway (#/255) Nexthop [outgoingIF]: advertised uplink bandwidth ... [B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s)]\n", + seq_printf(seq, "[B.A.T.M.A.N. adv %s, MainIF/MAC: %s/%pM (%s %s)]\n", BATADV_SOURCE_VERSION, primary_if->net_dev->name, - primary_if->net_dev->dev_addr, net_dev->name); + primary_if->net_dev->dev_addr, net_dev->name, + bat_priv->algo_ops->name); - rcu_read_lock(); - hlist_for_each_entry_rcu(gw_node, &bat_priv->gw.list, list) { - /* fails if orig_node has no router */ - if (batadv_write_buffer_text(bat_priv, seq, gw_node) < 0) - continue; + batadv_hardif_put(primary_if); - gw_count++; + if (!bat_priv->algo_ops->gw.print) { + seq_puts(seq, + "No printing function for this routing protocol\n"); + return 0; } - rcu_read_unlock(); - if (gw_count == 0) - seq_puts(seq, "No gateways in range ...\n"); + bat_priv->algo_ops->gw.print(bat_priv, seq); + + return 0; +} +#endif + +/** + * batadv_gw_dump - Dump gateways into a message + * @msg: Netlink message to dump into + * @cb: Control block containing additional options + * + * Return: Error code, or length of message + */ +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct batadv_hard_iface *primary_if = NULL; + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + int ifindex; + int ret; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + if (!bat_priv->algo_ops->gw.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->gw.dump(msg, cb, bat_priv); + + ret = msg->len; out: if (primary_if) batadv_hardif_put(primary_if); - return 0; + if (soft_iface) + dev_put(soft_iface); + + return ret; } /** diff --git a/net/batman-adv/gateway_client.h b/net/batman-adv/gateway_client.h index 582dd8c413c8..859166d03561 100644 --- a/net/batman-adv/gateway_client.h +++ b/net/batman-adv/gateway_client.h @@ -23,6 +23,7 @@ #include struct batadv_tvlv_gateway_data; +struct netlink_callback; struct seq_file; struct sk_buff; @@ -39,10 +40,16 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, void batadv_gw_node_delete(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node); void batadv_gw_node_free(struct batadv_priv *bat_priv); +void batadv_gw_node_put(struct batadv_gw_node *gw_node); +struct batadv_gw_node * +batadv_gw_get_selected_gw_node(struct batadv_priv *bat_priv); int batadv_gw_client_seq_print_text(struct seq_file *seq, void *offset); +int batadv_gw_dump(struct sk_buff *msg, struct netlink_callback *cb); bool batadv_gw_out_of_range(struct batadv_priv *bat_priv, struct sk_buff *skb); enum batadv_dhcp_recipient batadv_gw_dhcp_recipient_get(struct sk_buff *skb, unsigned int *header_len, u8 *chaddr); +struct batadv_gw_node *batadv_gw_node_get(struct batadv_priv *bat_priv, + struct batadv_orig_node *orig_node); #endif /* _NET_BATMAN_ADV_GATEWAY_CLIENT_H_ */ diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index d7bc6a87bcc9..21184810d89f 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -241,10 +241,9 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig, &gateway); - /* restart gateway selection if fast or late switching was enabled */ + /* restart gateway selection */ if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) && - (atomic_read(&bat_priv->gw.sel_class) > 2)) + (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index 1f9080840566..08ce36147c4c 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -35,7 +35,8 @@ #include #include #include -#include +#include +#include #include "bat_v.h" #include "bridge_loop_avoidance.h" @@ -84,26 +85,56 @@ out: return hard_iface; } +/** + * batadv_getlink_net - return link net namespace (of use fallback) + * @netdev: net_device to check + * @fallback_net: return in case get_link_net is not available for @netdev + * + * Return: result of rtnl_link_ops->get_link_net or @fallback_net + */ +static const struct net *batadv_getlink_net(const struct net_device *netdev, + const struct net *fallback_net) +{ + if (!netdev->rtnl_link_ops) + return fallback_net; + + if (!netdev->rtnl_link_ops->get_link_net) + return fallback_net; + + return netdev->rtnl_link_ops->get_link_net(netdev); +} + /** * batadv_mutual_parents - check if two devices are each others parent - * @dev1: 1st net_device - * @dev2: 2nd net_device + * @dev1: 1st net dev + * @net1: 1st devices netns + * @dev2: 2nd net dev + * @net2: 2nd devices netns * * veth devices come in pairs and each is the parent of the other! * * Return: true if the devices are each others parent, otherwise false */ static bool batadv_mutual_parents(const struct net_device *dev1, - const struct net_device *dev2) + const struct net *net1, + const struct net_device *dev2, + const struct net *net2) { int dev1_parent_iflink = dev_get_iflink(dev1); int dev2_parent_iflink = dev_get_iflink(dev2); + const struct net *dev1_parent_net; + const struct net *dev2_parent_net; + + dev1_parent_net = batadv_getlink_net(dev1, net1); + dev2_parent_net = batadv_getlink_net(dev2, net2); if (!dev1_parent_iflink || !dev2_parent_iflink) return false; return (dev1_parent_iflink == dev2->ifindex) && - (dev2_parent_iflink == dev1->ifindex); + (dev2_parent_iflink == dev1->ifindex) && + net_eq(dev1_parent_net, net2) && + net_eq(dev2_parent_net, net1); } /** @@ -121,8 +152,9 @@ static bool batadv_mutual_parents(const struct net_device *dev1, */ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) { - struct net_device *parent_dev; struct net *net = dev_net(net_dev); + struct net_device *parent_dev; + const struct net *parent_net; bool ret; /* check if this is a batman-adv mesh interface */ @@ -134,13 +166,16 @@ static bool batadv_is_on_batman_iface(const struct net_device *net_dev) dev_get_iflink(net_dev) == net_dev->ifindex) return false; + parent_net = batadv_getlink_net(net_dev, net); + /* recurse over the parent device */ - parent_dev = __dev_get_by_index(net, dev_get_iflink(net_dev)); + parent_dev = __dev_get_by_index((struct net *)parent_net, + dev_get_iflink(net_dev)); /* if we got a NULL parent_dev there is something broken.. */ if (WARN(!parent_dev, "Cannot find parent device")) return false; - if (batadv_mutual_parents(net_dev, parent_dev)) + if (batadv_mutual_parents(net_dev, net, parent_dev, parent_net)) return false; ret = batadv_is_on_batman_iface(parent_dev); @@ -625,25 +660,6 @@ out: batadv_hardif_put(primary_if); } -/** - * batadv_hardif_remove_interface_finish - cleans up the remains of a hardif - * @work: work queue item - * - * Free the parts of the hard interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_hardif_remove_interface_finish(struct work_struct *work) -{ - struct batadv_hard_iface *hard_iface; - - hard_iface = container_of(work, struct batadv_hard_iface, - cleanup_work); - - batadv_debugfs_del_hardif(hard_iface); - batadv_sysfs_del_hardif(&hard_iface->hardif_obj); - batadv_hardif_put(hard_iface); -} - static struct batadv_hard_iface * batadv_hardif_add_interface(struct net_device *net_dev) { @@ -676,10 +692,9 @@ batadv_hardif_add_interface(struct net_device *net_dev) INIT_LIST_HEAD(&hard_iface->list); INIT_HLIST_HEAD(&hard_iface->neigh_list); - INIT_WORK(&hard_iface->cleanup_work, - batadv_hardif_remove_interface_finish); spin_lock_init(&hard_iface->neigh_list_lock); + kref_init(&hard_iface->refcount); hard_iface->num_bcasts = BATADV_NUM_BCASTS_DEFAULT; if (batadv_is_wifi_netdev(net_dev)) @@ -687,11 +702,8 @@ batadv_hardif_add_interface(struct net_device *net_dev) batadv_v_hardif_init(hard_iface); - /* extra reference for return */ - kref_init(&hard_iface->refcount); - kref_get(&hard_iface->refcount); - batadv_check_known_mac_addr(hard_iface->net_dev); + kref_get(&hard_iface->refcount); list_add_tail_rcu(&hard_iface->list, &batadv_hardif_list); return hard_iface; @@ -713,13 +725,15 @@ static void batadv_hardif_remove_interface(struct batadv_hard_iface *hard_iface) /* first deactivate interface */ if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) batadv_hardif_disable_interface(hard_iface, - BATADV_IF_CLEANUP_AUTO); + BATADV_IF_CLEANUP_KEEP); if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) return; hard_iface->if_status = BATADV_IF_TO_BE_REMOVED; - queue_work(batadv_event_workqueue, &hard_iface->cleanup_work); + batadv_debugfs_del_hardif(hard_iface); + batadv_sysfs_del_hardif(&hard_iface->hardif_obj); + batadv_hardif_put(hard_iface); } void batadv_hardif_remove_interfaces(void) diff --git a/net/batman-adv/icmp_socket.h b/net/batman-adv/icmp_socket.h index 618d5de06f20..e44a7da51431 100644 --- a/net/batman-adv/icmp_socket.h +++ b/net/batman-adv/icmp_socket.h @@ -26,9 +26,25 @@ struct batadv_icmp_header; #define BATADV_ICMP_SOCKET "socket" -void batadv_socket_init(void); int batadv_socket_setup(struct batadv_priv *bat_priv); + +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + +void batadv_socket_init(void); void batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len); +#else + +static inline void batadv_socket_init(void) +{ +} + +static inline void +batadv_socket_receive_packet(struct batadv_icmp_header *icmph, size_t icmp_len) +{ +} + +#endif + #endif /* _NET_BATMAN_ADV_ICMP_SOCKET_H_ */ diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fe4c5e29f96b..2c017ab47557 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -82,6 +82,12 @@ static void batadv_recv_handler_init(void); static int __init batadv_init(void) { + int ret; + + ret = batadv_tt_cache_init(); + if (ret < 0) + return ret; + INIT_LIST_HEAD(&batadv_hardif_list); batadv_algo_init(); @@ -93,9 +99,8 @@ static int __init batadv_init(void) batadv_tp_meter_init(); batadv_event_workqueue = create_singlethread_workqueue("bat_events"); - if (!batadv_event_workqueue) - return -ENOMEM; + goto err_create_wq; batadv_socket_init(); batadv_debugfs_init(); @@ -108,6 +113,11 @@ static int __init batadv_init(void) BATADV_SOURCE_VERSION, BATADV_COMPAT_VERSION); return 0; + +err_create_wq: + batadv_tt_cache_destroy(); + + return -ENOMEM; } static void __exit batadv_exit(void) @@ -123,6 +133,8 @@ static void __exit batadv_exit(void) batadv_event_workqueue = NULL; rcu_barrier(); + + batadv_tt_cache_destroy(); } int batadv_mesh_init(struct net_device *soft_iface) @@ -270,6 +282,7 @@ bool batadv_is_my_mac(struct batadv_priv *bat_priv, const u8 *addr) return is_my_mac; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_seq_print_text_primary_if_get - called from debugfs table printing * function that requires the primary interface @@ -305,6 +318,7 @@ batadv_seq_print_text_primary_if_get(struct seq_file *seq) out: return primary_if; } +#endif /** * batadv_max_header_len - calculate maximum encapsulation overhead for a @@ -638,3 +652,4 @@ MODULE_AUTHOR(BATADV_DRIVER_AUTHOR); MODULE_DESCRIPTION(BATADV_DRIVER_DESC); MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE); MODULE_VERSION(BATADV_SOURCE_VERSION); +MODULE_ALIAS_RTNL_LINK("batadv"); diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 06a860845434..09af21e27639 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2016.3" +#define BATADV_SOURCE_VERSION "2016.4" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index cc915073a753..13661f43386f 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -528,7 +528,7 @@ update: } return !(mcast_data.flags & - (BATADV_MCAST_WANT_ALL_IPV4 + BATADV_MCAST_WANT_ALL_IPV6)); + (BATADV_MCAST_WANT_ALL_IPV4 | BATADV_MCAST_WANT_ALL_IPV6)); } /** @@ -1134,6 +1134,7 @@ void batadv_mcast_init(struct batadv_priv *bat_priv) BATADV_TVLV_HANDLER_OGM_CIFNOTFND); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_mcast_flags_print_header - print own mcast flags to debugfs table * @bat_priv: the bat priv with all the soft interface information @@ -1234,6 +1235,7 @@ int batadv_mcast_flags_seq_print_text(struct seq_file *seq, void *offset) return 0; } +#endif /** * batadv_mcast_free - free the multicast optimizations structures diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 231f8eaf075b..64cb6acbe0a6 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -18,6 +18,8 @@ #include "netlink.h" #include "main.h" +#include +#include #include #include #include @@ -26,24 +28,33 @@ #include #include #include +#include +#include +#include #include #include #include #include +#include #include +#include "bat_algo.h" +#include "bridge_loop_avoidance.h" +#include "gateway_client.h" #include "hard-interface.h" +#include "originator.h" +#include "packet.h" #include "soft-interface.h" #include "tp_meter.h" +#include "translation-table.h" -struct sk_buff; - -static struct genl_family batadv_netlink_family = { +struct genl_family batadv_netlink_family = { .id = GENL_ID_GENERATE, .hdrsize = 0, .name = BATADV_NL_NAME, .version = 1, .maxattr = BATADV_ATTR_MAX, + .netnsok = true, }; /* multicast groups */ @@ -51,11 +62,11 @@ enum batadv_netlink_multicast_groups { BATADV_NL_MCGRP_TPMETER, }; -static struct genl_multicast_group batadv_netlink_mcgrps[] = { +static const struct genl_multicast_group batadv_netlink_mcgrps[] = { [BATADV_NL_MCGRP_TPMETER] = { .name = BATADV_NL_MCAST_GROUP_TPMETER }, }; -static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { +static const struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_VERSION] = { .type = NLA_STRING }, [BATADV_ATTR_ALGO_NAME] = { .type = NLA_STRING }, [BATADV_ATTR_MESH_IFINDEX] = { .type = NLA_U32 }, @@ -69,8 +80,43 @@ static struct nla_policy batadv_netlink_policy[NUM_BATADV_ATTR] = { [BATADV_ATTR_TPMETER_TEST_TIME] = { .type = NLA_U32 }, [BATADV_ATTR_TPMETER_BYTES] = { .type = NLA_U64 }, [BATADV_ATTR_TPMETER_COOKIE] = { .type = NLA_U32 }, + [BATADV_ATTR_ACTIVE] = { .type = NLA_FLAG }, + [BATADV_ATTR_TT_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TT_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_LAST_TTVN] = { .type = NLA_U8 }, + [BATADV_ATTR_TT_CRC32] = { .type = NLA_U32 }, + [BATADV_ATTR_TT_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_TT_FLAGS] = { .type = NLA_U32 }, + [BATADV_ATTR_FLAG_BEST] = { .type = NLA_FLAG }, + [BATADV_ATTR_LAST_SEEN_MSECS] = { .type = NLA_U32 }, + [BATADV_ATTR_NEIGH_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_TQ] = { .type = NLA_U8 }, + [BATADV_ATTR_THROUGHPUT] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_UP] = { .type = NLA_U32 }, + [BATADV_ATTR_BANDWIDTH_DOWN] = { .type = NLA_U32 }, + [BATADV_ATTR_ROUTER] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_OWN] = { .type = NLA_FLAG }, + [BATADV_ATTR_BLA_ADDRESS] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_VID] = { .type = NLA_U16 }, + [BATADV_ATTR_BLA_BACKBONE] = { .len = ETH_ALEN }, + [BATADV_ATTR_BLA_CRC] = { .type = NLA_U16 }, }; +/** + * batadv_netlink_get_ifindex - Extract an interface index from a message + * @nlh: Message header + * @attrtype: Attribute which holds an interface index + * + * Return: interface index, or 0. + */ +int +batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) +{ + struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); + + return attr ? nla_get_u32(attr) : 0; +} + /** * batadv_netlink_mesh_info_put - fill in generic information about mesh * interface @@ -93,9 +139,17 @@ batadv_netlink_mesh_info_put(struct sk_buff *msg, struct net_device *soft_iface) nla_put_u32(msg, BATADV_ATTR_MESH_IFINDEX, soft_iface->ifindex) || nla_put_string(msg, BATADV_ATTR_MESH_IFNAME, soft_iface->name) || nla_put(msg, BATADV_ATTR_MESH_ADDRESS, ETH_ALEN, - soft_iface->dev_addr)) + soft_iface->dev_addr) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, + (u8)atomic_read(&bat_priv->tt.vn))) goto out; +#ifdef CONFIG_BATMAN_ADV_BLA + if (nla_put_u16(msg, BATADV_ATTR_BLA_CRC, + ntohs(bat_priv->bla.claim_dest.group))) + goto out; +#endif + primary_if = batadv_primary_if_get_selected(bat_priv); if (primary_if && primary_if->if_status == BATADV_IF_ACTIVE) { hard_iface = primary_if->net_dev; @@ -380,6 +434,106 @@ out: return ret; } +/** + * batadv_netlink_dump_hardif_entry - Dump one hard interface into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @hard_iface: Hard interface to dump + * + * Return: error code, or 0 on success + */ +static int +batadv_netlink_dump_hardif_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_hard_iface *hard_iface) +{ + struct net_device *net_dev = hard_iface->net_dev; + void *hdr; + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, NLM_F_MULTI, + BATADV_CMD_GET_HARDIFS); + if (!hdr) + return -EMSGSIZE; + + if (nla_put_u32(msg, BATADV_ATTR_HARD_IFINDEX, + net_dev->ifindex) || + nla_put_string(msg, BATADV_ATTR_HARD_IFNAME, + net_dev->name) || + nla_put(msg, BATADV_ATTR_HARD_ADDRESS, ETH_ALEN, + net_dev->dev_addr)) + goto nla_put_failure; + + if (hard_iface->if_status == BATADV_IF_ACTIVE) { + if (nla_put_flag(msg, BATADV_ATTR_ACTIVE)) + goto nla_put_failure; + } + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_netlink_dump_hardifs - Dump all hard interface into a messages + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: error code, or length of reply message on success + */ +static int +batadv_netlink_dump_hardifs(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_hard_iface *hard_iface; + int ifindex; + int portid = NETLINK_CB(cb->skb).portid; + int seq = cb->nlh->nlmsg_seq; + int skip = cb->args[0]; + int i = 0; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface) + return -ENODEV; + + if (!batadv_softif_is_valid(soft_iface)) { + dev_put(soft_iface); + return -ENODEV; + } + + rcu_read_lock(); + + list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { + if (hard_iface->soft_iface != soft_iface) + continue; + + if (i++ < skip) + continue; + + if (batadv_netlink_dump_hardif_entry(msg, portid, seq, + hard_iface)) { + i--; + break; + } + } + + rcu_read_unlock(); + + dev_put(soft_iface); + + cb->args[0] = i; + + return msg->len; +} + static struct genl_ops batadv_netlink_ops[] = { { .cmd = BATADV_CMD_GET_MESH_INFO, @@ -399,6 +553,61 @@ static struct genl_ops batadv_netlink_ops[] = { .policy = batadv_netlink_policy, .doit = batadv_netlink_tp_meter_cancel, }, + { + .cmd = BATADV_CMD_GET_ROUTING_ALGOS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_algo_dump, + }, + { + .cmd = BATADV_CMD_GET_HARDIFS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_netlink_dump_hardifs, + }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_LOCAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_local_dump, + }, + { + .cmd = BATADV_CMD_GET_TRANSTABLE_GLOBAL, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_tt_global_dump, + }, + { + .cmd = BATADV_CMD_GET_ORIGINATORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_orig_dump, + }, + { + .cmd = BATADV_CMD_GET_NEIGHBORS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_hardif_neigh_dump, + }, + { + .cmd = BATADV_CMD_GET_GATEWAYS, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_gw_dump, + }, + { + .cmd = BATADV_CMD_GET_BLA_CLAIM, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_claim_dump, + }, + { + .cmd = BATADV_CMD_GET_BLA_BACKBONE, + .flags = GENL_ADMIN_PERM, + .policy = batadv_netlink_policy, + .dumpit = batadv_bla_backbone_dump, + }, + }; /** diff --git a/net/batman-adv/netlink.h b/net/batman-adv/netlink.h index 945653ab58c6..52eb16281aba 100644 --- a/net/batman-adv/netlink.h +++ b/net/batman-adv/netlink.h @@ -21,12 +21,18 @@ #include "main.h" #include +#include + +struct nlmsghdr; void batadv_netlink_register(void); void batadv_netlink_unregister(void); +int batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype); int batadv_netlink_tpmeter_notify(struct batadv_priv *bat_priv, const u8 *dst, u8 result, u32 test_time, u64 total_bytes, u32 cookie); +extern struct genl_family batadv_netlink_family; + #endif /* _NET_BATMAN_ADV_NETLINK_H_ */ diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 293ef4ffd4e1..e3baf697a35c 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -856,14 +856,12 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, if (!nc_node) return NULL; - kref_get(&orig_neigh_node->refcount); - /* Initialize nc_node */ INIT_LIST_HEAD(&nc_node->list); - ether_addr_copy(nc_node->addr, orig_node->orig); - nc_node->orig_node = orig_neigh_node; kref_init(&nc_node->refcount); - kref_get(&nc_node->refcount); + ether_addr_copy(nc_node->addr, orig_node->orig); + kref_get(&orig_neigh_node->refcount); + nc_node->orig_node = orig_neigh_node; /* Select ingoing or outgoing coding node */ if (in_coding) { @@ -879,6 +877,7 @@ batadv_nc_get_nc_node(struct batadv_priv *bat_priv, /* Add nc_node to orig_node */ spin_lock_bh(lock); + kref_get(&nc_node->refcount); list_add_tail_rcu(&nc_node->list, list); spin_unlock_bh(lock); @@ -979,7 +978,6 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, INIT_LIST_HEAD(&nc_path->packet_list); spin_lock_init(&nc_path->packet_list_lock); kref_init(&nc_path->refcount); - kref_get(&nc_path->refcount); nc_path->last_valid = jiffies; ether_addr_copy(nc_path->next_hop, dst); ether_addr_copy(nc_path->prev_hop, src); @@ -989,6 +987,7 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, nc_path->next_hop); /* Add nc_path to hash table */ + kref_get(&nc_path->refcount); hash_added = batadv_hash_add(hash, batadv_nc_hash_compare, batadv_nc_hash_choose, &nc_path_key, &nc_path->hash_entry); @@ -1882,6 +1881,7 @@ void batadv_nc_mesh_free(struct batadv_priv *bat_priv) batadv_hash_destroy(bat_priv->nc.decoding_hash); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_nc_nodes_seq_print_text - print the nc node information * @seq: seq file to print on @@ -1981,3 +1981,4 @@ int batadv_nc_init_debugfs(struct batadv_priv *bat_priv) out: return -ENOMEM; } +#endif diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 3940b5d24421..5f3bfc41aeb1 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -28,11 +28,15 @@ #include #include #include +#include #include #include +#include #include #include #include +#include +#include #include "bat_algo.h" #include "distributed-arp-table.h" @@ -42,8 +46,10 @@ #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "network-coding.h" #include "routing.h" +#include "soft-interface.h" #include "translation-table.h" /* hash class keys */ @@ -127,9 +133,9 @@ batadv_orig_node_vlan_new(struct batadv_orig_node *orig_node, goto out; kref_init(&vlan->refcount); - kref_get(&vlan->refcount); vlan->vid = vid; + kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &orig_node->vlan_list); out: @@ -380,6 +386,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, orig_ifinfo->if_outgoing = if_outgoing; INIT_HLIST_NODE(&orig_ifinfo->list); kref_init(&orig_ifinfo->refcount); + kref_get(&orig_ifinfo->refcount); hlist_add_head_rcu(&orig_ifinfo->list, &orig_node->ifinfo_list); @@ -453,9 +460,9 @@ batadv_neigh_ifinfo_new(struct batadv_neigh_node *neigh, INIT_HLIST_NODE(&neigh_ifinfo->list); kref_init(&neigh_ifinfo->refcount); - kref_get(&neigh_ifinfo->refcount); neigh_ifinfo->if_outgoing = if_outgoing; + kref_get(&neigh_ifinfo->refcount); hlist_add_head_rcu(&neigh_ifinfo->list, &neigh->ifinfo_list); out: @@ -647,8 +654,8 @@ batadv_neigh_node_create(struct batadv_orig_node *orig_node, /* extra reference for return */ kref_init(&neigh_node->refcount); - kref_get(&neigh_node->refcount); + kref_get(&neigh_node->refcount); hlist_add_head_rcu(&neigh_node->list, &orig_node->neigh_list); batadv_dbg(BATADV_DBG_BATMAN, orig_node->bat_priv, @@ -686,6 +693,7 @@ batadv_neigh_node_get_or_create(struct batadv_orig_node *orig_node, return batadv_neigh_node_create(orig_node, hard_iface, neigh_addr); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_hardif_neigh_seq_print_text - print the single hop neighbour list * @seq: neighbour table seq_file struct @@ -719,6 +727,84 @@ int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset) bat_priv->algo_ops->neigh.print(bat_priv, seq); return 0; } +#endif + +/** + * batadv_hardif_neigh_dump - Dump to netlink the neighbor infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->neigh.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->neigh.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} /** * batadv_orig_ifinfo_release - release orig_ifinfo from lists and queue for @@ -905,7 +991,6 @@ struct batadv_orig_node *batadv_orig_node_new(struct batadv_priv *bat_priv, /* extra reference for return */ kref_init(&orig_node->refcount); - kref_get(&orig_node->refcount); orig_node->bat_priv = bat_priv; ether_addr_copy(orig_node->orig, addr); @@ -1256,6 +1341,7 @@ void batadv_purge_orig_ref(struct batadv_priv *bat_priv) _batadv_purge_orig(bat_priv); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_orig_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1329,6 +1415,84 @@ out: batadv_hardif_put(hard_iface); return 0; } +#endif + +/** + * batadv_orig_dump - Dump to netlink the originator infos for a specific + * outgoing interface + * @msg: message to dump into + * @cb: parameters for the dump + * + * Return: 0 or error value + */ +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct net_device *hard_iface = NULL; + struct batadv_hard_iface *hardif = BATADV_IF_DEFAULT; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + int ret; + int ifindex, hard_ifindex; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hard_ifindex = batadv_netlink_get_ifindex(cb->nlh, + BATADV_ATTR_HARD_IFINDEX); + if (hard_ifindex) { + hard_iface = dev_get_by_index(net, hard_ifindex); + if (hard_iface) + hardif = batadv_hardif_get_by_netdev(hard_iface); + + if (!hardif) { + ret = -ENODEV; + goto out; + } + + if (hardif->soft_iface != soft_iface) { + ret = -ENOENT; + goto out; + } + } + + if (!bat_priv->algo_ops->orig.dump) { + ret = -EOPNOTSUPP; + goto out; + } + + bat_priv->algo_ops->orig.dump(msg, cb, bat_priv, hardif); + + ret = msg->len; + + out: + if (hardif) + batadv_hardif_put(hardif); + if (hard_iface) + dev_put(hard_iface); + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + return ret; +} int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num) diff --git a/net/batman-adv/originator.h b/net/batman-adv/originator.h index 566306bf05dc..ebc56183f358 100644 --- a/net/batman-adv/originator.h +++ b/net/batman-adv/originator.h @@ -31,7 +31,9 @@ #include "hash.h" +struct netlink_callback; struct seq_file; +struct sk_buff; bool batadv_compare_orig(const struct hlist_node *node, const void *data2); int batadv_originator_init(struct batadv_priv *bat_priv); @@ -61,6 +63,7 @@ batadv_neigh_ifinfo_get(struct batadv_neigh_node *neigh, struct batadv_hard_iface *if_outgoing); void batadv_neigh_ifinfo_put(struct batadv_neigh_ifinfo *neigh_ifinfo); +int batadv_hardif_neigh_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_hardif_neigh_seq_print_text(struct seq_file *seq, void *offset); struct batadv_orig_ifinfo * @@ -72,6 +75,7 @@ batadv_orig_ifinfo_new(struct batadv_orig_node *orig_node, void batadv_orig_ifinfo_put(struct batadv_orig_ifinfo *orig_ifinfo); int batadv_orig_seq_print_text(struct seq_file *seq, void *offset); +int batadv_orig_dump(struct sk_buff *msg, struct netlink_callback *cb); int batadv_orig_hardif_seq_print_text(struct seq_file *seq, void *offset); int batadv_orig_hash_add_if(struct batadv_hard_iface *hard_iface, int max_if_num); diff --git a/net/batman-adv/packet.h b/net/batman-adv/packet.h index 6b011ff64dd8..6afc0b86950e 100644 --- a/net/batman-adv/packet.h +++ b/net/batman-adv/packet.h @@ -128,42 +128,6 @@ enum batadv_tt_data_flags { BATADV_TT_FULL_TABLE = BIT(4), }; -/** - * enum batadv_tt_client_flags - TT client specific flags - * @BATADV_TT_CLIENT_DEL: the client has to be deleted from the table - * @BATADV_TT_CLIENT_ROAM: the client roamed to/from another node and the new - * update telling its new real location has not been received/sent yet - * @BATADV_TT_CLIENT_WIFI: this client is connected through a wifi interface. - * This information is used by the "AP Isolation" feature - * @BATADV_TT_CLIENT_ISOLA: this client is considered "isolated". This - * information is used by the Extended Isolation feature - * @BATADV_TT_CLIENT_NOPURGE: this client should never be removed from the table - * @BATADV_TT_CLIENT_NEW: this client has been added to the local table but has - * not been announced yet - * @BATADV_TT_CLIENT_PENDING: this client is marked for removal but it is kept - * in the table for one more originator interval for consistency purposes - * @BATADV_TT_CLIENT_TEMP: this global client has been detected to be part of - * the network but no nnode has already announced it - * - * Bits from 0 to 7 are called _remote flags_ because they are sent on the wire. - * Bits from 8 to 15 are called _local flags_ because they are used for local - * computations only. - * - * Bits from 4 to 7 - a subset of remote flags - are ensured to be in sync with - * the other nodes in the network. To achieve this goal these flags are included - * in the TT CRC computation. - */ -enum batadv_tt_client_flags { - BATADV_TT_CLIENT_DEL = BIT(0), - BATADV_TT_CLIENT_ROAM = BIT(1), - BATADV_TT_CLIENT_WIFI = BIT(4), - BATADV_TT_CLIENT_ISOLA = BIT(5), - BATADV_TT_CLIENT_NOPURGE = BIT(8), - BATADV_TT_CLIENT_NEW = BIT(9), - BATADV_TT_CLIENT_PENDING = BIT(10), - BATADV_TT_CLIENT_TEMP = BIT(11), -}; - /** * enum batadv_vlan_flags - flags for the four MSB of any vlan ID field * @BATADV_VLAN_HAS_TAG: whether the field contains a valid vlan tag or not diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index 3d199478c405..7e8dc648b95a 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -74,11 +74,23 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, if (!orig_ifinfo) return; - rcu_read_lock(); - curr_router = rcu_dereference(orig_ifinfo->router); - if (curr_router && !kref_get_unless_zero(&curr_router->refcount)) - curr_router = NULL; - rcu_read_unlock(); + spin_lock_bh(&orig_node->neigh_list_lock); + /* curr_router used earlier may not be the current orig_ifinfo->router + * anymore because it was dereferenced outside of the neigh_list_lock + * protected region. After the new best neighbor has replace the current + * best neighbor the reference counter needs to decrease. Consequently, + * the code needs to ensure the curr_router variable contains a pointer + * to the replaced best neighbor. + */ + curr_router = rcu_dereference_protected(orig_ifinfo->router, true); + + /* increase refcount of new best neighbor */ + if (neigh_node) + kref_get(&neigh_node->refcount); + + rcu_assign_pointer(orig_ifinfo->router, neigh_node); + spin_unlock_bh(&orig_node->neigh_list_lock); + batadv_orig_ifinfo_put(orig_ifinfo); /* route deleted */ if ((curr_router) && (!neigh_node)) { @@ -100,27 +112,6 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, curr_router->addr); } - if (curr_router) - batadv_neigh_node_put(curr_router); - - spin_lock_bh(&orig_node->neigh_list_lock); - /* curr_router used earlier may not be the current orig_ifinfo->router - * anymore because it was dereferenced outside of the neigh_list_lock - * protected region. After the new best neighbor has replace the current - * best neighbor the reference counter needs to decrease. Consequently, - * the code needs to ensure the curr_router variable contains a pointer - * to the replaced best neighbor. - */ - curr_router = rcu_dereference_protected(orig_ifinfo->router, true); - - /* increase refcount of new best neighbor */ - if (neigh_node) - kref_get(&neigh_node->refcount); - - rcu_assign_pointer(orig_ifinfo->router, neigh_node); - spin_unlock_bh(&orig_node->neigh_list_lock); - batadv_orig_ifinfo_put(orig_ifinfo); - /* decrease refcount of previous best neighbor */ if (curr_router) batadv_neigh_node_put(curr_router); diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index 6191159484df..8d4e1f578574 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -315,8 +315,7 @@ out: * * Wrap the given skb into a batman-adv unicast or unicast-4addr header * depending on whether BATADV_UNICAST or BATADV_UNICAST_4ADDR was supplied - * as packet_type. Then send this frame to the given orig_node and release a - * reference to this orig_node. + * as packet_type. Then send this frame to the given orig_node. * * Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise. */ @@ -370,8 +369,6 @@ int batadv_send_skb_unicast(struct batadv_priv *bat_priv, ret = NET_XMIT_SUCCESS; out: - if (orig_node) - batadv_orig_node_put(orig_node); if (ret == NET_XMIT_DROP) kfree_skb(skb); return ret; @@ -403,6 +400,7 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, struct ethhdr *ethhdr = (struct ethhdr *)skb->data; struct batadv_orig_node *orig_node; u8 *src, *dst; + int ret; src = ethhdr->h_source; dst = ethhdr->h_dest; @@ -414,8 +412,13 @@ int batadv_send_skb_via_tt_generic(struct batadv_priv *bat_priv, } orig_node = batadv_transtable_search(bat_priv, src, dst, vid); - return batadv_send_skb_unicast(bat_priv, skb, packet_type, - packet_subtype, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, packet_type, + packet_subtype, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } /** @@ -433,12 +436,25 @@ int batadv_send_skb_via_gw(struct batadv_priv *bat_priv, struct sk_buff *skb, unsigned short vid) { struct batadv_orig_node *orig_node; + int ret; orig_node = batadv_gw_get_selected_orig(bat_priv); - return batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, - BATADV_P_DATA, orig_node, vid); + ret = batadv_send_skb_unicast(bat_priv, skb, BATADV_UNICAST_4ADDR, + BATADV_P_DATA, orig_node, vid); + + if (orig_node) + batadv_orig_node_put(orig_node); + + return ret; } +/** + * batadv_forw_packet_free - free a forwarding packet + * @forw_packet: The packet to free + * + * This frees a forwarding packet and releases any resources it might + * have claimed. + */ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) { kfree_skb(forw_packet->skb); @@ -446,9 +462,73 @@ void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet) batadv_hardif_put(forw_packet->if_incoming); if (forw_packet->if_outgoing) batadv_hardif_put(forw_packet->if_outgoing); + if (forw_packet->queue_left) + atomic_inc(forw_packet->queue_left); kfree(forw_packet); } +/** + * batadv_forw_packet_alloc - allocate a forwarding packet + * @if_incoming: The (optional) if_incoming to be grabbed + * @if_outgoing: The (optional) if_outgoing to be grabbed + * @queue_left: The (optional) queue counter to decrease + * @bat_priv: The bat_priv for the mesh of this forw_packet + * + * Allocates a forwarding packet and tries to get a reference to the + * (optional) if_incoming, if_outgoing and queue_left. If queue_left + * is NULL then bat_priv is optional, too. + * + * Return: An allocated forwarding packet on success, NULL otherwise. + */ +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv) +{ + struct batadv_forw_packet *forw_packet; + const char *qname; + + if (queue_left && !batadv_atomic_dec_not_zero(queue_left)) { + qname = "unknown"; + + if (queue_left == &bat_priv->bcast_queue_left) + qname = "bcast"; + + if (queue_left == &bat_priv->batman_queue_left) + qname = "batman"; + + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, + "%s queue is full\n", qname); + + return NULL; + } + + forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + if (!forw_packet) + goto err; + + if (if_incoming) + kref_get(&if_incoming->refcount); + + if (if_outgoing) + kref_get(&if_outgoing->refcount); + + forw_packet->skb = NULL; + forw_packet->queue_left = queue_left; + forw_packet->if_incoming = if_incoming; + forw_packet->if_outgoing = if_outgoing; + forw_packet->num_packets = 0; + + return forw_packet; + +err: + if (queue_left) + atomic_inc(queue_left); + + return NULL; +} + static void _batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_forw_packet *forw_packet, @@ -487,24 +567,20 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, struct batadv_bcast_packet *bcast_packet; struct sk_buff *newskb; - if (!batadv_atomic_dec_not_zero(&bat_priv->bcast_queue_left)) { - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bcast packet queue full\n"); - goto out; - } - primary_if = batadv_primary_if_get_selected(bat_priv); if (!primary_if) - goto out_and_inc; - - forw_packet = kmalloc(sizeof(*forw_packet), GFP_ATOMIC); + goto err; + forw_packet = batadv_forw_packet_alloc(primary_if, NULL, + &bat_priv->bcast_queue_left, + bat_priv); + batadv_hardif_put(primary_if); if (!forw_packet) - goto out_and_inc; + goto err; newskb = skb_copy(skb, GFP_ATOMIC); if (!newskb) - goto packet_free; + goto err_packet_free; /* as we have a copy now, it is safe to decrease the TTL */ bcast_packet = (struct batadv_bcast_packet *)newskb->data; @@ -513,11 +589,6 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, skb_reset_mac_header(newskb); forw_packet->skb = newskb; - forw_packet->if_incoming = primary_if; - forw_packet->if_outgoing = NULL; - - /* how often did we send the bcast packet ? */ - forw_packet->num_packets = 0; INIT_DELAYED_WORK(&forw_packet->delayed_work, batadv_send_outstanding_bcast_packet); @@ -525,13 +596,9 @@ int batadv_add_bcast_packet_to_list(struct batadv_priv *bat_priv, _batadv_add_bcast_packet_to_list(bat_priv, forw_packet, delay); return NETDEV_TX_OK; -packet_free: - kfree(forw_packet); -out_and_inc: - atomic_inc(&bat_priv->bcast_queue_left); -out: - if (primary_if) - batadv_hardif_put(primary_if); +err_packet_free: + batadv_forw_packet_free(forw_packet); +err: return NETDEV_TX_BUSY; } @@ -592,7 +659,6 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) out: batadv_forw_packet_free(forw_packet); - atomic_inc(&bat_priv->bcast_queue_left); } void @@ -633,9 +699,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->bcast_queue_left); - batadv_forw_packet_free(forw_packet); } } @@ -663,9 +726,6 @@ batadv_purge_outstanding_packets(struct batadv_priv *bat_priv, if (pending) { hlist_del(&forw_packet->list); - if (!forw_packet->own) - atomic_inc(&bat_priv->batman_queue_left); - batadv_forw_packet_free(forw_packet); } } diff --git a/net/batman-adv/send.h b/net/batman-adv/send.h index 7cecb7563b45..999f78683d9e 100644 --- a/net/batman-adv/send.h +++ b/net/batman-adv/send.h @@ -28,6 +28,12 @@ struct sk_buff; void batadv_forw_packet_free(struct batadv_forw_packet *forw_packet); +struct batadv_forw_packet * +batadv_forw_packet_alloc(struct batadv_hard_iface *if_incoming, + struct batadv_hard_iface *if_outgoing, + atomic_t *queue_left, + struct batadv_priv *bat_priv); + int batadv_send_skb_to_orig(struct sk_buff *skb, struct batadv_orig_node *orig_node, struct batadv_hard_iface *recv_if); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 7527c0652dd5..49e16b6e0ba3 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -46,7 +47,6 @@ #include #include #include -#include #include "bat_algo.h" #include "bridge_loop_avoidance.h" @@ -57,6 +57,7 @@ #include "hard-interface.h" #include "multicast.h" #include "network-coding.h" +#include "originator.h" #include "packet.h" #include "send.h" #include "sysfs.h" @@ -377,6 +378,8 @@ dropped: dropped_freed: batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED); end: + if (mcast_single_orig) + batadv_orig_node_put(mcast_single_orig); if (primary_if) batadv_hardif_put(primary_if); return NETDEV_TX_OK; @@ -591,6 +594,7 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) } spin_lock_bh(&bat_priv->softif_vlan_list_lock); + kref_get(&vlan->refcount); hlist_add_head_rcu(&vlan->list, &bat_priv->softif_vlan_list); spin_unlock_bh(&bat_priv->softif_vlan_list_lock); @@ -601,6 +605,9 @@ int batadv_softif_create_vlan(struct batadv_priv *bat_priv, unsigned short vid) bat_priv->soft_iface->dev_addr, vid, BATADV_NULL_IFINDEX, BATADV_NO_MARK); + /* don't return reference to new softif_vlan */ + batadv_softif_vlan_put(vlan); + return 0; } @@ -746,34 +753,6 @@ static void batadv_set_lockdep_class(struct net_device *dev) netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL); } -/** - * batadv_softif_destroy_finish - cleans up the remains of a softif - * @work: work queue item - * - * Free the parts of the soft interface which can not be removed under - * rtnl lock (to prevent deadlock situations). - */ -static void batadv_softif_destroy_finish(struct work_struct *work) -{ - struct batadv_softif_vlan *vlan; - struct batadv_priv *bat_priv; - struct net_device *soft_iface; - - bat_priv = container_of(work, struct batadv_priv, - cleanup_work); - soft_iface = bat_priv->soft_iface; - - /* destroy the "untagged" VLAN */ - vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); - if (vlan) { - batadv_softif_destroy_vlan(bat_priv, vlan); - batadv_softif_vlan_put(vlan); - } - - batadv_sysfs_del_meshif(soft_iface); - unregister_netdev(soft_iface); -} - /** * batadv_softif_init_late - late stage initialization of soft interface * @dev: registered network device to modify @@ -791,7 +770,6 @@ static int batadv_softif_init_late(struct net_device *dev) bat_priv = netdev_priv(dev); bat_priv->soft_iface = dev; - INIT_WORK(&bat_priv->cleanup_work, batadv_softif_destroy_finish); /* batadv_interface_stats() needs to be available as soon as * register_netdevice() has been called @@ -1028,8 +1006,19 @@ struct net_device *batadv_softif_create(struct net *net, const char *name) void batadv_softif_destroy_sysfs(struct net_device *soft_iface) { struct batadv_priv *bat_priv = netdev_priv(soft_iface); + struct batadv_softif_vlan *vlan; - queue_work(batadv_event_workqueue, &bat_priv->cleanup_work); + ASSERT_RTNL(); + + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } + + batadv_sysfs_del_meshif(soft_iface); + unregister_netdevice(soft_iface); } /** diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index fe9ca94ddee2..02d96f224c60 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -37,6 +37,7 @@ #include #include #include +#include #include "bridge_loop_avoidance.h" #include "distributed-arp-table.h" @@ -428,6 +429,13 @@ static ssize_t batadv_show_gw_mode(struct kobject *kobj, struct attribute *attr, struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); int bytes_written; + /* GW mode is not available if the routing algorithm in use does not + * implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + switch (atomic_read(&bat_priv->gw.mode)) { case BATADV_GW_MODE_CLIENT: bytes_written = sprintf(buff, "%s\n", @@ -455,6 +463,13 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, char *curr_gw_mode_str; int gw_mode_tmp = -1; + /* toggling GW mode is allowed only if the routing algorithm in use + * provides the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + if (buff[count - 1] == '\n') buff[count - 1] = '\0'; @@ -514,6 +529,50 @@ static ssize_t batadv_store_gw_mode(struct kobject *kobj, return count; } +static ssize_t batadv_show_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + /* GW selection class is not available if the routing algorithm in use + * does not implement the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -ENOENT; + + if (bat_priv->algo_ops->gw.show_sel_class) + return bat_priv->algo_ops->gw.show_sel_class(bat_priv, buff); + + return sprintf(buff, "%i\n", atomic_read(&bat_priv->gw.sel_class)); +} + +static ssize_t batadv_store_gw_sel_class(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct batadv_priv *bat_priv = batadv_kobj_to_batpriv(kobj); + + /* setting the GW selection class is allowed only if the routing + * algorithm in use implements the GW API + */ + if (!bat_priv->algo_ops->gw.get_best_gw_node || + !bat_priv->algo_ops->gw.is_eligible) + return -EINVAL; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (bat_priv->algo_ops->gw.store_sel_class) + return bat_priv->algo_ops->gw.store_sel_class(bat_priv, buff, + count); + + return __batadv_store_uint_attr(buff, count, 1, BATADV_TQ_MAX_VALUE, + batadv_post_gw_reselect, attr, + &bat_priv->gw.sel_class, + bat_priv->soft_iface); +} + static ssize_t batadv_show_gw_bwidth(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -625,8 +684,8 @@ BATADV_ATTR_SIF_UINT(orig_interval, orig_interval, S_IRUGO | S_IWUSR, 2 * BATADV_JITTER, INT_MAX, NULL); BATADV_ATTR_SIF_UINT(hop_penalty, hop_penalty, S_IRUGO | S_IWUSR, 0, BATADV_TQ_MAX_VALUE, NULL); -BATADV_ATTR_SIF_UINT(gw_sel_class, gw.sel_class, S_IRUGO | S_IWUSR, 1, - BATADV_TQ_MAX_VALUE, batadv_post_gw_reselect); +static BATADV_ATTR(gw_sel_class, S_IRUGO | S_IWUSR, batadv_show_gw_sel_class, + batadv_store_gw_sel_class); static BATADV_ATTR(gw_bandwidth, S_IRUGO | S_IWUSR, batadv_show_gw_bwidth, batadv_store_gw_bwidth); #ifdef CONFIG_BATMAN_ADV_MCAST @@ -712,6 +771,8 @@ rem_attr: for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; out: @@ -726,6 +787,8 @@ void batadv_sysfs_del_meshif(struct net_device *dev) for (bat_attr = batadv_mesh_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(bat_priv->mesh_obj, &((*bat_attr)->attr)); + kobject_uevent(bat_priv->mesh_obj, KOBJ_REMOVE); + kobject_del(bat_priv->mesh_obj); kobject_put(bat_priv->mesh_obj); bat_priv->mesh_obj = NULL; } @@ -781,6 +844,10 @@ rem_attr: for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; out: @@ -800,6 +867,10 @@ void batadv_sysfs_del_vlan(struct batadv_priv *bat_priv, for (bat_attr = batadv_vlan_attrs; *bat_attr; ++bat_attr) sysfs_remove_file(vlan->kobj, &((*bat_attr)->attr)); + if (vlan->kobj != bat_priv->mesh_obj) { + kobject_uevent(vlan->kobj, KOBJ_REMOVE); + kobject_del(vlan->kobj); + } kobject_put(vlan->kobj); vlan->kobj = NULL; } @@ -828,31 +899,31 @@ static ssize_t batadv_show_mesh_iface(struct kobject *kobj, return length; } -static ssize_t batadv_store_mesh_iface(struct kobject *kobj, - struct attribute *attr, char *buff, - size_t count) +/** + * batadv_store_mesh_iface_finish - store new hardif mesh_iface state + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @ifname: name of soft-interface to modify + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + * + * Return: 0 on success, 0 < on failure + */ +static int batadv_store_mesh_iface_finish(struct net_device *net_dev, + char ifname[IFNAMSIZ]) { - struct net_device *net_dev = batadv_kobj_to_netdev(kobj); struct net *net = dev_net(net_dev); struct batadv_hard_iface *hard_iface; - int status_tmp = -1; - int ret = count; + int status_tmp; + int ret = 0; + + ASSERT_RTNL(); hard_iface = batadv_hardif_get_by_netdev(net_dev); if (!hard_iface) - return count; + return 0; - if (buff[count - 1] == '\n') - buff[count - 1] = '\0'; - - if (strlen(buff) >= IFNAMSIZ) { - pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", - buff); - batadv_hardif_put(hard_iface); - return -EINVAL; - } - - if (strncmp(buff, "none", 4) == 0) + if (strncmp(ifname, "none", 4) == 0) status_tmp = BATADV_IF_NOT_IN_USE; else status_tmp = BATADV_IF_I_WANT_YOU; @@ -861,15 +932,13 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, goto out; if ((hard_iface->soft_iface) && - (strncmp(hard_iface->soft_iface->name, buff, IFNAMSIZ) == 0)) + (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) goto out; - rtnl_lock(); - if (status_tmp == BATADV_IF_NOT_IN_USE) { batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - goto unlock; + goto out; } /* if the interface already is in use */ @@ -877,15 +946,71 @@ static ssize_t batadv_store_mesh_iface(struct kobject *kobj, batadv_hardif_disable_interface(hard_iface, BATADV_IF_CLEANUP_AUTO); - ret = batadv_hardif_enable_interface(hard_iface, net, buff); - -unlock: - rtnl_unlock(); + ret = batadv_hardif_enable_interface(hard_iface, net, ifname); out: batadv_hardif_put(hard_iface); return ret; } +/** + * batadv_store_mesh_iface_work - store new hardif mesh_iface state + * @work: work queue item + * + * Changes the parts of the hard+soft interface which can not be modified under + * sysfs lock (to prevent deadlock situations). + */ +static void batadv_store_mesh_iface_work(struct work_struct *work) +{ + struct batadv_store_mesh_work *store_work; + int ret; + + store_work = container_of(work, struct batadv_store_mesh_work, work); + + rtnl_lock(); + ret = batadv_store_mesh_iface_finish(store_work->net_dev, + store_work->soft_iface_name); + rtnl_unlock(); + + if (ret < 0) + pr_err("Failed to store new mesh_iface state %s for %s: %d\n", + store_work->soft_iface_name, store_work->net_dev->name, + ret); + + dev_put(store_work->net_dev); + kfree(store_work); +} + +static ssize_t batadv_store_mesh_iface(struct kobject *kobj, + struct attribute *attr, char *buff, + size_t count) +{ + struct net_device *net_dev = batadv_kobj_to_netdev(kobj); + struct batadv_store_mesh_work *store_work; + + if (buff[count - 1] == '\n') + buff[count - 1] = '\0'; + + if (strlen(buff) >= IFNAMSIZ) { + pr_err("Invalid parameter for 'mesh_iface' setting received: interface name too long '%s'\n", + buff); + return -EINVAL; + } + + store_work = kmalloc(sizeof(*store_work), GFP_KERNEL); + if (!store_work) + return -ENOMEM; + + dev_hold(net_dev); + INIT_WORK(&store_work->work, batadv_store_mesh_iface_work); + store_work->net_dev = net_dev; + strlcpy(store_work->soft_iface_name, buff, + sizeof(store_work->soft_iface_name)); + + queue_work(batadv_event_workqueue, &store_work->work); + + return count; +} + static ssize_t batadv_show_iface_status(struct kobject *kobj, struct attribute *attr, char *buff) { @@ -1048,6 +1173,8 @@ out: void batadv_sysfs_del_hardif(struct kobject **hardif_obj) { + kobject_uevent(*hardif_obj, KOBJ_REMOVE); + kobject_del(*hardif_obj); kobject_put(*hardif_obj); *hardif_obj = NULL; } diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 7e6df7a4964a..7f663092f6de 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -22,12 +22,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -35,25 +37,39 @@ #include #include #include +#include #include #include #include +#include #include #include #include #include #include +#include +#include +#include +#include #include "bridge_loop_avoidance.h" #include "hard-interface.h" #include "hash.h" #include "log.h" #include "multicast.h" +#include "netlink.h" #include "originator.h" #include "packet.h" #include "soft-interface.h" #include "tvlv.h" +static struct kmem_cache *batadv_tl_cache __read_mostly; +static struct kmem_cache *batadv_tg_cache __read_mostly; +static struct kmem_cache *batadv_tt_orig_cache __read_mostly; +static struct kmem_cache *batadv_tt_change_cache __read_mostly; +static struct kmem_cache *batadv_tt_req_cache __read_mostly; +static struct kmem_cache *batadv_tt_roam_cache __read_mostly; + /* hash class keys */ static struct lock_class_key batadv_tt_local_hash_lock_class_key; static struct lock_class_key batadv_tt_global_hash_lock_class_key; @@ -204,6 +220,20 @@ batadv_tt_global_hash_find(struct batadv_priv *bat_priv, const u8 *addr, return tt_global_entry; } +/** + * batadv_tt_local_entry_free_rcu - free the tt_local_entry + * @rcu: rcu pointer of the tt_local_entry + */ +static void batadv_tt_local_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_local_entry *tt_local_entry; + + tt_local_entry = container_of(rcu, struct batadv_tt_local_entry, + common.rcu); + + kmem_cache_free(batadv_tl_cache, tt_local_entry); +} + /** * batadv_tt_local_entry_release - release tt_local_entry from lists and queue * for free after rcu grace period @@ -218,7 +248,7 @@ static void batadv_tt_local_entry_release(struct kref *ref) batadv_softif_vlan_put(tt_local_entry->vlan); - kfree_rcu(tt_local_entry, common.rcu); + call_rcu(&tt_local_entry->common.rcu, batadv_tt_local_entry_free_rcu); } /** @@ -233,6 +263,20 @@ batadv_tt_local_entry_put(struct batadv_tt_local_entry *tt_local_entry) batadv_tt_local_entry_release); } +/** + * batadv_tt_global_entry_free_rcu - free the tt_global_entry + * @rcu: rcu pointer of the tt_global_entry + */ +static void batadv_tt_global_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_global_entry *tt_global_entry; + + tt_global_entry = container_of(rcu, struct batadv_tt_global_entry, + common.rcu); + + kmem_cache_free(batadv_tg_cache, tt_global_entry); +} + /** * batadv_tt_global_entry_release - release tt_global_entry from lists and queue * for free after rcu grace period @@ -246,7 +290,8 @@ static void batadv_tt_global_entry_release(struct kref *ref) common.refcount); batadv_tt_global_del_orig_list(tt_global_entry); - kfree_rcu(tt_global_entry, common.rcu); + + call_rcu(&tt_global_entry->common.rcu, batadv_tt_global_entry_free_rcu); } /** @@ -383,6 +428,19 @@ static void batadv_tt_global_size_dec(struct batadv_orig_node *orig_node, batadv_tt_global_size_mod(orig_node, vid, -1); } +/** + * batadv_tt_orig_list_entry_free_rcu - free the orig_entry + * @rcu: rcu pointer of the orig_entry + */ +static void batadv_tt_orig_list_entry_free_rcu(struct rcu_head *rcu) +{ + struct batadv_tt_orig_list_entry *orig_entry; + + orig_entry = container_of(rcu, struct batadv_tt_orig_list_entry, rcu); + + kmem_cache_free(batadv_tt_orig_cache, orig_entry); +} + /** * batadv_tt_orig_list_entry_release - release tt orig entry from lists and * queue for free after rcu grace period @@ -396,7 +454,7 @@ static void batadv_tt_orig_list_entry_release(struct kref *ref) refcount); batadv_orig_node_put(orig_entry->orig_node); - kfree_rcu(orig_entry, rcu); + call_rcu(&orig_entry->rcu, batadv_tt_orig_list_entry_free_rcu); } /** @@ -426,7 +484,7 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, bool event_removed = false; bool del_op_requested, del_op_entry; - tt_change_node = kmalloc(sizeof(*tt_change_node), GFP_ATOMIC); + tt_change_node = kmem_cache_alloc(batadv_tt_change_cache, GFP_ATOMIC); if (!tt_change_node) return; @@ -467,8 +525,8 @@ static void batadv_tt_local_event(struct batadv_priv *bat_priv, continue; del: list_del(&entry->list); - kfree(entry); - kfree(tt_change_node); + kmem_cache_free(batadv_tt_change_cache, entry); + kmem_cache_free(batadv_tt_change_cache, tt_change_node); event_removed = true; goto unlock; } @@ -646,7 +704,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, goto out; } - tt_local = kmalloc(sizeof(*tt_local), GFP_ATOMIC); + tt_local = kmem_cache_alloc(batadv_tl_cache, GFP_ATOMIC); if (!tt_local) goto out; @@ -656,7 +714,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, net_ratelimited_function(batadv_info, soft_iface, "adding TT local entry %pM to non-existent VLAN %d\n", addr, BATADV_PRINT_VID(vid)); - kfree(tt_local); + kmem_cache_free(batadv_tl_cache, tt_local); tt_local = NULL; goto out; } @@ -676,7 +734,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (batadv_is_wifi_netdev(in_dev)) tt_local->common.flags |= BATADV_TT_CLIENT_WIFI; kref_init(&tt_local->common.refcount); - kref_get(&tt_local->common.refcount); tt_local->last_seen = jiffies; tt_local->common.added_at = tt_local->last_seen; tt_local->vlan = vlan; @@ -688,6 +745,7 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, is_multicast_ether_addr(addr)) tt_local->common.flags |= BATADV_TT_CLIENT_NOPURGE; + kref_get(&tt_local->common.refcount); hash_added = batadv_hash_add(bat_priv->tt.local_hash, batadv_compare_tt, batadv_choose_tt, &tt_local->common, &tt_local->common.hash_entry); @@ -959,7 +1017,7 @@ static void batadv_tt_tvlv_container_update(struct batadv_priv *bat_priv) tt_diff_entries_count++; } list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } spin_unlock_bh(&bat_priv->tt.changes_list_lock); @@ -989,6 +1047,7 @@ container_register: kfree(tt_data); } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset) { struct net_device *net_dev = (struct net_device *)seq->private; @@ -1056,6 +1115,165 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif + +/** + * batadv_tt_local_dump_entry - Dump one TT local entry into a message + * @msg :Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_local_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_tt_common_entry *common) +{ + void *hdr; + struct batadv_softif_vlan *vlan; + struct batadv_tt_local_entry *local; + unsigned int last_seen_msecs; + u32 crc; + + local = container_of(common, struct batadv_tt_local_entry, common); + last_seen_msecs = jiffies_to_msecs(jiffies - local->last_seen); + + vlan = batadv_softif_vlan_get(bat_priv, common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_softif_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_LOCAL); + if (!hdr) + return -ENOBUFS; + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (!(common->flags & BATADV_TT_CLIENT_NOPURGE) && + nla_put_u32(msg, BATADV_ATTR_LAST_SEEN_MSECS, last_seen_msecs)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_local_dump_bucket - Dump one TT local bucket into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @head: Pointer to the list containing the local tt entries + * @idx_s: Number of entries to skip + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_local_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct hlist_head *head, int *idx_s) +{ + struct batadv_tt_common_entry *common; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(common, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_tt_local_dump_entry(msg, portid, seq, bat_priv, + common)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + return 0; +} + +/** + * batadv_tt_local_dump - Dump TT local entries into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: Error code, or 0 on success + */ +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_hashtable *hash; + struct hlist_head *head; + int ret; + int ifindex; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int portid = NETLINK_CB(cb->skb).portid; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hash = bat_priv->tt.local_hash; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_tt_local_dump_bucket(msg, portid, cb->nlh->nlmsg_seq, + bat_priv, head, &idx)) + break; + + bucket++; + } + + ret = msg->len; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + cb->args[0] = bucket; + cb->args[1] = idx; + + return ret; +} static void batadv_tt_local_set_pending(struct batadv_priv *bat_priv, @@ -1259,7 +1477,7 @@ static void batadv_tt_changes_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(entry, safe, &bat_priv->tt.changes_list, list) { list_del(&entry->list); - kfree(entry); + kmem_cache_free(batadv_tt_change_cache, entry); } atomic_set(&bat_priv->tt.local_changes, 0); @@ -1341,7 +1559,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, goto out; } - orig_entry = kzalloc(sizeof(*orig_entry), GFP_ATOMIC); + orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC); if (!orig_entry) goto out; @@ -1351,9 +1569,9 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; kref_init(&orig_entry->refcount); - kref_get(&orig_entry->refcount); spin_lock_bh(&tt_global->list_lock); + kref_get(&orig_entry->refcount); hlist_add_head_rcu(&orig_entry->list, &tt_global->orig_list); spin_unlock_bh(&tt_global->list_lock); @@ -1411,7 +1629,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, goto out; if (!tt_global_entry) { - tt_global_entry = kzalloc(sizeof(*tt_global_entry), GFP_ATOMIC); + tt_global_entry = kmem_cache_zalloc(batadv_tg_cache, + GFP_ATOMIC); if (!tt_global_entry) goto out; @@ -1428,13 +1647,13 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, if (flags & BATADV_TT_CLIENT_ROAM) tt_global_entry->roam_at = jiffies; kref_init(&common->refcount); - kref_get(&common->refcount); common->added_at = jiffies; INIT_HLIST_HEAD(&tt_global_entry->orig_list); atomic_set(&tt_global_entry->orig_list_count, 0); spin_lock_init(&tt_global_entry->list_lock); + kref_get(&common->refcount); hash_added = batadv_hash_add(bat_priv->tt.global_hash, batadv_compare_tt, batadv_choose_tt, common, @@ -1579,6 +1798,7 @@ batadv_transtable_best_orig(struct batadv_priv *bat_priv, return best_entry; } +#ifdef CONFIG_BATMAN_ADV_DEBUGFS /** * batadv_tt_global_print_entry - print all orig nodes who announce the address * for this global entry @@ -1702,6 +1922,219 @@ out: batadv_hardif_put(primary_if); return 0; } +#endif + +/** + * batadv_tt_global_dump_subentry - Dump all TT local entries into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @common: tt local & tt global common data + * @orig: Originator node announcing a non-mesh client + * @best: Is the best originator for the TT entry + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_tt_common_entry *common, + struct batadv_tt_orig_list_entry *orig, + bool best) +{ + void *hdr; + struct batadv_orig_node_vlan *vlan; + u8 last_ttvn; + u32 crc; + + vlan = batadv_orig_node_vlan_get(orig->orig_node, + common->vid); + if (!vlan) + return 0; + + crc = vlan->tt.crc; + + batadv_orig_node_vlan_put(vlan); + + hdr = genlmsg_put(msg, portid, seq, &batadv_netlink_family, + NLM_F_MULTI, + BATADV_CMD_GET_TRANSTABLE_GLOBAL); + if (!hdr) + return -ENOBUFS; + + last_ttvn = atomic_read(&orig->orig_node->last_ttvn); + + if (nla_put(msg, BATADV_ATTR_TT_ADDRESS, ETH_ALEN, common->addr) || + nla_put(msg, BATADV_ATTR_ORIG_ADDRESS, ETH_ALEN, + orig->orig_node->orig) || + nla_put_u8(msg, BATADV_ATTR_TT_TTVN, orig->ttvn) || + nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) || + nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || + nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + goto nla_put_failure; + + if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + return 0; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +/** + * batadv_tt_global_dump_entry - Dump one TT global entry into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @common: tt local & tt global common data + * @sub_s: Number of entries to skip + * + * This function assumes the caller holds rcu_read_lock(). + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_entry(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct batadv_tt_common_entry *common, int *sub_s) +{ + struct batadv_tt_orig_list_entry *orig_entry, *best_entry; + struct batadv_tt_global_entry *global; + struct hlist_head *head; + int sub = 0; + bool best; + + global = container_of(common, struct batadv_tt_global_entry, common); + best_entry = batadv_transtable_best_orig(bat_priv, global); + head = &global->orig_list; + + hlist_for_each_entry_rcu(orig_entry, head, list) { + if (sub++ < *sub_s) + continue; + + best = (orig_entry == best_entry); + + if (batadv_tt_global_dump_subentry(msg, portid, seq, common, + orig_entry, best)) { + *sub_s = sub - 1; + return -EMSGSIZE; + } + } + + *sub_s = 0; + return 0; +} + +/** + * batadv_tt_global_dump_bucket - Dump one TT local bucket into a message + * @msg: Netlink message to dump into + * @portid: Port making netlink request + * @seq: Sequence number of netlink message + * @bat_priv: The bat priv with all the soft interface information + * @head: Pointer to the list containing the global tt entries + * @idx_s: Number of entries to skip + * @sub: Number of entries to skip + * + * Return: Error code, or 0 on success + */ +static int +batadv_tt_global_dump_bucket(struct sk_buff *msg, u32 portid, u32 seq, + struct batadv_priv *bat_priv, + struct hlist_head *head, int *idx_s, int *sub) +{ + struct batadv_tt_common_entry *common; + int idx = 0; + + rcu_read_lock(); + hlist_for_each_entry_rcu(common, head, hash_entry) { + if (idx++ < *idx_s) + continue; + + if (batadv_tt_global_dump_entry(msg, portid, seq, bat_priv, + common, sub)) { + rcu_read_unlock(); + *idx_s = idx - 1; + return -EMSGSIZE; + } + } + rcu_read_unlock(); + + *idx_s = 0; + *sub = 0; + return 0; +} + +/** + * batadv_tt_global_dump - Dump TT global entries into a message + * @msg: Netlink message to dump into + * @cb: Parameters from query + * + * Return: Error code, or length of message on success + */ +int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb) +{ + struct net *net = sock_net(cb->skb->sk); + struct net_device *soft_iface; + struct batadv_priv *bat_priv; + struct batadv_hard_iface *primary_if = NULL; + struct batadv_hashtable *hash; + struct hlist_head *head; + int ret; + int ifindex; + int bucket = cb->args[0]; + int idx = cb->args[1]; + int sub = cb->args[2]; + int portid = NETLINK_CB(cb->skb).portid; + + ifindex = batadv_netlink_get_ifindex(cb->nlh, BATADV_ATTR_MESH_IFINDEX); + if (!ifindex) + return -EINVAL; + + soft_iface = dev_get_by_index(net, ifindex); + if (!soft_iface || !batadv_softif_is_valid(soft_iface)) { + ret = -ENODEV; + goto out; + } + + bat_priv = netdev_priv(soft_iface); + + primary_if = batadv_primary_if_get_selected(bat_priv); + if (!primary_if || primary_if->if_status != BATADV_IF_ACTIVE) { + ret = -ENOENT; + goto out; + } + + hash = bat_priv->tt.global_hash; + + while (bucket < hash->size) { + head = &hash->table[bucket]; + + if (batadv_tt_global_dump_bucket(msg, portid, + cb->nlh->nlmsg_seq, bat_priv, + head, &idx, &sub)) + break; + + bucket++; + } + + ret = msg->len; + + out: + if (primary_if) + batadv_hardif_put(primary_if); + if (soft_iface) + dev_put(soft_iface); + + cb->args[0] = bucket; + cb->args[1] = idx; + cb->args[2] = sub; + + return ret; +} /** * _batadv_tt_global_del_orig_entry - remove and free an orig_entry @@ -2280,7 +2713,7 @@ static void batadv_tt_req_node_release(struct kref *ref) tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); - kfree(tt_req_node); + kmem_cache_free(batadv_tt_req_cache, tt_req_node); } /** @@ -2367,7 +2800,7 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, goto unlock; } - tt_req_node = kmalloc(sizeof(*tt_req_node), GFP_ATOMIC); + tt_req_node = kmem_cache_alloc(batadv_tt_req_cache, GFP_ATOMIC); if (!tt_req_node) goto unlock; @@ -3104,7 +3537,7 @@ static void batadv_tt_roam_list_free(struct batadv_priv *bat_priv) list_for_each_entry_safe(node, safe, &bat_priv->tt.roam_list, list) { list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); @@ -3121,7 +3554,7 @@ static void batadv_tt_roam_purge(struct batadv_priv *bat_priv) continue; list_del(&node->list); - kfree(node); + kmem_cache_free(batadv_tt_roam_cache, node); } spin_unlock_bh(&bat_priv->tt.roam_list_lock); } @@ -3162,7 +3595,8 @@ static bool batadv_tt_check_roam_count(struct batadv_priv *bat_priv, u8 *client) } if (!ret) { - tt_roam_node = kmalloc(sizeof(*tt_roam_node), GFP_ATOMIC); + tt_roam_node = kmem_cache_alloc(batadv_tt_roam_cache, + GFP_ATOMIC); if (!tt_roam_node) goto unlock; @@ -3865,3 +4299,85 @@ bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, return ret; } + +/** + * batadv_tt_cache_init - Initialize tt memory object cache + * + * Return: 0 on success or negative error number in case of failure. + */ +int __init batadv_tt_cache_init(void) +{ + size_t tl_size = sizeof(struct batadv_tt_local_entry); + size_t tg_size = sizeof(struct batadv_tt_global_entry); + size_t tt_orig_size = sizeof(struct batadv_tt_orig_list_entry); + size_t tt_change_size = sizeof(struct batadv_tt_change_node); + size_t tt_req_size = sizeof(struct batadv_tt_req_node); + size_t tt_roam_size = sizeof(struct batadv_tt_roam_node); + + batadv_tl_cache = kmem_cache_create("batadv_tl_cache", tl_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tl_cache) + return -ENOMEM; + + batadv_tg_cache = kmem_cache_create("batadv_tg_cache", tg_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tg_cache) + goto err_tt_tl_destroy; + + batadv_tt_orig_cache = kmem_cache_create("batadv_tt_orig_cache", + tt_orig_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_orig_cache) + goto err_tt_tg_destroy; + + batadv_tt_change_cache = kmem_cache_create("batadv_tt_change_cache", + tt_change_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_change_cache) + goto err_tt_orig_destroy; + + batadv_tt_req_cache = kmem_cache_create("batadv_tt_req_cache", + tt_req_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_req_cache) + goto err_tt_change_destroy; + + batadv_tt_roam_cache = kmem_cache_create("batadv_tt_roam_cache", + tt_roam_size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!batadv_tt_roam_cache) + goto err_tt_req_destroy; + + return 0; + +err_tt_req_destroy: + kmem_cache_destroy(batadv_tt_req_cache); + batadv_tt_req_cache = NULL; +err_tt_change_destroy: + kmem_cache_destroy(batadv_tt_change_cache); + batadv_tt_change_cache = NULL; +err_tt_orig_destroy: + kmem_cache_destroy(batadv_tt_orig_cache); + batadv_tt_orig_cache = NULL; +err_tt_tg_destroy: + kmem_cache_destroy(batadv_tg_cache); + batadv_tg_cache = NULL; +err_tt_tl_destroy: + kmem_cache_destroy(batadv_tl_cache); + batadv_tl_cache = NULL; + + return -ENOMEM; +} + +/** + * batadv_tt_cache_destroy - Destroy tt memory object cache + */ +void batadv_tt_cache_destroy(void) +{ + kmem_cache_destroy(batadv_tl_cache); + kmem_cache_destroy(batadv_tg_cache); + kmem_cache_destroy(batadv_tt_orig_cache); + kmem_cache_destroy(batadv_tt_change_cache); + kmem_cache_destroy(batadv_tt_req_cache); + kmem_cache_destroy(batadv_tt_roam_cache); +} diff --git a/net/batman-adv/translation-table.h b/net/batman-adv/translation-table.h index 7c7e2c006bfe..783fdba84db2 100644 --- a/net/batman-adv/translation-table.h +++ b/net/batman-adv/translation-table.h @@ -22,8 +22,10 @@ #include +struct netlink_callback; struct net_device; struct seq_file; +struct sk_buff; int batadv_tt_init(struct batadv_priv *bat_priv); bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, @@ -33,6 +35,8 @@ u16 batadv_tt_local_remove(struct batadv_priv *bat_priv, const char *message, bool roaming); int batadv_tt_local_seq_print_text(struct seq_file *seq, void *offset); int batadv_tt_global_seq_print_text(struct seq_file *seq, void *offset); +int batadv_tt_local_dump(struct sk_buff *msg, struct netlink_callback *cb); +int batadv_tt_global_dump(struct sk_buff *msg, struct netlink_callback *cb); void batadv_tt_global_del_orig(struct batadv_priv *bat_priv, struct batadv_orig_node *orig_node, s32 match_vid, const char *message); @@ -59,4 +63,7 @@ bool batadv_tt_add_temporary_global_entry(struct batadv_priv *bat_priv, bool batadv_tt_global_is_isolated(struct batadv_priv *bat_priv, const u8 *addr, unsigned short vid); +int batadv_tt_cache_init(void); +void batadv_tt_cache_destroy(void); + #endif /* _NET_BATMAN_ADV_TRANSLATION_TABLE_H_ */ diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c index 3d1cf0fb112d..77654f055f24 100644 --- a/net/batman-adv/tvlv.c +++ b/net/batman-adv/tvlv.c @@ -257,8 +257,13 @@ void batadv_tvlv_container_register(struct batadv_priv *bat_priv, spin_lock_bh(&bat_priv->tvlv.container_list_lock); tvlv_old = batadv_tvlv_container_get(bat_priv, type, version); batadv_tvlv_container_remove(bat_priv, tvlv_old); + + kref_get(&tvlv_new->refcount); hlist_add_head(&tvlv_new->list, &bat_priv->tvlv.container_list); spin_unlock_bh(&bat_priv->tvlv.container_list_lock); + + /* don't return reference to new tvlv_container */ + batadv_tvlv_container_put(tvlv_new); } /** @@ -542,8 +547,12 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv, INIT_HLIST_NODE(&tvlv_handler->list); spin_lock_bh(&bat_priv->tvlv.handler_list_lock); + kref_get(&tvlv_handler->refcount); hlist_add_head_rcu(&tvlv_handler->list, &bat_priv->tvlv.handler_list); spin_unlock_bh(&bat_priv->tvlv.handler_list_lock); + + /* don't return reference to new tvlv_handler */ + batadv_tvlv_handler_put(tvlv_handler); } /** diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index a64522c3b45d..b3dd1a381aad 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -28,6 +28,7 @@ #include #include #include +#include #include /* for linux/wait.h */ #include #include @@ -132,7 +133,6 @@ struct batadv_hard_iface_bat_v { * @rcu: struct used for freeing in an RCU-safe manner * @bat_iv: per hard-interface B.A.T.M.A.N. IV data * @bat_v: per hard-interface B.A.T.M.A.N. V data - * @cleanup_work: work queue callback item for hard-interface deinit * @debug_dir: dentry for nc subdir in batman-adv directory in debugfs * @neigh_list: list of unique single hop neighbors via this interface * @neigh_list_lock: lock protecting neigh_list @@ -152,7 +152,6 @@ struct batadv_hard_iface { #ifdef CONFIG_BATMAN_ADV_BATMAN_V struct batadv_hard_iface_bat_v bat_v; #endif - struct work_struct cleanup_work; struct dentry *debug_dir; struct hlist_head neigh_list; /* neigh_list_lock protects: neigh_list */ @@ -1015,7 +1014,6 @@ struct batadv_priv_bat_v { * @forw_bcast_list_lock: lock protecting forw_bcast_list * @tp_list_lock: spinlock protecting @tp_list * @orig_work: work queue callback item for orig node purging - * @cleanup_work: work queue callback item for soft-interface deinit * @primary_if: one of the hard-interfaces assigned to this mesh interface * becomes the primary interface * @algo_ops: routing algorithm used by this mesh interface @@ -1074,7 +1072,6 @@ struct batadv_priv { spinlock_t tp_list_lock; /* protects tp_list */ atomic_t tp_num; struct delayed_work orig_work; - struct work_struct cleanup_work; struct batadv_hard_iface __rcu *primary_if; /* rcu protected pointer */ struct batadv_algo_ops *algo_ops; struct hlist_head softif_vlan_list; @@ -1379,6 +1376,7 @@ struct batadv_skb_cb { * locally generated packet * @if_outgoing: packet where the packet should be sent to, or NULL if * unspecified + * @queue_left: The queue (counter) this packet was applied to */ struct batadv_forw_packet { struct hlist_node list; @@ -1391,11 +1389,13 @@ struct batadv_forw_packet { struct delayed_work delayed_work; struct batadv_hard_iface *if_incoming; struct batadv_hard_iface *if_outgoing; + atomic_t *queue_left; }; /** * struct batadv_algo_iface_ops - mesh algorithm callbacks (interface specific) * @activate: start routing mechanisms when hard-interface is brought up + * (optional) * @enable: init routing info when hard-interface is enabled * @disable: de-init routing info when hard-interface is disabled * @update_mac: (re-)init mac addresses of the protocol information @@ -1413,11 +1413,13 @@ struct batadv_algo_iface_ops { /** * struct batadv_algo_neigh_ops - mesh algorithm callbacks (neighbour specific) * @hardif_init: called on creation of single hop entry + * (optional) * @cmp: compare the metrics of two neighbors for their respective outgoing * interfaces * @is_similar_or_better: check if neigh1 is equally similar or better than * neigh2 for their respective outgoing interface from the metric prospective * @print: print the single hop neighbor list (optional) + * @dump: dump neighbors to a netlink socket (optional) */ struct batadv_algo_neigh_ops { void (*hardif_init)(struct batadv_hardif_neigh_node *neigh); @@ -1429,26 +1431,64 @@ struct batadv_algo_neigh_ops { struct batadv_hard_iface *if_outgoing1, struct batadv_neigh_node *neigh2, struct batadv_hard_iface *if_outgoing2); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq); +#endif + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); }; /** * struct batadv_algo_orig_ops - mesh algorithm callbacks (originator specific) * @free: free the resources allocated by the routing algorithm for an orig_node - * object + * object (optional) * @add_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to a new hard-interface being added into the mesh + * orig_node due to a new hard-interface being added into the mesh (optional) * @del_if: ask the routing algorithm to apply the needed changes to the - * orig_node due to an hard-interface being removed from the mesh + * orig_node due to an hard-interface being removed from the mesh (optional) * @print: print the originator table (optional) + * @dump: dump originators to a netlink socket (optional) */ struct batadv_algo_orig_ops { void (*free)(struct batadv_orig_node *orig_node); int (*add_if)(struct batadv_orig_node *orig_node, int max_if_num); int (*del_if)(struct batadv_orig_node *orig_node, int max_if_num, int del_if_num); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS void (*print)(struct batadv_priv *priv, struct seq_file *seq, struct batadv_hard_iface *hard_iface); +#endif + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv, + struct batadv_hard_iface *hard_iface); +}; + +/** + * struct batadv_algo_gw_ops - mesh algorithm callbacks (GW specific) + * @store_sel_class: parse and stores a new GW selection class (optional) + * @show_sel_class: prints the current GW selection class (optional) + * @get_best_gw_node: select the best GW from the list of available nodes + * (optional) + * @is_eligible: check if a newly discovered GW is a potential candidate for + * the election as best GW (optional) + * @print: print the gateway table (optional) + * @dump: dump gateways to a netlink socket (optional) + */ +struct batadv_algo_gw_ops { + ssize_t (*store_sel_class)(struct batadv_priv *bat_priv, char *buff, + size_t count); + ssize_t (*show_sel_class)(struct batadv_priv *bat_priv, char *buff); + struct batadv_gw_node *(*get_best_gw_node) + (struct batadv_priv *bat_priv); + bool (*is_eligible)(struct batadv_priv *bat_priv, + struct batadv_orig_node *curr_gw_orig, + struct batadv_orig_node *orig_node); +#ifdef CONFIG_BATMAN_ADV_DEBUGFS + void (*print)(struct batadv_priv *bat_priv, struct seq_file *seq); +#endif + void (*dump)(struct sk_buff *msg, struct netlink_callback *cb, + struct batadv_priv *priv); }; /** @@ -1458,6 +1498,7 @@ struct batadv_algo_orig_ops { * @iface: callbacks related to interface handling * @neigh: callbacks related to neighbors handling * @orig: callbacks related to originators handling + * @gw: callbacks related to GW mode */ struct batadv_algo_ops { struct hlist_node list; @@ -1465,6 +1506,7 @@ struct batadv_algo_ops { struct batadv_algo_iface_ops iface; struct batadv_algo_neigh_ops neigh; struct batadv_algo_orig_ops orig; + struct batadv_algo_gw_ops gw; }; /** @@ -1564,4 +1606,17 @@ enum batadv_tvlv_handler_flags { BATADV_TVLV_HANDLER_OGM_CALLED = BIT(2), }; +/** + * struct batadv_store_mesh_work - Work queue item to detach add/del interface + * from sysfs locks + * @net_dev: netdevice to add/remove to/from batman-adv soft-interface + * @soft_iface_name: name of soft-interface to modify + * @work: work queue item + */ +struct batadv_store_mesh_work { + struct net_device *net_dev; + char soft_iface_name[IFNAMSIZ]; + struct work_struct work; +}; + #endif /* _NET_BATMAN_ADV_TYPES_H_ */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 0b5f729d08d2..1aff2da9bc74 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -26,11 +26,13 @@ #include #include +#include #include #include #include +#include "leds.h" #include "selftest.h" /* Bluetooth sockets */ @@ -712,13 +714,16 @@ static struct net_proto_family bt_sock_family_ops = { struct dentry *bt_debugfs; EXPORT_SYMBOL_GPL(bt_debugfs); +#define VERSION __stringify(BT_SUBSYS_VERSION) "." \ + __stringify(BT_SUBSYS_REVISION) + static int __init bt_init(void) { int err; sock_skb_cb_check_size(sizeof(struct bt_skb_cb)); - BT_INFO("Core ver %s", BT_SUBSYS_VERSION); + BT_INFO("Core ver %s", VERSION); err = bt_selftest(); if (err < 0) @@ -726,6 +731,8 @@ static int __init bt_init(void) bt_debugfs = debugfs_create_dir("bluetooth", NULL); + bt_leds_init(); + err = bt_sysfs_init(); if (err < 0) return err; @@ -785,6 +792,8 @@ static void __exit bt_exit(void) bt_sysfs_cleanup(); + bt_leds_cleanup(); + debugfs_remove_recursive(bt_debugfs); } @@ -792,7 +801,7 @@ subsys_initcall(bt_init); module_exit(bt_exit); MODULE_AUTHOR("Marcel Holtmann "); -MODULE_DESCRIPTION("Bluetooth Core ver " BT_SUBSYS_VERSION); -MODULE_VERSION(BT_SUBSYS_VERSION); +MODULE_DESCRIPTION("Bluetooth Core ver " VERSION); +MODULE_VERSION(VERSION); MODULE_LICENSE("GPL"); MODULE_ALIAS_NETPROTO(PF_BLUETOOTH); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ddf8432fe8fb..3ac89e9ace71 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1562,6 +1562,7 @@ int hci_dev_do_close(struct hci_dev *hdev) auto_off = hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF); if (!auto_off && hdev->dev_type == HCI_PRIMARY && + !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && hci_dev_test_flag(hdev, HCI_MGMT)) __mgmt_power_off(hdev); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b0e23dfc5c34..c8135680c43e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -971,14 +971,14 @@ void __hci_req_enable_advertising(struct hci_request *req) hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); } -static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +static u8 append_local_name(struct hci_dev *hdev, u8 *ptr, u8 ad_len) { - u8 ad_len = 0; size_t name_len; + int max_len; + max_len = HCI_MAX_AD_LENGTH - ad_len - 2; name_len = strlen(hdev->dev_name); - if (name_len > 0) { - size_t max_len = HCI_MAX_AD_LENGTH - ad_len - 2; + if (name_len > 0 && max_len > 0) { if (name_len > max_len) { name_len = max_len; @@ -997,22 +997,42 @@ static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) return ad_len; } +static u8 create_default_scan_rsp_data(struct hci_dev *hdev, u8 *ptr) +{ + return append_local_name(hdev, ptr, 0); +} + static u8 create_instance_scan_rsp_data(struct hci_dev *hdev, u8 instance, u8 *ptr) { struct adv_info *adv_instance; + u32 instance_flags; + u8 scan_rsp_len = 0; adv_instance = hci_find_adv_instance(hdev, instance); if (!adv_instance) return 0; - /* TODO: Set the appropriate entries based on advertising instance flags - * here once flags other than 0 are supported. - */ + instance_flags = adv_instance->flags; + + if ((instance_flags & MGMT_ADV_FLAG_APPEARANCE) && hdev->appearance) { + ptr[0] = 3; + ptr[1] = EIR_APPEARANCE; + put_unaligned_le16(hdev->appearance, ptr + 2); + scan_rsp_len += 4; + ptr += 4; + } + memcpy(ptr, adv_instance->scan_rsp_data, adv_instance->scan_rsp_len); - return adv_instance->scan_rsp_len; + scan_rsp_len += adv_instance->scan_rsp_len; + ptr += adv_instance->scan_rsp_len; + + if (instance_flags & MGMT_ADV_FLAG_LOCAL_NAME) + scan_rsp_len = append_local_name(hdev, ptr, scan_rsp_len); + + return scan_rsp_len; } void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) @@ -1194,7 +1214,7 @@ static void adv_timeout_expire(struct work_struct *work) hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, instance, false); + hci_req_clear_adv_instance(hdev, NULL, &req, instance, false); if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); @@ -1284,8 +1304,9 @@ static void cancel_adv_timeout(struct hci_dev *hdev) * setting. * - force == false: Only instances that have a timeout will be removed. */ -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force) +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force) { struct adv_info *adv_instance, *n, *next_instance = NULL; int err; @@ -1311,7 +1332,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, rem_inst = adv_instance->instance; err = hci_remove_adv_instance(hdev, rem_inst); if (!err) - mgmt_advertising_removed(NULL, hdev, rem_inst); + mgmt_advertising_removed(sk, hdev, rem_inst); } } else { adv_instance = hci_find_adv_instance(hdev, instance); @@ -1325,7 +1346,7 @@ void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, err = hci_remove_adv_instance(hdev, instance); if (!err) - mgmt_advertising_removed(NULL, hdev, instance); + mgmt_advertising_removed(sk, hdev, instance); } } @@ -1716,7 +1737,7 @@ void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, * function. To be safe hard-code one of the * values that's suitable for SCO. */ - rej.reason = HCI_ERROR_REMOTE_LOW_RESOURCES; + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, sizeof(rej), &rej); diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index b2d044bdc732..ac1e11006f38 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -73,8 +73,9 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, bool force); -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct hci_request *req, - u8 instance, bool force); +void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, + struct hci_request *req, u8 instance, + bool force); void __hci_req_update_class(struct hci_request *req); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 96f04b7b9556..48f9471e7c85 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -38,6 +39,8 @@ static LIST_HEAD(mgmt_chan_list); static DEFINE_MUTEX(mgmt_chan_list_lock); +static DEFINE_IDA(sock_cookie_ida); + static atomic_t monitor_promisc = ATOMIC_INIT(0); /* ----- HCI socket interface ----- */ @@ -52,6 +55,8 @@ struct hci_pinfo { __u32 cmsg_mask; unsigned short channel; unsigned long flags; + __u32 cookie; + char comm[TASK_COMM_LEN]; }; void hci_sock_set_flag(struct sock *sk, int nr) @@ -74,6 +79,38 @@ unsigned short hci_sock_get_channel(struct sock *sk) return hci_pi(sk)->channel; } +u32 hci_sock_get_cookie(struct sock *sk) +{ + return hci_pi(sk)->cookie; +} + +static bool hci_sock_gen_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (!id) { + id = ida_simple_get(&sock_cookie_ida, 1, 0, GFP_KERNEL); + if (id < 0) + id = 0xffffffff; + + hci_pi(sk)->cookie = id; + get_task_comm(hci_pi(sk)->comm, current); + return true; + } + + return false; +} + +static void hci_sock_free_cookie(struct sock *sk) +{ + int id = hci_pi(sk)->cookie; + + if (id) { + hci_pi(sk)->cookie = 0xffffffff; + ida_simple_remove(&sock_cookie_ida, id); + } +} + static inline int hci_test_bit(int nr, const void *addr) { return *((const __u32 *) addr + (nr >> 5)) & ((__u32) 1 << (nr & 31)); @@ -305,6 +342,60 @@ void hci_send_to_monitor(struct hci_dev *hdev, struct sk_buff *skb) kfree_skb(skb_copy); } +void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, + void *data, u16 data_len, ktime_t tstamp, + int flag, struct sock *skip_sk) +{ + struct sock *sk; + __le16 index; + + if (hdev) + index = cpu_to_le16(hdev->id); + else + index = cpu_to_le16(MGMT_INDEX_NONE); + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + if (hci_pi(sk)->channel != HCI_CHANNEL_CONTROL) + continue; + + /* Ignore socket without the flag set */ + if (!hci_sock_test_flag(sk, flag)) + continue; + + /* Skip the original socket */ + if (sk == skip_sk) + continue; + + skb = bt_skb_alloc(6 + data_len, GFP_ATOMIC); + if (!skb) + continue; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(event, skb_put(skb, 2)); + + if (data) + memcpy(skb_put(skb, data_len), data, data_len); + + skb->tstamp = tstamp; + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) { struct hci_mon_hdr *hdr; @@ -384,6 +475,129 @@ static struct sk_buff *create_monitor_event(struct hci_dev *hdev, int event) return skb; } +static struct sk_buff *create_monitor_ctrl_open(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + u16 format; + u8 ver[3]; + u32 flags; + + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: + format = 0x0000; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; + case HCI_CHANNEL_USER: + format = 0x0001; + ver[0] = BT_SUBSYS_VERSION; + put_unaligned_le16(BT_SUBSYS_REVISION, ver + 1); + break; + case HCI_CHANNEL_CONTROL: + format = 0x0002; + mgmt_fill_version_info(ver); + break; + default: + /* No message for unsupported format */ + return NULL; + } + + skb = bt_skb_alloc(14 + TASK_COMM_LEN , GFP_ATOMIC); + if (!skb) + return NULL; + + flags = hci_sock_test_flag(sk, HCI_SOCK_TRUSTED) ? 0x1 : 0x0; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(format, skb_put(skb, 2)); + memcpy(skb_put(skb, sizeof(ver)), ver, sizeof(ver)); + put_unaligned_le32(flags, skb_put(skb, 4)); + *skb_put(skb, 1) = TASK_COMM_LEN; + memcpy(skb_put(skb, TASK_COMM_LEN), hci_pi(sk)->comm, TASK_COMM_LEN); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_OPEN); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + +static struct sk_buff *create_monitor_ctrl_close(struct sock *sk) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + /* No message needed when cookie is not present */ + if (!hci_pi(sk)->cookie) + return NULL; + + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: + case HCI_CHANNEL_CONTROL: + break; + default: + /* No message for unsupported format */ + return NULL; + } + + skb = bt_skb_alloc(4, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_CLOSE); + if (hci_pi(sk)->hdev) + hdr->index = cpu_to_le16(hci_pi(sk)->hdev->id); + else + hdr->index = cpu_to_le16(HCI_DEV_NONE); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + +static struct sk_buff *create_monitor_ctrl_command(struct sock *sk, u16 index, + u16 opcode, u16 len, + const void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(hci_pi(sk)->cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_COMMAND); + hdr->index = cpu_to_le16(index); + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + static void __printf(2, 3) send_monitor_note(struct sock *sk, const char *fmt, ...) { @@ -458,6 +672,26 @@ static void send_monitor_replay(struct sock *sk) read_unlock(&hci_dev_list_lock); } +static void send_monitor_control_replay(struct sock *mon_sk) +{ + struct sock *sk; + + read_lock(&hci_sk_list.lock); + + sk_for_each(sk, &hci_sk_list.head) { + struct sk_buff *skb; + + skb = create_monitor_ctrl_open(sk); + if (!skb) + continue; + + if (sock_queue_rcv_skb(mon_sk, skb)) + kfree_skb(skb); + } + + read_unlock(&hci_sk_list.lock); +} + /* Generate internal stack event */ static void hci_si_event(struct hci_dev *hdev, int type, int dlen, void *data) { @@ -585,6 +819,7 @@ static int hci_sock_release(struct socket *sock) { struct sock *sk = sock->sk; struct hci_dev *hdev; + struct sk_buff *skb; BT_DBG("sock %p sk %p", sock, sk); @@ -593,8 +828,24 @@ static int hci_sock_release(struct socket *sock) hdev = hci_pi(sk)->hdev; - if (hci_pi(sk)->channel == HCI_CHANNEL_MONITOR) + switch (hci_pi(sk)->channel) { + case HCI_CHANNEL_MONITOR: atomic_dec(&monitor_promisc); + break; + case HCI_CHANNEL_RAW: + case HCI_CHANNEL_USER: + case HCI_CHANNEL_CONTROL: + /* Send event to monitor */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + + hci_sock_free_cookie(sk); + break; + } bt_sock_unlink(&hci_sk_list, sk); @@ -721,6 +972,27 @@ static int hci_sock_ioctl(struct socket *sock, unsigned int cmd, goto done; } + /* When calling an ioctl on an unbound raw socket, then ensure + * that the monitor gets informed. Ensure that the resulting event + * is only send once by checking if the cookie exists or not. The + * socket cookie will be only ever generated once for the lifetime + * of a given socket. + */ + if (hci_sock_gen_cookie(sk)) { + struct sk_buff *skb; + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + release_sock(sk); switch (cmd) { @@ -784,6 +1056,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, struct sockaddr_hci haddr; struct sock *sk = sock->sk; struct hci_dev *hdev = NULL; + struct sk_buff *skb; int len, err = 0; BT_DBG("sock %p sk %p", sock, sk); @@ -822,7 +1095,35 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, atomic_inc(&hdev->promisc); } + hci_pi(sk)->channel = haddr.hci_channel; + + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * then there has been already an ioctl issued against + * an unbound socket and with that triggerd an open + * notification. Send a close notification first to + * allow the state transition to bounded. + */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + + if (capable(CAP_NET_ADMIN)) + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->hdev = hdev; + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } break; case HCI_CHANNEL_USER: @@ -884,9 +1185,38 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } } - atomic_inc(&hdev->promisc); + hci_pi(sk)->channel = haddr.hci_channel; + + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been assigned, + * this socket will transition from a raw socket into + * an user channel socket. For a clean transition, send + * the close notification first. + */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + + /* The user channel is restricted to CAP_NET_ADMIN + * capabilities and with that implicitly trusted. + */ + hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); hci_pi(sk)->hdev = hdev; + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + + atomic_inc(&hdev->promisc); break; case HCI_CHANNEL_MONITOR: @@ -900,6 +1230,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, goto done; } + hci_pi(sk)->channel = haddr.hci_channel; + /* The monitor interface is restricted to CAP_NET_RAW * capabilities and with that implicitly trusted. */ @@ -908,9 +1240,10 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, send_monitor_note(sk, "Linux version %s (%s)", init_utsname()->release, init_utsname()->machine); - send_monitor_note(sk, "Bluetooth subsystem version %s", - BT_SUBSYS_VERSION); + send_monitor_note(sk, "Bluetooth subsystem version %u.%u", + BT_SUBSYS_VERSION, BT_SUBSYS_REVISION); send_monitor_replay(sk); + send_monitor_control_replay(sk); atomic_inc(&monitor_promisc); break; @@ -925,6 +1258,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = -EPERM; goto done; } + + hci_pi(sk)->channel = haddr.hci_channel; break; default: @@ -946,6 +1281,8 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (capable(CAP_NET_ADMIN)) hci_sock_set_flag(sk, HCI_SOCK_TRUSTED); + hci_pi(sk)->channel = haddr.hci_channel; + /* At the moment the index and unconfigured index events * are enabled unconditionally. Setting them on each * socket when binding keeps this functionality. They @@ -956,16 +1293,40 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, * received by untrusted users. Example for such events * are changes to settings, class of device, name etc. */ - if (haddr.hci_channel == HCI_CHANNEL_CONTROL) { + if (hci_pi(sk)->channel == HCI_CHANNEL_CONTROL) { + if (!hci_sock_gen_cookie(sk)) { + /* In the case when a cookie has already been + * assigned, this socket will transtion from + * a raw socket into a control socket. To + * allow for a clean transtion, send the + * close notification first. + */ + skb = create_monitor_ctrl_close(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + + /* Send event to monitor */ + skb = create_monitor_ctrl_open(sk); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + hci_sock_set_flag(sk, HCI_MGMT_INDEX_EVENTS); hci_sock_set_flag(sk, HCI_MGMT_UNCONF_INDEX_EVENTS); - hci_sock_set_flag(sk, HCI_MGMT_GENERIC_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_OPTION_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_SETTING_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_set_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); } break; } - - hci_pi(sk)->channel = haddr.hci_channel; sk->sk_state = BT_BOUND; done: @@ -1133,6 +1494,19 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk, goto done; } + if (chan->channel == HCI_CHANNEL_CONTROL) { + struct sk_buff *skb; + + /* Send event to monitor */ + skb = create_monitor_ctrl_command(sk, index, opcode, len, + buf + sizeof(*hdr)); + if (skb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(skb); + } + } + if (opcode >= chan->handler_count || chan->handlers[opcode].func == NULL) { BT_DBG("Unknown op %u", opcode); @@ -1440,6 +1814,9 @@ static int hci_sock_setsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + lock_sock(sk); if (hci_pi(sk)->channel != HCI_CHANNEL_RAW) { @@ -1523,6 +1900,9 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, BT_DBG("sk %p, opt %d", sk, optname); + if (level != SOL_HCI) + return -ENOPROTOOPT; + if (get_user(len, optlen)) return -EFAULT; diff --git a/net/bluetooth/leds.c b/net/bluetooth/leds.c index 8319c8440c89..cb670b5594eb 100644 --- a/net/bluetooth/leds.c +++ b/net/bluetooth/leds.c @@ -11,6 +11,8 @@ #include "leds.h" +DEFINE_LED_TRIGGER(bt_power_led_trigger); + struct hci_basic_led_trigger { struct led_trigger led_trigger; struct hci_dev *hdev; @@ -24,6 +26,21 @@ void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) if (hdev->power_led) led_trigger_event(hdev->power_led, enabled ? LED_FULL : LED_OFF); + + if (!enabled) { + struct hci_dev *d; + + read_lock(&hci_dev_list_lock); + + list_for_each_entry(d, &hci_dev_list, list) { + if (test_bit(HCI_UP, &d->flags)) + enabled = true; + } + + read_unlock(&hci_dev_list_lock); + } + + led_trigger_event(bt_power_led_trigger, enabled ? LED_FULL : LED_OFF); } static void power_activate(struct led_classdev *led_cdev) @@ -72,3 +89,13 @@ void hci_leds_init(struct hci_dev *hdev) /* initialize power_led */ hdev->power_led = led_allocate_basic(hdev, power_activate, "power"); } + +void bt_leds_init(void) +{ + led_trigger_register_simple("bluetooth-power", &bt_power_led_trigger); +} + +void bt_leds_cleanup(void) +{ + led_trigger_unregister_simple(bt_power_led_trigger); +} diff --git a/net/bluetooth/leds.h b/net/bluetooth/leds.h index a9c4d6ea01cf..08725a2fbd9b 100644 --- a/net/bluetooth/leds.h +++ b/net/bluetooth/leds.h @@ -7,10 +7,20 @@ */ #if IS_ENABLED(CONFIG_BT_LEDS) + void hci_leds_update_powered(struct hci_dev *hdev, bool enabled); void hci_leds_init(struct hci_dev *hdev); + +void bt_leds_init(void); +void bt_leds_cleanup(void); + #else + static inline void hci_leds_update_powered(struct hci_dev *hdev, bool enabled) {} static inline void hci_leds_init(struct hci_dev *hdev) {} + +static inline void bt_leds_init(void) {} +static inline void bt_leds_cleanup(void) {} + #endif diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 7639290b6de3..19b8a5e9420d 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -38,7 +38,7 @@ #include "mgmt_util.h" #define MGMT_VERSION 1 -#define MGMT_REVISION 13 +#define MGMT_REVISION 14 static const u16 mgmt_commands[] = { MGMT_OP_READ_INDEX_LIST, @@ -104,6 +104,8 @@ static const u16 mgmt_commands[] = { MGMT_OP_REMOVE_ADVERTISING, MGMT_OP_GET_ADV_SIZE_INFO, MGMT_OP_START_LIMITED_DISCOVERY, + MGMT_OP_READ_EXT_INFO, + MGMT_OP_SET_APPEARANCE, }; static const u16 mgmt_events[] = { @@ -141,6 +143,7 @@ static const u16 mgmt_events[] = { MGMT_EV_LOCAL_OOB_DATA_UPDATED, MGMT_EV_ADVERTISING_ADDED, MGMT_EV_ADVERTISING_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; static const u16 mgmt_untrusted_commands[] = { @@ -149,6 +152,7 @@ static const u16 mgmt_untrusted_commands[] = { MGMT_OP_READ_UNCONF_INDEX_LIST, MGMT_OP_READ_CONFIG_INFO, MGMT_OP_READ_EXT_INDEX_LIST, + MGMT_OP_READ_EXT_INFO, }; static const u16 mgmt_untrusted_events[] = { @@ -162,6 +166,7 @@ static const u16 mgmt_untrusted_events[] = { MGMT_EV_NEW_CONFIG_OPTIONS, MGMT_EV_EXT_INDEX_ADDED, MGMT_EV_EXT_INDEX_REMOVED, + MGMT_EV_EXT_INFO_CHANGED, }; #define CACHE_TIMEOUT msecs_to_jiffies(2 * 1000) @@ -256,13 +261,6 @@ static int mgmt_limited_event(u16 event, struct hci_dev *hdev, void *data, flag, skip_sk); } -static int mgmt_generic_event(u16 event, struct hci_dev *hdev, void *data, - u16 len, struct sock *skip_sk) -{ - return mgmt_send_event(event, hdev, HCI_CHANNEL_CONTROL, data, len, - HCI_MGMT_GENERIC_EVENTS, skip_sk); -} - static int mgmt_event(u16 event, struct hci_dev *hdev, void *data, u16 len, struct sock *skip_sk) { @@ -278,6 +276,14 @@ static u8 le_addr_type(u8 mgmt_addr_type) return ADDR_LE_DEV_RANDOM; } +void mgmt_fill_version_info(void *ver) +{ + struct mgmt_rp_read_version *rp = ver; + + rp->version = MGMT_VERSION; + rp->revision = cpu_to_le16(MGMT_REVISION); +} + static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -285,8 +291,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG("sock %p", sk); - rp.version = MGMT_VERSION; - rp.revision = cpu_to_le16(MGMT_REVISION); + mgmt_fill_version_info(&rp); return mgmt_cmd_complete(sk, MGMT_INDEX_NONE, MGMT_OP_READ_VERSION, 0, &rp, sizeof(rp)); @@ -572,8 +577,8 @@ static int new_options(struct hci_dev *hdev, struct sock *skip) { __le32 options = get_missing_options(hdev); - return mgmt_generic_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, - sizeof(options), skip); + return mgmt_limited_event(MGMT_EV_NEW_CONFIG_OPTIONS, hdev, &options, + sizeof(options), HCI_MGMT_OPTION_EVENTS, skip); } static int send_options_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) @@ -862,6 +867,107 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev, sizeof(rp)); } +static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, + u8 data_len) +{ + eir[eir_len++] = sizeof(type) + data_len; + eir[eir_len++] = type; + memcpy(&eir[eir_len], data, data_len); + eir_len += data_len; + + return eir_len; +} + +static inline u16 eir_append_le16(u8 *eir, u16 eir_len, u8 type, u16 data) +{ + eir[eir_len++] = sizeof(type) + sizeof(data); + eir[eir_len++] = type; + put_unaligned_le16(data, &eir[eir_len]); + eir_len += sizeof(data); + + return eir_len; +} + +static u16 append_eir_data_to_buf(struct hci_dev *hdev, u8 *eir) +{ + u16 eir_len = 0; + size_t name_len; + + if (hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) + eir_len = eir_append_data(eir, eir_len, EIR_CLASS_OF_DEV, + hdev->dev_class, 3); + + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + eir_len = eir_append_le16(eir, eir_len, EIR_APPEARANCE, + hdev->appearance); + + name_len = strlen(hdev->dev_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_COMPLETE, + hdev->dev_name, name_len); + + name_len = strlen(hdev->short_name); + eir_len = eir_append_data(eir, eir_len, EIR_NAME_SHORT, + hdev->short_name, name_len); + + return eir_len; +} + +static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev, + void *data, u16 data_len) +{ + char buf[512]; + struct mgmt_rp_read_ext_info *rp = (void *)buf; + u16 eir_len; + + BT_DBG("sock %p %s", sk, hdev->name); + + memset(&buf, 0, sizeof(buf)); + + hci_dev_lock(hdev); + + bacpy(&rp->bdaddr, &hdev->bdaddr); + + rp->version = hdev->hci_ver; + rp->manufacturer = cpu_to_le16(hdev->manufacturer); + + rp->supported_settings = cpu_to_le32(get_supported_settings(hdev)); + rp->current_settings = cpu_to_le32(get_current_settings(hdev)); + + + eir_len = append_eir_data_to_buf(hdev, rp->eir); + rp->eir_len = cpu_to_le16(eir_len); + + hci_dev_unlock(hdev); + + /* If this command is called at least once, then the events + * for class of device and local name changes are disabled + * and only the new extended controller information event + * is used. + */ + hci_sock_set_flag(sk, HCI_MGMT_EXT_INFO_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_DEV_CLASS_EVENTS); + hci_sock_clear_flag(sk, HCI_MGMT_LOCAL_NAME_EVENTS); + + return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_EXT_INFO, 0, rp, + sizeof(*rp) + eir_len); +} + +static int ext_info_changed(struct hci_dev *hdev, struct sock *skip) +{ + char buf[512]; + struct mgmt_ev_ext_info_changed *ev = (void *)buf; + u16 eir_len; + + memset(buf, 0, sizeof(buf)); + + eir_len = append_eir_data_to_buf(hdev, ev->eir); + ev->eir_len = cpu_to_le16(eir_len); + + return mgmt_limited_event(MGMT_EV_EXT_INFO_CHANGED, hdev, ev, + sizeof(*ev) + eir_len, + HCI_MGMT_EXT_INFO_EVENTS, skip); +} + static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) { __le32 settings = cpu_to_le32(get_current_settings(hdev)); @@ -922,7 +1028,7 @@ static int clean_up_hci_state(struct hci_dev *hdev) hci_req_add(&req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); } - hci_req_clear_adv_instance(hdev, NULL, 0x00, false); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, false); if (hci_dev_test_flag(hdev, HCI_LE_ADV)) __hci_req_disable_advertising(&req); @@ -1000,8 +1106,8 @@ static int new_settings(struct hci_dev *hdev, struct sock *skip) { __le32 ev = cpu_to_le32(get_current_settings(hdev)); - return mgmt_generic_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, - sizeof(ev), skip); + return mgmt_limited_event(MGMT_EV_NEW_SETTINGS, hdev, &ev, + sizeof(ev), HCI_MGMT_SETTING_EVENTS, skip); } int mgmt_new_settings(struct hci_dev *hdev) @@ -1690,7 +1796,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) enabled = lmp_host_le_capable(hdev); if (!val) - hci_req_clear_adv_instance(hdev, NULL, 0x00, true); + hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true); if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -2435,6 +2541,8 @@ static int send_pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev, if (!cmd) return -ENOMEM; + cmd->cmd_complete = addr_cmd_complete; + err = hci_send_cmd(hdev, HCI_OP_PIN_CODE_NEG_REPLY, sizeof(cp->addr.bdaddr), &cp->addr.bdaddr); if (err < 0) @@ -2513,8 +2621,8 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data, BT_DBG(""); if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY) - return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, - MGMT_STATUS_INVALID_PARAMS, NULL, 0); + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY, + MGMT_STATUS_INVALID_PARAMS); hci_dev_lock(hdev); @@ -2932,6 +3040,35 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev, HCI_OP_USER_PASSKEY_NEG_REPLY, 0); } +static void adv_expire(struct hci_dev *hdev, u32 flags) +{ + struct adv_info *adv_instance; + struct hci_request req; + int err; + + adv_instance = hci_find_adv_instance(hdev, hdev->cur_adv_instance); + if (!adv_instance) + return; + + /* stop if current instance doesn't need to be changed */ + if (!(adv_instance->flags & flags)) + return; + + cancel_adv_timeout(hdev); + + adv_instance = hci_get_next_instance(hdev, adv_instance->instance); + if (!adv_instance) + return; + + hci_req_init(&req, hdev); + err = __hci_req_schedule_adv_instance(&req, adv_instance->instance, + true); + if (err) + return; + + hci_req_run(&req, NULL); +} + static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; @@ -2947,13 +3084,17 @@ static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) cp = cmd->param; - if (status) + if (status) { mgmt_cmd_status(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, mgmt_status(status)); - else + } else { mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_SET_LOCAL_NAME, 0, cp, sizeof(*cp)); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_LOCAL_NAME); + } + mgmt_pending_remove(cmd); unlock: @@ -2993,8 +3134,9 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, if (err < 0) goto failed; - err = mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, - data, len, sk); + err = mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, data, + len, HCI_MGMT_LOCAL_NAME_EVENTS, sk); + ext_info_changed(hdev, sk); goto failed; } @@ -3017,7 +3159,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data, /* The name is stored in the scan response data and so * no need to udpate the advertising data here. */ - if (lmp_le_capable(hdev)) + if (lmp_le_capable(hdev) && hci_dev_test_flag(hdev, HCI_ADVERTISING)) __hci_req_update_scan_rsp_data(&req, hdev->cur_adv_instance); err = hci_req_run(&req, set_name_complete); @@ -3029,6 +3171,40 @@ failed: return err; } +static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data, + u16 len) +{ + struct mgmt_cp_set_appearance *cp = data; + u16 apperance; + int err; + + BT_DBG(""); + + if (!lmp_le_capable(hdev)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE, + MGMT_STATUS_NOT_SUPPORTED); + + apperance = le16_to_cpu(cp->appearance); + + hci_dev_lock(hdev); + + if (hdev->appearance != apperance) { + hdev->appearance = apperance; + + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) + adv_expire(hdev, MGMT_ADV_FLAG_APPEARANCE); + + ext_info_changed(hdev, sk); + } + + err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_APPEARANCE, 0, NULL, + 0); + + hci_dev_unlock(hdev); + + return err; +} + static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -4869,7 +5045,7 @@ static int clock_info_cmd_complete(struct mgmt_pending_cmd *cmd, u8 status) int err; memset(&rp, 0, sizeof(rp)); - memcpy(&rp.addr, &cmd->param, sizeof(rp.addr)); + memcpy(&rp.addr, cmd->param, sizeof(rp.addr)); if (status) goto complete; @@ -5501,17 +5677,6 @@ unlock: return err; } -static inline u16 eir_append_data(u8 *eir, u16 eir_len, u8 type, u8 *data, - u8 data_len) -{ - eir[eir_len++] = sizeof(type) + data_len; - eir[eir_len++] = type; - memcpy(&eir[eir_len], data, data_len); - eir_len += data_len; - - return eir_len; -} - static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status, u16 opcode, struct sk_buff *skb) { @@ -5815,6 +5980,8 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) flags |= MGMT_ADV_FLAG_DISCOV; flags |= MGMT_ADV_FLAG_LIMITED_DISCOV; flags |= MGMT_ADV_FLAG_MANAGED_FLAGS; + flags |= MGMT_ADV_FLAG_APPEARANCE; + flags |= MGMT_ADV_FLAG_LOCAL_NAME; if (hdev->adv_tx_power != HCI_TX_POWER_INVALID) flags |= MGMT_ADV_FLAG_TX_POWER; @@ -5871,28 +6038,59 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, return err; } -static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, - u8 len, bool is_adv_data) +static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) { u8 max_len = HCI_MAX_AD_LENGTH; - int i, cur_len; - bool flags_managed = false; - bool tx_power_managed = false; if (is_adv_data) { if (adv_flags & (MGMT_ADV_FLAG_DISCOV | MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) { - flags_managed = true; + MGMT_ADV_FLAG_MANAGED_FLAGS)) max_len -= 3; - } - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) { - tx_power_managed = true; + if (adv_flags & MGMT_ADV_FLAG_TX_POWER) max_len -= 3; - } + } else { + /* at least 1 byte of name should fit in */ + if (adv_flags & MGMT_ADV_FLAG_LOCAL_NAME) + max_len -= 3; + + if (adv_flags & (MGMT_ADV_FLAG_APPEARANCE)) + max_len -= 4; } + return max_len; +} + +static bool flags_managed(u32 adv_flags) +{ + return adv_flags & (MGMT_ADV_FLAG_DISCOV | + MGMT_ADV_FLAG_LIMITED_DISCOV | + MGMT_ADV_FLAG_MANAGED_FLAGS); +} + +static bool tx_power_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_TX_POWER; +} + +static bool name_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_LOCAL_NAME; +} + +static bool appearance_managed(u32 adv_flags) +{ + return adv_flags & MGMT_ADV_FLAG_APPEARANCE; +} + +static bool tlv_data_is_valid(u32 adv_flags, u8 *data, u8 len, bool is_adv_data) +{ + int i, cur_len; + u8 max_len; + + max_len = tlv_data_max_len(adv_flags, is_adv_data); + if (len > max_len) return false; @@ -5900,10 +6098,21 @@ static bool tlv_data_is_valid(struct hci_dev *hdev, u32 adv_flags, u8 *data, for (i = 0, cur_len = 0; i < len; i += (cur_len + 1)) { cur_len = data[i]; - if (flags_managed && data[i + 1] == EIR_FLAGS) + if (data[i + 1] == EIR_FLAGS && + (!is_adv_data || flags_managed(adv_flags))) return false; - if (tx_power_managed && data[i + 1] == EIR_TX_POWER) + if (data[i + 1] == EIR_TX_POWER && tx_power_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_COMPLETE && name_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_NAME_SHORT && name_managed(adv_flags)) + return false; + + if (data[i + 1] == EIR_APPEARANCE && + appearance_managed(adv_flags)) return false; /* If the current field length would exceed the total data @@ -6027,8 +6236,8 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, goto unlock; } - if (!tlv_data_is_valid(hdev, flags, cp->data, cp->adv_data_len, true) || - !tlv_data_is_valid(hdev, flags, cp->data + cp->adv_data_len, + if (!tlv_data_is_valid(flags, cp->data, cp->adv_data_len, true) || + !tlv_data_is_valid(flags, cp->data + cp->adv_data_len, cp->scan_rsp_len, false)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_INVALID_PARAMS); @@ -6175,7 +6384,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, hci_req_init(&req, hdev); - hci_req_clear_adv_instance(hdev, &req, cp->instance, true); + hci_req_clear_adv_instance(hdev, sk, &req, cp->instance, true); if (list_empty(&hdev->adv_instances)) __hci_req_disable_advertising(&req); @@ -6211,23 +6420,6 @@ unlock: return err; } -static u8 tlv_data_max_len(u32 adv_flags, bool is_adv_data) -{ - u8 max_len = HCI_MAX_AD_LENGTH; - - if (is_adv_data) { - if (adv_flags & (MGMT_ADV_FLAG_DISCOV | - MGMT_ADV_FLAG_LIMITED_DISCOV | - MGMT_ADV_FLAG_MANAGED_FLAGS)) - max_len -= 3; - - if (adv_flags & MGMT_ADV_FLAG_TX_POWER) - max_len -= 3; - } - - return max_len; -} - static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { @@ -6356,6 +6548,9 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { remove_advertising, MGMT_REMOVE_ADVERTISING_SIZE }, { get_adv_size_info, MGMT_GET_ADV_SIZE_INFO_SIZE }, { start_limited_discovery, MGMT_START_DISCOVERY_SIZE }, + { read_ext_controller_info,MGMT_READ_EXT_INFO_SIZE, + HCI_MGMT_UNTRUSTED }, + { set_appearance, MGMT_SET_APPEARANCE_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -6494,9 +6689,12 @@ void __mgmt_power_off(struct hci_dev *hdev) mgmt_pending_foreach(0, hdev, cmd_complete_rsp, &status); - if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - zero_cod, sizeof(zero_cod), NULL); + if (memcmp(hdev->dev_class, zero_cod, sizeof(zero_cod)) != 0) { + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, + zero_cod, sizeof(zero_cod), + HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } new_settings(hdev, match.sk); @@ -7092,9 +7290,11 @@ void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, mgmt_pending_foreach(MGMT_OP_ADD_UUID, hdev, sk_lookup, &match); mgmt_pending_foreach(MGMT_OP_REMOVE_UUID, hdev, sk_lookup, &match); - if (!status) - mgmt_generic_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, - dev_class, 3, NULL); + if (!status) { + mgmt_limited_event(MGMT_EV_CLASS_OF_DEV_CHANGED, hdev, dev_class, + 3, HCI_MGMT_DEV_CLASS_EVENTS, NULL); + ext_info_changed(hdev, NULL); + } if (match.sk) sock_put(match.sk); @@ -7123,8 +7323,9 @@ void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status) return; } - mgmt_generic_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), - cmd ? cmd->sk : NULL); + mgmt_limited_event(MGMT_EV_LOCAL_NAME_CHANGED, hdev, &ev, sizeof(ev), + HCI_MGMT_LOCAL_NAME_EVENTS, cmd ? cmd->sk : NULL); + ext_info_changed(hdev, cmd ? cmd->sk : NULL); } static inline bool has_uuid(u8 *uuid, u16 uuid_count, u8 (*uuids)[16]) diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index 8c30c7eb8bef..c933bd08c1fe 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -21,12 +21,41 @@ SOFTWARE IS DISCLAIMED. */ +#include + #include #include +#include #include #include "mgmt_util.h" +static struct sk_buff *create_monitor_ctrl_event(__le16 index, u32 cookie, + u16 opcode, u16 len, void *buf) +{ + struct hci_mon_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(6 + len, GFP_ATOMIC); + if (!skb) + return NULL; + + put_unaligned_le32(cookie, skb_put(skb, 4)); + put_unaligned_le16(opcode, skb_put(skb, 2)); + + if (buf) + memcpy(skb_put(skb, len), buf, len); + + __net_timestamp(skb); + + hdr = (void *)skb_push(skb, HCI_MON_HDR_SIZE); + hdr->opcode = cpu_to_le16(HCI_MON_CTRL_EVENT); + hdr->index = index; + hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); + + return skb; +} + int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, void *data, u16 data_len, int flag, struct sock *skip_sk) { @@ -52,14 +81,18 @@ int mgmt_send_event(u16 event, struct hci_dev *hdev, unsigned short channel, __net_timestamp(skb); hci_send_to_channel(channel, skb, flag, skip_sk); - kfree_skb(skb); + if (channel == HCI_CHANNEL_CONTROL) + hci_send_monitor_ctrl_event(hdev, event, data, data_len, + skb_get_ktime(skb), flag, skip_sk); + + kfree_skb(skb); return 0; } int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_status *ev; int err; @@ -80,17 +113,30 @@ int mgmt_cmd_status(struct sock *sk, u16 index, u16 cmd, u8 status) ev->status = status; ev->opcode = cpu_to_le16(cmd); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_STATUS, sizeof(*ev), ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, void *rp, size_t rp_len) { - struct sk_buff *skb; + struct sk_buff *skb, *mskb; struct mgmt_hdr *hdr; struct mgmt_ev_cmd_complete *ev; int err; @@ -114,10 +160,24 @@ int mgmt_cmd_complete(struct sock *sk, u16 index, u16 cmd, u8 status, if (rp) memcpy(ev->data, rp, rp_len); + mskb = create_monitor_ctrl_event(hdr->index, hci_sock_get_cookie(sk), + MGMT_EV_CMD_COMPLETE, + sizeof(*ev) + rp_len, ev); + if (mskb) + skb->tstamp = mskb->tstamp; + else + __net_timestamp(skb); + err = sock_queue_rcv_skb(sk, skb); if (err < 0) kfree_skb(skb); + if (mskb) { + hci_send_to_channel(HCI_CHANNEL_MONITOR, mskb, + HCI_SOCK_TRUSTED, NULL); + kfree_skb(mskb); + } + return err; } diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 4c1a16a96ae5..43faf2aea2ab 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -3387,7 +3387,10 @@ int smp_register(struct hci_dev *hdev) if (!lmp_sc_capable(hdev)) { debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, hdev, &force_bredr_smp_fops); - return 0; + + /* Flag can be already set here (due to power toggle) */ + if (!hci_dev_test_flag(hdev, HCI_FORCE_BREDR_SMP)) + return 0; } if (WARN_ON(hdev->smp_bredr_data)) { diff --git a/net/bridge/Makefile b/net/bridge/Makefile index a1cda5d4718d..0aefc011b668 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -20,4 +20,6 @@ bridge-$(CONFIG_BRIDGE_IGMP_SNOOPING) += br_multicast.o br_mdb.o bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o +bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o + obj-$(CONFIG_NETFILTER) += netfilter/ diff --git a/net/bridge/br.c b/net/bridge/br.c index 3addc05b9a16..889e5640455f 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -227,9 +227,11 @@ static int __init br_init(void) br_fdb_test_addr_hook = br_fdb_test_addr; #endif - pr_info("bridge: automatic filtering via arp/ip/ip6tables has been " - "deprecated. Update your scripts to load br_netfilter if you " +#if IS_MODULE(CONFIG_BRIDGE_NETFILTER) + pr_info("bridge: filtering via arp/ip/ip6tables is no longer available " + "by default. Update your scripts to load br_netfilter if you " "need this.\n"); +#endif return 0; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 09f26940aba5..89a687f3c0a3 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -62,10 +62,10 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) goto out; if (is_broadcast_ether_addr(dest)) { - br_flood(br, skb, false, false, true); + br_flood(br, skb, BR_PKT_BROADCAST, false, true); } else if (is_multicast_ether_addr(dest)) { if (unlikely(netpoll_tx_running(dev))) { - br_flood(br, skb, false, false, true); + br_flood(br, skb, BR_PKT_MULTICAST, false, true); goto out; } if (br_multicast_rcv(br, NULL, skb, vid)) { @@ -78,11 +78,11 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) br_multicast_querier_exists(br, eth_hdr(skb))) br_multicast_flood(mdst, skb, false, true); else - br_flood(br, skb, false, false, true); + br_flood(br, skb, BR_PKT_MULTICAST, false, true); } else if ((dst = __br_fdb_get(br, dest, vid)) != NULL) { br_forward(dst->dst, skb, false, true); } else { - br_flood(br, skb, true, false, true); + br_flood(br, skb, BR_PKT_UNICAST, false, true); } out: rcu_read_unlock(); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index cd620fab41b0..6b43c8c88f19 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -710,24 +710,27 @@ int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { struct net_bridge *br = netdev_priv(dev); + int err = 0; int i; if (!(dev->priv_flags & IFF_EBRIDGE)) goto out; - if (!filter_dev) - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (!filter_dev) { + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); + if (err < 0) + goto out; + } for (i = 0; i < BR_HASH_SIZE; i++) { struct net_bridge_fdb_entry *f; hlist_for_each_entry_rcu(f, &br->hash[i], hlist) { - int err; - if (idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; if (filter_dev && @@ -750,17 +753,15 @@ int br_fdb_dump(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI); - if (err < 0) { - cb->args[1] = err; - break; - } + if (err < 0) + goto out; skip: - ++idx; + *idx += 1; } } out: - return idx; + return err; } /* Update (create or replace) forwarding database entry */ diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 63a83d8d7da3..7cb41aee4c82 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -29,7 +29,8 @@ static inline int should_deliver(const struct net_bridge_port *p, vg = nbp_vlan_group_rcu(p); return ((p->flags & BR_HAIRPIN_MODE) || skb->dev != p->dev) && - br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING; + br_allowed_egress(vg, skb) && p->state == BR_STATE_FORWARDING && + nbp_switchdev_allowed_egress(p, skb); } int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -175,7 +176,7 @@ out: /* called under rcu_read_lock */ void br_flood(struct net_bridge *br, struct sk_buff *skb, - bool unicast, bool local_rcv, bool local_orig) + enum br_pkt_type pkt_type, bool local_rcv, bool local_orig) { u8 igmp_type = br_multicast_igmp_type(skb); struct net_bridge_port *prev = NULL; @@ -183,7 +184,10 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, list_for_each_entry_rcu(p, &br->port_list, list) { /* Do not flood unicast traffic to ports that turn it off */ - if (unicast && !(p->flags & BR_FLOOD)) + if (pkt_type == BR_PKT_UNICAST && !(p->flags & BR_FLOOD)) + continue; + if (pkt_type == BR_PKT_MULTICAST && + !(p->flags & BR_MCAST_FLOOD)) continue; /* Do not flood to ports that enable proxy ARP */ diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f2fede05d32c..ed0dd3340084 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -362,7 +362,7 @@ static struct net_bridge_port *new_nbp(struct net_bridge *br, p->path_cost = port_cost(dev); p->priority = 0x8000 >> BR_PORT_BITS; p->port_no = index; - p->flags = BR_LEARNING | BR_FLOOD; + p->flags = BR_LEARNING | BR_FLOOD | BR_MCAST_FLOOD; br_init_port(p); br_set_state(p, BR_STATE_DISABLED); br_stp_port_timer_init(p); @@ -545,6 +545,10 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) if (err) goto err5; + err = nbp_switchdev_mark_set(p); + if (err) + goto err6; + dev_disable_lro(dev); list_add_rcu(&p->list, &br->port_list); @@ -566,7 +570,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) err = nbp_vlan_init(p); if (err) { netdev_err(dev, "failed to initialize vlan filtering on this port\n"); - goto err6; + goto err7; } spin_lock_bh(&br->lock); @@ -589,12 +593,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return 0; -err6: +err7: list_del_rcu(&p->list); br_fdb_delete_by_port(br, p, 0, 1); nbp_update_port_count(br); +err6: netdev_upper_dev_unlink(dev, br->dev); - err5: dev->priv_flags &= ~IFF_BRIDGE_PORT; netdev_rx_handler_unregister(dev); diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index abe11f085479..855b72fbe1da 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -128,11 +128,12 @@ static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, /* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - bool local_rcv = false, mcast_hit = false, unicast = true; struct net_bridge_port *p = br_port_get_rcu(skb->dev); const unsigned char *dest = eth_hdr(skb)->h_dest; + enum br_pkt_type pkt_type = BR_PKT_UNICAST; struct net_bridge_fdb_entry *dst = NULL; struct net_bridge_mdb_entry *mdst; + bool local_rcv, mcast_hit = false; struct net_bridge *br; u16 vid = 0; @@ -142,29 +143,36 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb if (!br_allowed_ingress(p->br, nbp_vlan_group_rcu(p), skb, &vid)) goto out; + nbp_switchdev_frame_mark(p, skb); + /* insert into forwarding database after filtering to avoid spoofing */ br = p->br; if (p->flags & BR_LEARNING) br_fdb_update(br, p, eth_hdr(skb)->h_source, vid, false); - if (!is_broadcast_ether_addr(dest) && is_multicast_ether_addr(dest) && - br_multicast_rcv(br, p, skb, vid)) - goto drop; + local_rcv = !!(br->dev->flags & IFF_PROMISC); + if (is_multicast_ether_addr(dest)) { + /* by definition the broadcast is also a multicast address */ + if (is_broadcast_ether_addr(dest)) { + pkt_type = BR_PKT_BROADCAST; + local_rcv = true; + } else { + pkt_type = BR_PKT_MULTICAST; + if (br_multicast_rcv(br, p, skb, vid)) + goto drop; + } + } if (p->state == BR_STATE_LEARNING) goto drop; BR_INPUT_SKB_CB(skb)->brdev = br->dev; - local_rcv = !!(br->dev->flags & IFF_PROMISC); - if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) br_do_proxy_arp(skb, br, vid, p); - if (is_broadcast_ether_addr(dest)) { - local_rcv = true; - unicast = false; - } else if (is_multicast_ether_addr(dest)) { + switch (pkt_type) { + case BR_PKT_MULTICAST: mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(br, eth_hdr(skb))) { @@ -178,18 +186,22 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb local_rcv = true; br->dev->stats.multicast++; } - unicast = false; - } else if ((dst = __br_fdb_get(br, dest, vid)) && dst->is_local) { - /* Do not forward the packet since it's local. */ - return br_pass_frame_up(skb); + break; + case BR_PKT_UNICAST: + dst = __br_fdb_get(br, dest, vid); + default: + break; } if (dst) { + if (dst->is_local) + return br_pass_frame_up(skb); + dst->used = jiffies; br_forward(dst->dst, skb, local_rcv, false); } else { if (!mcast_hit) - br_flood(br, skb, unicast, local_rcv, false); + br_flood(br, skb, pkt_type, local_rcv, false); else br_multicast_flood(mdst, skb, local_rcv, false); } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 77e7f69bf80d..2fe9345c1407 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -395,11 +396,10 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, - NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, + NULL, + br_nf_pre_routing_finish); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -417,10 +417,8 @@ bridged_dnat: skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); - + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, skb->dev, NULL, + br_handle_frame_finish); return 0; } @@ -992,6 +990,43 @@ static struct notifier_block brnf_notifier __read_mostly = { .notifier_call = brnf_device_event, }; +/* recursively invokes nf_hook_slow (again), skipping already-called + * hooks (< NF_BR_PRI_BRNF). + * + * Called with rcu read lock held. + */ +int br_nf_hook_thresh(unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct net *, struct sock *, + struct sk_buff *)) +{ + struct nf_hook_entry *elem; + struct nf_hook_state state; + int ret; + + elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); + + while (elem && (elem->ops.priority <= NF_BR_PRI_BRNF)) + elem = rcu_dereference(elem->next); + + if (!elem) + return okfn(net, sk, skb); + + /* We may already have this, but read-locks nest anyway */ + rcu_read_lock(); + nf_hook_state_init(&state, elem, hook, NF_BR_PRI_BRNF + 1, + NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); + + ret = nf_hook_slow(skb, &state); + rcu_read_unlock(); + if (ret == 1) + ret = okfn(net, sk, skb); + + return ret; +} + #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 5e59a8457e7b..5989661c659f 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -187,10 +187,9 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, - net, sk, skb, skb->dev, NULL, - br_nf_pre_routing_finish_bridge, - 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, + net, sk, skb, skb->dev, NULL, + br_nf_pre_routing_finish_bridge); return 0; } ether_addr_copy(eth_hdr(skb)->h_dest, dev->dev_addr); @@ -207,9 +206,8 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc skb->dev = nf_bridge->physindev; nf_bridge_update_protocol(skb); nf_bridge_push_encap_header(skb); - NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, net, sk, skb, - skb->dev, NULL, - br_handle_frame_finish, 1); + br_nf_hook_thresh(NF_BR_PRE_ROUTING, net, sk, skb, + skb->dev, NULL, br_handle_frame_finish); return 0; } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index f2a29e467e78..e99037c6f7b7 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -169,10 +169,15 @@ static int br_port_fill_attrs(struct sk_buff *skb, nla_put_u32(skb, IFLA_BRPORT_COST, p->path_cost) || nla_put_u8(skb, IFLA_BRPORT_MODE, mode) || nla_put_u8(skb, IFLA_BRPORT_GUARD, !!(p->flags & BR_BPDU_GUARD)) || - nla_put_u8(skb, IFLA_BRPORT_PROTECT, !!(p->flags & BR_ROOT_BLOCK)) || - nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || + nla_put_u8(skb, IFLA_BRPORT_PROTECT, + !!(p->flags & BR_ROOT_BLOCK)) || + nla_put_u8(skb, IFLA_BRPORT_FAST_LEAVE, + !!(p->flags & BR_MULTICAST_FAST_LEAVE)) || nla_put_u8(skb, IFLA_BRPORT_LEARNING, !!(p->flags & BR_LEARNING)) || - nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, !!(p->flags & BR_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_UNICAST_FLOOD, + !!(p->flags & BR_FLOOD)) || + nla_put_u8(skb, IFLA_BRPORT_MCAST_FLOOD, + !!(p->flags & BR_MCAST_FLOOD)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP, !!(p->flags & BR_PROXYARP)) || nla_put_u8(skb, IFLA_BRPORT_PROXYARP_WIFI, !!(p->flags & BR_PROXYARP_WIFI)) || @@ -630,6 +635,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) br_set_port_flag(p, tb, IFLA_BRPORT_PROTECT, BR_ROOT_BLOCK); br_set_port_flag(p, tb, IFLA_BRPORT_LEARNING, BR_LEARNING); br_set_port_flag(p, tb, IFLA_BRPORT_UNICAST_FLOOD, BR_FLOOD); + br_set_port_flag(p, tb, IFLA_BRPORT_MCAST_FLOOD, BR_MCAST_FLOOD); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP, BR_PROXYARP); br_set_port_flag(p, tb, IFLA_BRPORT_PROXYARP_WIFI, BR_PROXYARP_WIFI); @@ -1245,14 +1251,30 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) return 0; } -static size_t bridge_get_linkxstats_size(const struct net_device *dev) +static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) { - struct net_bridge *br = netdev_priv(dev); + struct net_bridge_port *p = NULL; struct net_bridge_vlan_group *vg; struct net_bridge_vlan *v; + struct net_bridge *br; int numvls = 0; - vg = br_vlan_group(br); + switch (attr) { + case IFLA_STATS_LINK_XSTATS: + br = netdev_priv(dev); + vg = br_vlan_group(br); + break; + case IFLA_STATS_LINK_XSTATS_SLAVE: + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); + break; + default: + return 0; + } + if (vg) { /* we need to count all, even placeholder entries */ list_for_each_entry(v, &vg->vlan_list, vlist) @@ -1264,45 +1286,42 @@ static size_t bridge_get_linkxstats_size(const struct net_device *dev) nla_total_size(0); } -static size_t brport_get_linkxstats_size(const struct net_device *dev) +static int br_fill_linkxstats(struct sk_buff *skb, + const struct net_device *dev, + int *prividx, int attr) { - return nla_total_size(sizeof(struct br_mcast_stats)) + - nla_total_size(0); -} - -static size_t br_get_linkxstats_size(const struct net_device *dev, int attr) -{ - size_t retsize = 0; + struct nlattr *nla __maybe_unused; + struct net_bridge_port *p = NULL; + struct net_bridge_vlan_group *vg; + struct net_bridge_vlan *v; + struct net_bridge *br; + struct nlattr *nest; + int vl_idx = 0; switch (attr) { case IFLA_STATS_LINK_XSTATS: - retsize = bridge_get_linkxstats_size(dev); + br = netdev_priv(dev); + vg = br_vlan_group(br); break; case IFLA_STATS_LINK_XSTATS_SLAVE: - retsize = brport_get_linkxstats_size(dev); + p = br_port_get_rtnl(dev); + if (!p) + return 0; + br = p->br; + vg = nbp_vlan_group(p); break; + default: + return -EINVAL; } - return retsize; -} - -static int bridge_fill_linkxstats(struct sk_buff *skb, - const struct net_device *dev, - int *prividx) -{ - struct net_bridge *br = netdev_priv(dev); - struct nlattr *nla __maybe_unused; - struct net_bridge_vlan_group *vg; - struct net_bridge_vlan *v; - struct nlattr *nest; - int vl_idx = 0; - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); if (!nest) return -EMSGSIZE; - vg = br_vlan_group(br); if (vg) { + u16 pvid; + + pvid = br_get_pvid(vg); list_for_each_entry(v, &vg->vlan_list, vlist) { struct bridge_vlan_xstats vxi; struct br_vlan_stats stats; @@ -1311,6 +1330,9 @@ static int bridge_fill_linkxstats(struct sk_buff *skb, continue; memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; + vxi.flags = v->flags; + if (v->vid == pvid) + vxi.flags |= BRIDGE_VLAN_INFO_PVID; br_vlan_get_stats(v, &stats); vxi.rx_bytes = stats.rx_bytes; vxi.rx_packets = stats.rx_packets; @@ -1329,7 +1351,7 @@ static int bridge_fill_linkxstats(struct sk_buff *skb, BRIDGE_XSTATS_PAD); if (!nla) goto nla_put_failure; - br_multicast_get_stats(br, NULL, nla_data(nla)); + br_multicast_get_stats(br, p, nla_data(nla)); } #endif nla_nest_end(skb, nest); @@ -1344,52 +1366,6 @@ nla_put_failure: return -EMSGSIZE; } -static int brport_fill_linkxstats(struct sk_buff *skb, - const struct net_device *dev, - int *prividx) -{ - struct net_bridge_port *p = br_port_get_rtnl(dev); - struct nlattr *nla __maybe_unused; - struct nlattr *nest; - - if (!p) - return 0; - - nest = nla_nest_start(skb, LINK_XSTATS_TYPE_BRIDGE); - if (!nest) - return -EMSGSIZE; -#ifdef CONFIG_BRIDGE_IGMP_SNOOPING - nla = nla_reserve_64bit(skb, BRIDGE_XSTATS_MCAST, - sizeof(struct br_mcast_stats), - BRIDGE_XSTATS_PAD); - if (!nla) { - nla_nest_end(skb, nest); - return -EMSGSIZE; - } - br_multicast_get_stats(p->br, p, nla_data(nla)); -#endif - nla_nest_end(skb, nest); - - return 0; -} - -static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, - int *prividx, int attr) -{ - int ret = -EINVAL; - - switch (attr) { - case IFLA_STATS_LINK_XSTATS: - ret = bridge_fill_linkxstats(skb, dev, prividx); - break; - case IFLA_STATS_LINK_XSTATS_SLAVE: - ret = brport_fill_linkxstats(skb, dev, prividx); - break; - } - - return ret; -} - static struct rtnl_af_ops br_af_ops __read_mostly = { .family = AF_BRIDGE, .get_link_af_size = br_get_link_af_size_filtered, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aac2a6e6b008..1b63177e0ccd 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -251,6 +251,9 @@ struct net_bridge_port #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; #endif +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) @@ -359,6 +362,11 @@ struct net_bridge struct timer_list gc_timer; struct kobject *ifobj; u32 auto_cnt; + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif + #ifdef CONFIG_BRIDGE_VLAN_FILTERING struct net_bridge_vlan_group __rcu *vlgrp; u8 vlan_enabled; @@ -381,6 +389,10 @@ struct br_input_skb_cb { #ifdef CONFIG_BRIDGE_VLAN_FILTERING bool vlan_filtered; #endif + +#ifdef CONFIG_NET_SWITCHDEV + int offload_fwd_mark; +#endif }; #define BR_INPUT_SKB_CB(__skb) ((struct br_input_skb_cb *)(__skb)->cb) @@ -496,7 +508,7 @@ int br_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[], int br_fdb_add(struct ndmsg *nlh, struct nlattr *tb[], struct net_device *dev, const unsigned char *addr, u16 vid, u16 nlh_flags); int br_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, struct net_device *fdev, int idx); + struct net_device *dev, struct net_device *fdev, int *idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, @@ -505,12 +517,17 @@ int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid); /* br_forward.c */ +enum br_pkt_type { + BR_PKT_UNICAST, + BR_PKT_MULTICAST, + BR_PKT_BROADCAST +}; int br_dev_queue_push_xmit(struct net *net, struct sock *sk, struct sk_buff *skb); void br_forward(const struct net_bridge_port *to, struct sk_buff *skb, bool local_rcv, bool local_orig); int br_forward_finish(struct net *net, struct sock *sk, struct sk_buff *skb); void br_flood(struct net_bridge *br, struct sk_buff *skb, - bool unicast, bool local_rcv, bool local_orig); + enum br_pkt_type pkt_type, bool local_rcv, bool local_orig); /* br_if.c */ void br_port_carrier_check(struct net_bridge_port *p); @@ -1034,4 +1051,29 @@ static inline int br_sysfs_addbr(struct net_device *dev) { return 0; } static inline void br_sysfs_delbr(struct net_device *dev) { return; } #endif /* CONFIG_SYSFS */ +/* br_switchdev.c */ +#ifdef CONFIG_NET_SWITCHDEV +int nbp_switchdev_mark_set(struct net_bridge_port *p); +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb); +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb); +#else +static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + return 0; +} + +static inline void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ +} + +static inline bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return true; +} +#endif /* CONFIG_NET_SWITCHDEV */ + #endif diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 341caa0ca63a..d8ad73b38de2 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -134,17 +134,36 @@ void br_stp_disable_port(struct net_bridge_port *p) br_become_root_bridge(br); } +static int br_stp_call_user(struct net_bridge *br, char *arg) +{ + char *argv[] = { BR_STP_PROG, br->dev->name, arg, NULL }; + char *envp[] = { NULL }; + int rc; + + /* call userspace STP and report program errors */ + rc = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); + if (rc > 0) { + if (rc & 0xff) + br_debug(br, BR_STP_PROG " received signal %d\n", + rc & 0x7f); + else + br_debug(br, BR_STP_PROG " exited with code %d\n", + (rc >> 8) & 0xff); + } + + return rc; +} + static void br_stp_start(struct net_bridge *br) { - int r; - char *argv[] = { BR_STP_PROG, br->dev->name, "start", NULL }; - char *envp[] = { NULL }; struct net_bridge_port *p; + int err = -ENOENT; if (net_eq(dev_net(br->dev), &init_net)) - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); - else - r = -ENOENT; + err = br_stp_call_user(br, "start"); + + if (err && err != -ENOENT) + br_err(br, "failed to start userspace STP (%d)\n", err); spin_lock_bh(&br->lock); @@ -153,9 +172,10 @@ static void br_stp_start(struct net_bridge *br) else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); - if (r == 0) { + if (!err) { br->stp_enabled = BR_USER_STP; br_debug(br, "userspace STP started\n"); + /* Stop hello and hold timers */ del_timer(&br->hello_timer); list_for_each_entry(p, &br->port_list, list) @@ -173,14 +193,13 @@ static void br_stp_start(struct net_bridge *br) static void br_stp_stop(struct net_bridge *br) { - int r; - char *argv[] = { BR_STP_PROG, br->dev->name, "stop", NULL }; - char *envp[] = { NULL }; struct net_bridge_port *p; + int err; if (br->stp_enabled == BR_USER_STP) { - r = call_usermodehelper(BR_STP_PROG, argv, envp, UMH_WAIT_PROC); - br_info(br, "userspace STP stopped, return code %d\n", r); + err = br_stp_call_user(br, "stop"); + if (err) + br_err(br, "failed to stop userspace STP (%d)\n", err); /* To start timers on any ports left in blocking */ mod_timer(&br->hello_timer, jiffies + br->hello_time); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c new file mode 100644 index 000000000000..f4097b900de1 --- /dev/null +++ b/net/bridge/br_switchdev.c @@ -0,0 +1,57 @@ +#include +#include +#include +#include +#include +#include + +#include "br_private.h" + +static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev) +{ + struct net_bridge_port *p; + + /* dev is yet to be added to the port list. */ + list_for_each_entry(p, &br->port_list, list) { + if (switchdev_port_same_parent_id(dev, p->dev)) + return p->offload_fwd_mark; + } + + return ++br->offload_fwd_mark; +} + +int nbp_switchdev_mark_set(struct net_bridge_port *p) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; + int err; + + ASSERT_RTNL(); + + err = switchdev_port_attr_get(p->dev, &attr); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + + p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev); + + return 0; +} + +void nbp_switchdev_frame_mark(const struct net_bridge_port *p, + struct sk_buff *skb) +{ + if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark)) + BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark; +} + +bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p, + const struct sk_buff *skb) +{ + return !skb->offload_fwd_mark || + BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark; +} diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 1e04d4d44273..e657258e1f2c 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -171,6 +171,7 @@ BRPORT_ATTR_FLAG(learning, BR_LEARNING); BRPORT_ATTR_FLAG(unicast_flood, BR_FLOOD); BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); +BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index 152300d164ac..9a11086ba6ff 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -91,7 +91,7 @@ ebt_log_packet(struct net *net, u_int8_t pf, unsigned int hooknum, if (loginfo->type == NF_LOG_TYPE_LOG) bitmask = loginfo->u.log.logflags; else - bitmask = NF_LOG_MASK; + bitmask = NF_LOG_DEFAULT_MASK; if ((bitmask & EBT_LOG_IP) && eth_hdr(skb)->h_proto == htons(ETH_P_IP)) { diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c index 203964997a51..2e7c4f974340 100644 --- a/net/bridge/netfilter/ebt_redirect.c +++ b/net/bridge/netfilter/ebt_redirect.c @@ -24,7 +24,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) return EBT_DROP; if (par->hooknum != NF_BR_BROUTING) - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ ether_addr_copy(eth_hdr(skb)->h_dest, br_port_get_rcu(par->in)->br->dev->dev_addr); else diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 0833c251aef7..f5c11bbe27db 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -146,7 +146,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, return 1; if (NF_INVF(e, EBT_IOUT, ebt_dev_check(e->out, out))) return 1; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ if (in && (p = br_port_get_rcu(in)) != NULL && NF_INVF(e, EBT_ILOGICALIN, ebt_dev_check(e->logical_in, p->br->dev))) diff --git a/net/bridge/netfilter/nf_log_bridge.c b/net/bridge/netfilter/nf_log_bridge.c index 5d9953a90929..1663df598545 100644 --- a/net/bridge/netfilter/nf_log_bridge.c +++ b/net/bridge/netfilter/nf_log_bridge.c @@ -50,8 +50,7 @@ static struct nf_logger nf_bridge_logger __read_mostly = { static int __net_init nf_log_bridge_net_init(struct net *net) { - nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); - return 0; + return nf_log_set(net, NFPROTO_BRIDGE, &nf_bridge_logger); } static void __net_exit nf_log_bridge_net_exit(struct net *net) diff --git a/net/bridge/netfilter/nf_tables_bridge.c b/net/bridge/netfilter/nf_tables_bridge.c index a78c4e2826e5..97afdc0744e6 100644 --- a/net/bridge/netfilter/nf_tables_bridge.c +++ b/net/bridge/netfilter/nf_tables_bridge.c @@ -13,79 +13,11 @@ #include #include #include -#include #include #include #include #include -int nft_bridge_iphdr_validate(struct sk_buff *skb) -{ - struct iphdr *iph; - u32 len; - - if (!pskb_may_pull(skb, sizeof(struct iphdr))) - return 0; - - iph = ip_hdr(skb); - if (iph->ihl < 5 || iph->version != 4) - return 0; - - len = ntohs(iph->tot_len); - if (skb->len < len) - return 0; - else if (len < (iph->ihl*4)) - return 0; - - if (!pskb_may_pull(skb, iph->ihl*4)) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_iphdr_validate); - -int nft_bridge_ip6hdr_validate(struct sk_buff *skb) -{ - struct ipv6hdr *hdr; - u32 pkt_len; - - if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) - return 0; - - hdr = ipv6_hdr(skb); - if (hdr->version != 6) - return 0; - - pkt_len = ntohs(hdr->payload_len); - if (pkt_len + sizeof(struct ipv6hdr) > skb->len) - return 0; - - return 1; -} -EXPORT_SYMBOL_GPL(nft_bridge_ip6hdr_validate); - -static inline void nft_bridge_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - if (nft_bridge_iphdr_validate(skb)) - nft_set_pktinfo_ipv4(pkt, skb, state); - else - nft_set_pktinfo(pkt, skb, state); -} - -static inline void nft_bridge_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - if (nft_bridge_ip6hdr_validate(skb) && - nft_set_pktinfo_ipv6(pkt, skb, state) == 0) - return; -#endif - nft_set_pktinfo(pkt, skb, state); -} - static unsigned int nft_do_chain_bridge(void *priv, struct sk_buff *skb, @@ -95,13 +27,13 @@ nft_do_chain_bridge(void *priv, switch (eth_hdr(skb)->h_proto) { case htons(ETH_P_IP): - nft_bridge_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_bridge_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } @@ -207,12 +139,20 @@ static int __init nf_tables_bridge_init(void) int ret; nf_register_afinfo(&nf_br_afinfo); - nft_register_chain_type(&filter_bridge); + ret = nft_register_chain_type(&filter_bridge); + if (ret < 0) + goto err1; + ret = register_pernet_subsys(&nf_tables_bridge_net_ops); - if (ret < 0) { - nft_unregister_chain_type(&filter_bridge); - nf_unregister_afinfo(&nf_br_afinfo); - } + if (ret < 0) + goto err2; + + return ret; + +err2: + nft_unregister_chain_type(&filter_bridge); +err1: + nf_unregister_afinfo(&nf_br_afinfo); return ret; } diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 0b77ffbc27d6..4b3df6b0e3b9 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -37,6 +36,30 @@ static void nft_reject_br_push_etherhdr(struct sk_buff *oldskb, skb_pull(nskb, ETH_HLEN); } +static int nft_bridge_iphdr_validate(struct sk_buff *skb) +{ + struct iphdr *iph; + u32 len; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + return 0; + + iph = ip_hdr(skb); + if (iph->ihl < 5 || iph->version != 4) + return 0; + + len = ntohs(iph->tot_len); + if (skb->len < len) + return 0; + else if (len < (iph->ihl*4)) + return 0; + + if (!pskb_may_pull(skb, iph->ihl*4)) + return 0; + + return 1; +} + /* We cannot use oldskb->dev, it can be either bridge device (NF_BRIDGE INPUT) * or the bridge port (NF_BRIDGE PREROUTING). */ @@ -143,6 +166,25 @@ static void nft_reject_br_send_v4_unreach(struct net *net, br_forward(br_port_get_rcu(dev), nskb, false, true); } +static int nft_bridge_ip6hdr_validate(struct sk_buff *skb) +{ + struct ipv6hdr *hdr; + u32 pkt_len; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + return 0; + + hdr = ipv6_hdr(skb); + if (hdr->version != 6) + return 0; + + pkt_len = ntohs(hdr->payload_len); + if (pkt_len + sizeof(struct ipv6hdr) > skb->len) + return 0; + + return 1; +} + static void nft_reject_br_send_v6_tcp_reset(struct net *net, struct sk_buff *oldskb, const struct net_device *dev, diff --git a/net/core/dev.c b/net/core/dev.c index ea6312057a71..f1fe26f66458 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3355,16 +3355,6 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) else skb_dst_force(skb); -#ifdef CONFIG_NET_SWITCHDEV - /* Don't forward if offload device already forwarded */ - if (skb->offload_fwd_mark && - skb->offload_fwd_mark == dev->offload_fwd_mark) { - consume_skb(skb); - rc = NET_XMIT_SUCCESS; - goto out; - } -#endif - txq = netdev_pick_tx(dev, skb, accel_priv); q = rcu_dereference_bh(txq->qdisc); @@ -3914,8 +3904,7 @@ static void net_tx_action(struct softirq_action *h) } } -#if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \ - (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE)) +#if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_ATM_LANE) /* This hook is defined here for ATM LANE */ int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) __read_mostly; @@ -4066,12 +4055,17 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, { #ifdef CONFIG_NETFILTER_INGRESS if (nf_hook_ingress_active(skb)) { + int ingress_retval; + if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; } - return nf_hook_ingress(skb); + rcu_read_lock(); + ingress_retval = nf_hook_ingress(skb); + rcu_read_unlock(); + return ingress_retval; } #endif /* CONFIG_NETFILTER_INGRESS */ return 0; @@ -4308,32 +4302,53 @@ int netif_receive_skb(struct sk_buff *skb) } EXPORT_SYMBOL(netif_receive_skb); -/* Network device is going away, flush any packets still pending - * Called with irqs disabled. - */ -static void flush_backlog(void *arg) -{ - struct net_device *dev = arg; - struct softnet_data *sd = this_cpu_ptr(&softnet_data); - struct sk_buff *skb, *tmp; +DEFINE_PER_CPU(struct work_struct, flush_works); +/* Network device is going away, flush any packets still pending */ +static void flush_backlog(struct work_struct *work) +{ + struct sk_buff *skb, *tmp; + struct softnet_data *sd; + + local_bh_disable(); + sd = this_cpu_ptr(&softnet_data); + + local_irq_disable(); rps_lock(sd); skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { - if (skb->dev == dev) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); kfree_skb(skb); input_queue_head_incr(sd); } } rps_unlock(sd); + local_irq_enable(); skb_queue_walk_safe(&sd->process_queue, skb, tmp) { - if (skb->dev == dev) { + if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->process_queue); kfree_skb(skb); input_queue_head_incr(sd); } } + local_bh_enable(); +} + +static void flush_all_backlogs(void) +{ + unsigned int cpu; + + get_online_cpus(); + + for_each_online_cpu(cpu) + queue_work_on(cpu, system_highpri_wq, + per_cpu_ptr(&flush_works, cpu)); + + for_each_online_cpu(cpu) + flush_work(per_cpu_ptr(&flush_works, cpu)); + + put_online_cpus(); } static int napi_gro_complete(struct sk_buff *skb) @@ -4821,8 +4836,9 @@ static bool sd_has_rps_ipi_waiting(struct softnet_data *sd) static int process_backlog(struct napi_struct *napi, int quota) { - int work = 0; struct softnet_data *sd = container_of(napi, struct softnet_data, backlog); + bool again = true; + int work = 0; /* Check if we have pending ipi, its better to send them now, * not waiting net_rx_action() end. @@ -4833,23 +4849,20 @@ static int process_backlog(struct napi_struct *napi, int quota) } napi->weight = weight_p; - local_irq_disable(); - while (1) { + while (again) { struct sk_buff *skb; while ((skb = __skb_dequeue(&sd->process_queue))) { rcu_read_lock(); - local_irq_enable(); __netif_receive_skb(skb); rcu_read_unlock(); - local_irq_disable(); input_queue_head_incr(sd); - if (++work >= quota) { - local_irq_enable(); + if (++work >= quota) return work; - } + } + local_irq_disable(); rps_lock(sd); if (skb_queue_empty(&sd->input_pkt_queue)) { /* @@ -4861,16 +4874,14 @@ static int process_backlog(struct napi_struct *napi, int quota) * and we dont need an smp_mb() memory barrier. */ napi->state = 0; - rps_unlock(sd); - - break; + again = false; + } else { + skb_queue_splice_tail_init(&sd->input_pkt_queue, + &sd->process_queue); } - - skb_queue_splice_tail_init(&sd->input_pkt_queue, - &sd->process_queue); rps_unlock(sd); + local_irq_enable(); } - local_irq_enable(); return work; } @@ -5578,6 +5589,7 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev, static int __netdev_adjacent_dev_insert(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list, void *private, bool master) { @@ -5587,7 +5599,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj = __netdev_find_adj(adj_dev, dev_list); if (adj) { - adj->ref_nr++; + adj->ref_nr += ref_nr; return 0; } @@ -5597,7 +5609,7 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev, adj->dev = adj_dev; adj->master = master; - adj->ref_nr = 1; + adj->ref_nr = ref_nr; adj->private = private; dev_hold(adj_dev); @@ -5636,6 +5648,7 @@ free_adj: static void __netdev_adjacent_dev_remove(struct net_device *dev, struct net_device *adj_dev, + u16 ref_nr, struct list_head *dev_list) { struct netdev_adjacent *adj; @@ -5648,10 +5661,10 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, BUG(); } - if (adj->ref_nr > 1) { - pr_debug("%s to %s ref_nr-- = %d\n", dev->name, adj_dev->name, - adj->ref_nr-1); - adj->ref_nr--; + if (adj->ref_nr > ref_nr) { + pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name, + ref_nr, adj->ref_nr-ref_nr); + adj->ref_nr -= ref_nr; return; } @@ -5670,21 +5683,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, - master); + ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, - false); + ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); return ret; } @@ -5692,9 +5706,10 @@ static int __netdev_adjacent_dev_link_lists(struct net_device *dev, } static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - return __netdev_adjacent_dev_link_lists(dev, upper_dev, + return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower, NULL, false); @@ -5702,17 +5717,19 @@ static int __netdev_adjacent_dev_link(struct net_device *dev, static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, + u16 ref_nr, struct list_head *up_list, struct list_head *down_list) { - __netdev_adjacent_dev_remove(dev, upper_dev, up_list); - __netdev_adjacent_dev_remove(upper_dev, dev, down_list); + __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + u16 ref_nr) { - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, &dev->all_adj_list.upper, &upper_dev->all_adj_list.lower); } @@ -5721,17 +5738,17 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev); + int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); if (ret) return ret; - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, + ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower, private, master); if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev); + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); return ret; } @@ -5741,8 +5758,8 @@ static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev); - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, + __netdev_adjacent_dev_unlink(dev, upper_dev, 1); + __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); } @@ -5795,7 +5812,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { pr_debug("Interlinking %s with %s, non-neighbour\n", i->dev->name, j->dev->name); - ret = __netdev_adjacent_dev_link(i->dev, j->dev); + ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr); if (ret) goto rollback_mesh; } @@ -5805,7 +5822,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { pr_debug("linking %s's upper device %s with %s\n", upper_dev->name, i->dev->name, dev->name); - ret = __netdev_adjacent_dev_link(dev, i->dev); + ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr); if (ret) goto rollback_upper_mesh; } @@ -5814,7 +5831,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, list_for_each_entry(i, &dev->all_adj_list.lower, list) { pr_debug("linking %s's lower device %s with %s\n", dev->name, i->dev->name, upper_dev->name); - ret = __netdev_adjacent_dev_link(i->dev, upper_dev); + ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr); if (ret) goto rollback_lower_mesh; } @@ -5832,7 +5849,7 @@ rollback_lower_mesh: list_for_each_entry(i, &dev->all_adj_list.lower, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); } i = NULL; @@ -5842,7 +5859,7 @@ rollback_upper_mesh: list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) { if (i == to_i) break; - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); } i = j = NULL; @@ -5854,7 +5871,7 @@ rollback_mesh: list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) { if (i == to_i && j == to_j) break; - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); } if (i == to_i) break; @@ -5934,16 +5951,16 @@ void netdev_upper_dev_unlink(struct net_device *dev, */ list_for_each_entry(i, &dev->all_adj_list.lower, list) list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(i->dev, j->dev); + __netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr); /* remove also the devices itself from lower/upper device * list */ list_for_each_entry(i, &dev->all_adj_list.lower, list) - __netdev_adjacent_dev_unlink(i->dev, upper_dev); + __netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr); list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) - __netdev_adjacent_dev_unlink(dev, i->dev); + __netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr); call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, &changeupper_info.info); @@ -6723,8 +6740,8 @@ static void rollback_registered_many(struct list_head *head) unlist_netdevice(dev); dev->reg_state = NETREG_UNREGISTERING; - on_each_cpu(flush_backlog, dev, 1); } + flush_all_backlogs(); synchronize_net(); @@ -7641,6 +7658,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->all_adj_list.lower); INIT_LIST_HEAD(&dev->ptype_all); INIT_LIST_HEAD(&dev->ptype_specific); +#ifdef CONFIG_NET_SCHED + hash_init(dev->qdisc_hash); +#endif dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); @@ -8286,8 +8306,11 @@ static int __init net_dev_init(void) */ for_each_possible_cpu(i) { + struct work_struct *flush = per_cpu_ptr(&flush_works, i); struct softnet_data *sd = &per_cpu(softnet_data, i); + INIT_WORK(flush, flush_backlog); + skb_queue_head_init(&sd->input_pkt_queue); skb_queue_head_init(&sd->process_queue); INIT_LIST_HEAD(&sd->poll_list); diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index d6b3b579560d..72cfb0c61125 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -105,7 +105,7 @@ static struct sk_buff *reset_per_cpu_data(struct per_cpu_dm_data *data) return skb; } -static struct genl_multicast_group dropmon_mcgrps[] = { +static const struct genl_multicast_group dropmon_mcgrps[] = { { .name = "events", }, }; diff --git a/net/core/filter.c b/net/core/filter.c index cb06aceb512a..00351cdf7d0c 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -94,14 +94,13 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) } EXPORT_SYMBOL(sk_filter_trim_cap); -static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_1(__skb_get_pay_offset, struct sk_buff *, skb) { - return skb_get_poff((struct sk_buff *)(unsigned long) ctx); + return skb_get_poff(skb); } -static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_3(__skb_get_nlattr, struct sk_buff *, skb, u32, a, u32, x) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -120,9 +119,8 @@ static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return 0; } -static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_3(__skb_get_nlattr_nest, struct sk_buff *, skb, u32, a, u32, x) { - struct sk_buff *skb = (struct sk_buff *)(unsigned long) ctx; struct nlattr *nla; if (skb_is_nonlinear(skb)) @@ -145,7 +143,7 @@ static u64 __skb_get_nlattr_nest(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) return 0; } -static u64 __get_raw_cpu_id(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) +BPF_CALL_0(__get_raw_cpu_id) { return raw_smp_processor_id(); } @@ -233,9 +231,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, case SKF_AD_OFF + SKF_AD_HATYPE: BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, type) != 2); - BUILD_BUG_ON(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)) < 0); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), BPF_REG_TMP, BPF_REG_CTX, offsetof(struct sk_buff, dev)); /* if (tmp != 0) goto pc + 1 */ @@ -1350,17 +1347,26 @@ struct bpf_scratchpad { static DEFINE_PER_CPU(struct bpf_scratchpad, bpf_sp); +static inline int __bpf_try_make_writable(struct sk_buff *skb, + unsigned int write_len) +{ + return skb_ensure_writable(skb, write_len); +} + static inline int bpf_try_make_writable(struct sk_buff *skb, unsigned int write_len) { - int err; + int err = __bpf_try_make_writable(skb, write_len); - err = skb_ensure_writable(skb, write_len); bpf_compute_data_end(skb); - return err; } +static int bpf_try_make_head_writable(struct sk_buff *skb) +{ + return bpf_try_make_writable(skb, skb_headlen(skb)); +} + static inline void bpf_push_mac_rcsum(struct sk_buff *skb) { if (skb_at_tc_ingress(skb)) @@ -1373,12 +1379,9 @@ static inline void bpf_pull_mac_rcsum(struct sk_buff *skb) skb_postpull_rcsum(skb, skb_mac_header(skb), skb->mac_len); } -static u64 bpf_skb_store_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 flags) +BPF_CALL_5(bpf_skb_store_bytes, struct sk_buff *, skb, u32, offset, + const void *, from, u32, len, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; - void *from = (void *) (long) r3; - unsigned int len = (unsigned int) r4; void *ptr; if (unlikely(flags & ~(BPF_F_RECOMPUTE_CSUM | BPF_F_INVALIDATE_HASH))) @@ -1413,12 +1416,9 @@ static const struct bpf_func_proto bpf_skb_store_bytes_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_skb_load_bytes(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_4(bpf_skb_load_bytes, const struct sk_buff *, skb, u32, offset, + void *, to, u32, len) { - const struct sk_buff *skb = (const struct sk_buff *)(unsigned long) r1; - unsigned int offset = (unsigned int) r2; - void *to = (void *)(unsigned long) r3; - unsigned int len = (unsigned int) r4; void *ptr; if (unlikely(offset > 0xffff)) @@ -1446,10 +1446,31 @@ static const struct bpf_func_proto bpf_skb_load_bytes_proto = { .arg4_type = ARG_CONST_STACK_SIZE, }; -static u64 bpf_l3_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +BPF_CALL_2(bpf_skb_pull_data, struct sk_buff *, skb, u32, len) +{ + /* Idea is the following: should the needed direct read/write + * test fail during runtime, we can pull in more data and redo + * again, since implicitly, we invalidate previous checks here. + * + * Or, since we know how much we need to make read/writeable, + * this can be done once at the program beginning for direct + * access case. By this we overcome limitations of only current + * headroom being accessible. + */ + return bpf_try_make_writable(skb, len ? : skb_headlen(skb)); +} + +static const struct bpf_func_proto bpf_skb_pull_data_proto = { + .func = bpf_skb_pull_data, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_5(bpf_l3_csum_replace, struct sk_buff *, skb, u32, offset, + u64, from, u64, to, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - unsigned int offset = (unsigned int) r2; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_HDR_FIELD_MASK))) @@ -1491,12 +1512,11 @@ static const struct bpf_func_proto bpf_l3_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_l4_csum_replace(u64 r1, u64 r2, u64 from, u64 to, u64 flags) +BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, + u64, from, u64, to, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; - unsigned int offset = (unsigned int) r2; __sum16 *ptr; if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_PSEUDO_HDR | @@ -1544,12 +1564,11 @@ static const struct bpf_func_proto bpf_l4_csum_replace_proto = { .arg5_type = ARG_ANYTHING, }; -static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed) +BPF_CALL_5(bpf_csum_diff, __be32 *, from, u32, from_size, + __be32 *, to, u32, to_size, __wsum, seed) { struct bpf_scratchpad *sp = this_cpu_ptr(&bpf_sp); - u64 diff_size = from_size + to_size; - __be32 *from = (__be32 *) (long) r1; - __be32 *to = (__be32 *) (long) r3; + u32 diff_size = from_size + to_size; int i, j = 0; /* This is quite flexible, some examples: @@ -1575,6 +1594,7 @@ static u64 bpf_csum_diff(u64 r1, u64 from_size, u64 r3, u64 to_size, u64 seed) static const struct bpf_func_proto bpf_csum_diff_proto = { .func = bpf_csum_diff, .gpl_only = false, + .pkt_access = true, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_STACK, .arg2_type = ARG_CONST_STACK_SIZE_OR_ZERO, @@ -1583,6 +1603,26 @@ static const struct bpf_func_proto bpf_csum_diff_proto = { .arg5_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_csum_update, struct sk_buff *, skb, __wsum, csum) +{ + /* The interface is to be used in combination with bpf_csum_diff() + * for direct packet writes. csum rotation for alignment as well + * as emulating csum_sub() can be done from the eBPF program. + */ + if (skb->ip_summed == CHECKSUM_COMPLETE) + return (skb->csum = csum_add(skb->csum, csum)); + + return -ENOTSUPP; +} + +static const struct bpf_func_proto bpf_csum_update_proto = { + .func = bpf_csum_update, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb) { return dev_forward_skb(dev, skb); @@ -1607,10 +1647,11 @@ static inline int __bpf_tx_skb(struct net_device *dev, struct sk_buff *skb) return ret; } -static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; struct net_device *dev; + struct sk_buff *clone; + int ret; if (unlikely(flags & ~(BPF_F_INGRESS))) return -EINVAL; @@ -1619,14 +1660,25 @@ static u64 bpf_clone_redirect(u64 r1, u64 ifindex, u64 flags, u64 r4, u64 r5) if (unlikely(!dev)) return -EINVAL; - skb = skb_clone(skb, GFP_ATOMIC); - if (unlikely(!skb)) + clone = skb_clone(skb, GFP_ATOMIC); + if (unlikely(!clone)) return -ENOMEM; - bpf_push_mac_rcsum(skb); + /* For direct write, we need to keep the invariant that the skbs + * we're dealing with need to be uncloned. Should uncloning fail + * here, we need to free the just generated clone to unclone once + * again. + */ + ret = bpf_try_make_head_writable(skb); + if (unlikely(ret)) { + kfree_skb(clone); + return -ENOMEM; + } + + bpf_push_mac_rcsum(clone); return flags & BPF_F_INGRESS ? - __bpf_rx_skb(dev, skb) : __bpf_tx_skb(dev, skb); + __bpf_rx_skb(dev, clone) : __bpf_tx_skb(dev, clone); } static const struct bpf_func_proto bpf_clone_redirect_proto = { @@ -1645,7 +1697,7 @@ struct redirect_info { static DEFINE_PER_CPU(struct redirect_info, redirect_info); -static u64 bpf_redirect(u64 ifindex, u64 flags, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_redirect, u32, ifindex, u64, flags) { struct redirect_info *ri = this_cpu_ptr(&redirect_info); @@ -1684,9 +1736,9 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; -static u64 bpf_get_cgroup_classid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { - return task_get_classid((struct sk_buff *) (unsigned long) r1); + return task_get_classid(skb); } static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { @@ -1696,9 +1748,9 @@ static const struct bpf_func_proto bpf_get_cgroup_classid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_get_route_realm(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_route_realm, const struct sk_buff *, skb) { - return dst_tclassid((struct sk_buff *) (unsigned long) r1); + return dst_tclassid(skb); } static const struct bpf_func_proto bpf_get_route_realm_proto = { @@ -1708,14 +1760,14 @@ static const struct bpf_func_proto bpf_get_route_realm_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_get_hash_recalc(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_get_hash_recalc, struct sk_buff *, skb) { /* If skb_clear_hash() was called due to mangling, we can * trigger SW recalculation here. Later access to hash * can then use the inline skb->hash via context directly * instead of calling this helper again. */ - return skb_get_hash((struct sk_buff *) (unsigned long) r1); + return skb_get_hash(skb); } static const struct bpf_func_proto bpf_get_hash_recalc_proto = { @@ -1725,10 +1777,25 @@ static const struct bpf_func_proto bpf_get_hash_recalc_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static u64 bpf_skb_vlan_push(u64 r1, u64 r2, u64 vlan_tci, u64 r4, u64 r5) +BPF_CALL_1(bpf_set_hash_invalid, struct sk_buff *, skb) +{ + /* After all direct packet write, this can be used once for + * triggering a lazy recalc on next skb_get_hash() invocation. + */ + skb_clear_hash(skb); + return 0; +} + +static const struct bpf_func_proto bpf_set_hash_invalid_proto = { + .func = bpf_set_hash_invalid, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +}; + +BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, + u16, vlan_tci) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - __be16 vlan_proto = (__force __be16) r2; int ret; if (unlikely(vlan_proto != htons(ETH_P_8021Q) && @@ -1753,9 +1820,8 @@ const struct bpf_func_proto bpf_skb_vlan_push_proto = { }; EXPORT_SYMBOL_GPL(bpf_skb_vlan_push_proto); -static u64 bpf_skb_vlan_pop(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; int ret; bpf_push_mac_rcsum(skb); @@ -1930,10 +1996,9 @@ static int bpf_skb_proto_xlat(struct sk_buff *skb, __be16 to_proto) return -ENOTSUPP; } -static u64 bpf_skb_change_proto(u64 r1, u64 r2, u64 flags, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, + u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - __be16 proto = (__force __be16) r2; int ret; if (unlikely(flags)) @@ -1970,14 +2035,11 @@ static const struct bpf_func_proto bpf_skb_change_proto_proto = { .arg3_type = ARG_ANYTHING, }; -static u64 bpf_skb_change_type(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_2(bpf_skb_change_type, struct sk_buff *, skb, u32, pkt_type) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u32 pkt_type = r2; - /* We only allow a restricted subset to be changed for now. */ - if (unlikely(skb->pkt_type > PACKET_OTHERHOST || - pkt_type > PACKET_OTHERHOST)) + if (unlikely(!skb_pkt_type_ok(skb->pkt_type) || + !skb_pkt_type_ok(pkt_type))) return -EINVAL; skb->pkt_type = pkt_type; @@ -1992,19 +2054,100 @@ static const struct bpf_func_proto bpf_skb_change_type_proto = { .arg2_type = ARG_ANYTHING, }; +static u32 __bpf_skb_min_len(const struct sk_buff *skb) +{ + u32 min_len = skb_network_offset(skb); + + if (skb_transport_header_was_set(skb)) + min_len = skb_transport_offset(skb); + if (skb->ip_summed == CHECKSUM_PARTIAL) + min_len = skb_checksum_start_offset(skb) + + skb->csum_offset + sizeof(__sum16); + return min_len; +} + +static u32 __bpf_skb_max_len(const struct sk_buff *skb) +{ + return skb->dev->mtu + skb->dev->hard_header_len; +} + +static int bpf_skb_grow_rcsum(struct sk_buff *skb, unsigned int new_len) +{ + unsigned int old_len = skb->len; + int ret; + + ret = __skb_grow_rcsum(skb, new_len); + if (!ret) + memset(skb->data + old_len, 0, new_len - old_len); + return ret; +} + +static int bpf_skb_trim_rcsum(struct sk_buff *skb, unsigned int new_len) +{ + return __skb_trim_rcsum(skb, new_len); +} + +BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, + u64, flags) +{ + u32 max_len = __bpf_skb_max_len(skb); + u32 min_len = __bpf_skb_min_len(skb); + int ret; + + if (unlikely(flags || new_len > max_len || new_len < min_len)) + return -EINVAL; + if (skb->encapsulation) + return -ENOTSUPP; + + /* The basic idea of this helper is that it's performing the + * needed work to either grow or trim an skb, and eBPF program + * rewrites the rest via helpers like bpf_skb_store_bytes(), + * bpf_lX_csum_replace() and others rather than passing a raw + * buffer here. This one is a slow path helper and intended + * for replies with control messages. + * + * Like in bpf_skb_change_proto(), we want to keep this rather + * minimal and without protocol specifics so that we are able + * to separate concerns as in bpf_skb_store_bytes() should only + * be the one responsible for writing buffers. + * + * It's really expected to be a slow path operation here for + * control message replies, so we're implicitly linearizing, + * uncloning and drop offloads from the skb by this. + */ + ret = __bpf_try_make_writable(skb, skb->len); + if (!ret) { + if (new_len > skb->len) + ret = bpf_skb_grow_rcsum(skb, new_len); + else if (new_len < skb->len) + ret = bpf_skb_trim_rcsum(skb, new_len); + if (!ret && skb_is_gso(skb)) + skb_gso_reset(skb); + } + + bpf_compute_data_end(skb); + return ret; +} + +static const struct bpf_func_proto bpf_skb_change_tail_proto = { + .func = bpf_skb_change_tail, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_skb_data(void *func) { - if (func == bpf_skb_vlan_push) - return true; - if (func == bpf_skb_vlan_pop) - return true; - if (func == bpf_skb_store_bytes) - return true; - if (func == bpf_skb_change_proto) - return true; - if (func == bpf_l3_csum_replace) - return true; - if (func == bpf_l4_csum_replace) + if (func == bpf_skb_vlan_push || + func == bpf_skb_vlan_pop || + func == bpf_skb_store_bytes || + func == bpf_skb_change_proto || + func == bpf_skb_change_tail || + func == bpf_skb_pull_data || + func == bpf_l3_csum_replace || + func == bpf_l4_csum_replace) return true; return false; @@ -2023,13 +2166,10 @@ static unsigned long bpf_skb_copy(void *dst_buff, const void *skb, return 0; } -static u64 bpf_skb_event_output(u64 r1, u64 r2, u64 flags, u64 r4, - u64 meta_size) +BPF_CALL_5(bpf_skb_event_output, struct sk_buff *, skb, struct bpf_map *, map, + u64, flags, void *, meta, u64, meta_size) { - struct sk_buff *skb = (struct sk_buff *)(long) r1; - struct bpf_map *map = (struct bpf_map *)(long) r2; u64 skb_size = (flags & BPF_F_CTXLEN_MASK) >> 32; - void *meta = (void *)(long) r4; if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) return -EINVAL; @@ -2056,10 +2196,9 @@ static unsigned short bpf_tunnel_key_af(u64 flags) return flags & BPF_F_TUNINFO_IPV6 ? AF_INET6 : AF_INET; } -static u64 bpf_skb_get_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key *, to, + u32, size, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - struct bpf_tunnel_key *to = (struct bpf_tunnel_key *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); u8 compat[sizeof(struct bpf_tunnel_key)]; void *to_orig = to; @@ -2124,10 +2263,8 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_key_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_skb_get_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_get_tunnel_opt, struct sk_buff *, skb, u8 *, to, u32, size) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u8 *to = (u8 *) (long) r2; const struct ip_tunnel_info *info = skb_tunnel_info(skb); int err; @@ -2162,10 +2299,9 @@ static const struct bpf_func_proto bpf_skb_get_tunnel_opt_proto = { static struct metadata_dst __percpu *md_dst; -static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) +BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb, + const struct bpf_tunnel_key *, from, u32, size, u64, flags) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - struct bpf_tunnel_key *from = (struct bpf_tunnel_key *) (long) r2; struct metadata_dst *md = this_cpu_ptr(md_dst); u8 compat[sizeof(struct bpf_tunnel_key)]; struct ip_tunnel_info *info; @@ -2183,7 +2319,7 @@ static u64 bpf_skb_set_tunnel_key(u64 r1, u64 r2, u64 size, u64 flags, u64 r5) */ memcpy(compat, from, size); memset(compat + size, 0, sizeof(compat) - size); - from = (struct bpf_tunnel_key *)compat; + from = (const struct bpf_tunnel_key *) compat; break; default: return -EINVAL; @@ -2233,10 +2369,9 @@ static const struct bpf_func_proto bpf_skb_set_tunnel_key_proto = { .arg4_type = ARG_ANYTHING, }; -static u64 bpf_skb_set_tunnel_opt(u64 r1, u64 r2, u64 size, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_set_tunnel_opt, struct sk_buff *, skb, + const u8 *, from, u32, size) { - struct sk_buff *skb = (struct sk_buff *) (long) r1; - u8 *from = (u8 *) (long) r2; struct ip_tunnel_info *info = skb_tunnel_info(skb); const struct metadata_dst *md = this_cpu_ptr(md_dst); @@ -2282,28 +2417,24 @@ bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) } } -#ifdef CONFIG_SOCK_CGROUP_DATA -static u64 bpf_skb_under_cgroup(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5) +BPF_CALL_3(bpf_skb_under_cgroup, struct sk_buff *, skb, struct bpf_map *, map, + u32, idx) { - struct sk_buff *skb = (struct sk_buff *)(long)r1; - struct bpf_map *map = (struct bpf_map *)(long)r2; struct bpf_array *array = container_of(map, struct bpf_array, map); struct cgroup *cgrp; struct sock *sk; - u32 i = (u32)r3; - sk = skb->sk; + sk = skb_to_full_sk(skb); if (!sk || !sk_fullsock(sk)) return -ENOENT; - - if (unlikely(i >= array->map.max_entries)) + if (unlikely(idx >= array->map.max_entries)) return -E2BIG; - cgrp = READ_ONCE(array->ptrs[i]); + cgrp = READ_ONCE(array->ptrs[idx]); if (unlikely(!cgrp)) return -EAGAIN; - return cgroup_is_descendant(sock_cgroup_ptr(&sk->sk_cgrp_data), cgrp); + return sk_under_cgroup_hierarchy(sk, cgrp); } static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { @@ -2314,7 +2445,38 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = { .arg2_type = ARG_CONST_MAP_PTR, .arg3_type = ARG_ANYTHING, }; -#endif + +static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff, + unsigned long off, unsigned long len) +{ + memcpy(dst_buff, src_buff + off, len); + return 0; +} + +BPF_CALL_5(bpf_xdp_event_output, struct xdp_buff *, xdp, struct bpf_map *, map, + u64, flags, void *, meta, u64, meta_size) +{ + u64 xdp_size = (flags & BPF_F_CTXLEN_MASK) >> 32; + + if (unlikely(flags & ~(BPF_F_CTXLEN_MASK | BPF_F_INDEX_MASK))) + return -EINVAL; + if (unlikely(xdp_size > (unsigned long)(xdp->data_end - xdp->data))) + return -EFAULT; + + return bpf_event_output(map, flags, meta, meta_size, xdp, xdp_size, + bpf_xdp_copy); +} + +static const struct bpf_func_proto bpf_xdp_event_output_proto = { + .func = bpf_xdp_event_output, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_STACK, + .arg5_type = ARG_CONST_STACK_SIZE, +}; static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) @@ -2350,8 +2512,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_store_bytes_proto; case BPF_FUNC_skb_load_bytes: return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; case BPF_FUNC_csum_diff: return &bpf_csum_diff_proto; + case BPF_FUNC_csum_update: + return &bpf_csum_update_proto; case BPF_FUNC_l3_csum_replace: return &bpf_l3_csum_replace_proto; case BPF_FUNC_l4_csum_replace: @@ -2368,6 +2534,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_skb_change_proto_proto; case BPF_FUNC_skb_change_type: return &bpf_skb_change_type_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; case BPF_FUNC_skb_get_tunnel_key: return &bpf_skb_get_tunnel_key_proto; case BPF_FUNC_skb_set_tunnel_key: @@ -2382,14 +2550,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) return &bpf_get_route_realm_proto; case BPF_FUNC_get_hash_recalc: return &bpf_get_hash_recalc_proto; + case BPF_FUNC_set_hash_invalid: + return &bpf_set_hash_invalid_proto; case BPF_FUNC_perf_event_output: return &bpf_skb_event_output_proto; case BPF_FUNC_get_smp_processor_id: return &bpf_get_smp_processor_id_proto; -#ifdef CONFIG_SOCK_CGROUP_DATA case BPF_FUNC_skb_under_cgroup: return &bpf_skb_under_cgroup_proto; -#endif default: return sk_filter_func_proto(func_id); } @@ -2398,7 +2566,14 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) static const struct bpf_func_proto * xdp_func_proto(enum bpf_func_id func_id) { - return sk_filter_func_proto(func_id); + switch (func_id) { + case BPF_FUNC_perf_event_output: + return &bpf_xdp_event_output_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_smp_processor_id_proto; + default: + return sk_filter_func_proto(func_id); + } } static bool __is_valid_access(int off, int size, enum bpf_access_type type) @@ -2438,6 +2613,45 @@ static bool sk_filter_is_valid_access(int off, int size, return __is_valid_access(off, size, type); } +static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + if (!direct_write) + return 0; + + /* if (!skb->cloned) + * goto start; + * + * (Fast-path, otherwise approximation that we might be + * a clone, do the rest in helper.) + */ + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_6, BPF_REG_1, CLONED_OFFSET()); + *insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_6, CLONED_MASK); + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 7); + + /* ret = bpf_skb_pull_data(skb, 0); */ + *insn++ = BPF_MOV64_REG(BPF_REG_6, BPF_REG_1); + *insn++ = BPF_ALU64_REG(BPF_XOR, BPF_REG_2, BPF_REG_2); + *insn++ = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_skb_pull_data); + /* if (!ret) + * goto restore; + * return TC_ACT_SHOT; + */ + *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); + *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT); + *insn++ = BPF_EXIT_INSN(); + + /* restore: */ + *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6); + /* start: */ + *insn++ = prog->insnsi[0]; + + return insn - insn_buf; +} + static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, enum bpf_reg_type *reg_type) @@ -2475,7 +2689,7 @@ static bool __is_valid_xdp_access(int off, int size, return false; if (off % size != 0) return false; - if (size != 4) + if (size != sizeof(__u32)) return false; return true; @@ -2506,10 +2720,10 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); -static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, - int src_reg, int ctx_off, - struct bpf_insn *insn_buf, - struct bpf_prog *prog) +static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) { struct bpf_insn *insn = insn_buf; @@ -2556,7 +2770,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, case offsetof(struct __sk_buff, ifindex): BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, dev)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), dst_reg, src_reg, offsetof(struct sk_buff, dev)); *insn++ = BPF_JMP_IMM(BPF_JEQ, dst_reg, 0, 1); @@ -2597,7 +2811,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, dst_reg, src_reg, insn); case offsetof(struct __sk_buff, cb[0]) ... - offsetof(struct __sk_buff, cb[4]): + offsetof(struct __sk_buff, cb[4]): BUILD_BUG_ON(FIELD_SIZEOF(struct qdisc_skb_cb, data) < 20); prog->cb_access = 1; @@ -2621,7 +2835,7 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, break; case offsetof(struct __sk_buff, data): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct sk_buff, data)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, data), dst_reg, src_reg, offsetof(struct sk_buff, data)); break; @@ -2630,8 +2844,8 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, ctx_off -= offsetof(struct __sk_buff, data_end); ctx_off += offsetof(struct sk_buff, cb); ctx_off += offsetof(struct bpf_skb_data_end, data_end); - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(sizeof(void *)), - dst_reg, src_reg, ctx_off); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), dst_reg, src_reg, + ctx_off); break; case offsetof(struct __sk_buff, tc_index): @@ -2657,6 +2871,31 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, return insn - insn_buf; } +static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct __sk_buff, ifindex): + BUILD_BUG_ON(FIELD_SIZEOF(struct net_device, ifindex) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, dev), + dst_reg, src_reg, + offsetof(struct sk_buff, dev)); + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, dst_reg, + offsetof(struct net_device, ifindex)); + break; + default: + return sk_filter_convert_ctx_access(type, dst_reg, src_reg, + ctx_off, insn_buf, prog); + } + + return insn - insn_buf; +} + static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, @@ -2666,12 +2905,12 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, switch (ctx_off) { case offsetof(struct xdp_md, data): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data), dst_reg, src_reg, offsetof(struct xdp_buff, data)); break; case offsetof(struct xdp_md, data_end): - *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)), + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), dst_reg, src_reg, offsetof(struct xdp_buff, data_end)); break; @@ -2683,13 +2922,14 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .convert_ctx_access = sk_filter_convert_ctx_access, }; static const struct bpf_verifier_ops tc_cls_act_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, - .convert_ctx_access = bpf_net_convert_ctx_access, + .convert_ctx_access = tc_cls_act_convert_ctx_access, + .gen_prologue = tc_cls_act_prologue, }; static const struct bpf_verifier_ops xdp_ops = { diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 52742a02814f..1a7b80f73376 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include #include @@ -116,13 +118,16 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_addrs *key_addrs; struct flow_dissector_key_ports *key_ports; struct flow_dissector_key_tags *key_tags; + struct flow_dissector_key_vlan *key_vlan; struct flow_dissector_key_keyid *key_keyid; + bool skip_vlan = false; u8 ip_proto = 0; bool ret = false; if (!data) { data = skb->data; - proto = skb->protocol; + proto = skb_vlan_tag_present(skb) ? + skb->vlan_proto : skb->protocol; nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); } @@ -241,23 +246,45 @@ ipv6: case htons(ETH_P_8021AD): case htons(ETH_P_8021Q): { const struct vlan_hdr *vlan; - struct vlan_hdr _vlan; - vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); - if (!vlan) - goto out_bad; + if (skb_vlan_tag_present(skb)) + proto = skb->protocol; - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID)) { - key_tags = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_VLANID, - target_container); + if (!skb_vlan_tag_present(skb) || + proto == cpu_to_be16(ETH_P_8021Q) || + proto == cpu_to_be16(ETH_P_8021AD)) { + struct vlan_hdr _vlan; - key_tags->vlan_id = skb_vlan_tag_get_id(skb); + vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), + data, hlen, &_vlan); + if (!vlan) + goto out_bad; + proto = vlan->h_vlan_encapsulated_proto; + nhoff += sizeof(*vlan); + if (skip_vlan) + goto again; + } + + skip_vlan = true; + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_VLAN)) { + key_vlan = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_VLAN, + target_container); + + if (skb_vlan_tag_present(skb)) { + key_vlan->vlan_id = skb_vlan_tag_get_id(skb); + key_vlan->vlan_priority = + (skb_vlan_tag_get_prio(skb) >> VLAN_PRIO_SHIFT); + } else { + key_vlan->vlan_id = ntohs(vlan->h_vlan_TCI) & + VLAN_VID_MASK; + key_vlan->vlan_priority = + (ntohs(vlan->h_vlan_TCI) & + VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; + } } - proto = vlan->h_vlan_encapsulated_proto; - nhoff += sizeof(*vlan); goto again; } case htons(ETH_P_PPP_SES): { @@ -338,32 +365,42 @@ mpls: ip_proto_again: switch (ip_proto) { case IPPROTO_GRE: { - struct gre_hdr { - __be16 flags; - __be16 proto; - } *hdr, _hdr; + struct gre_base_hdr *hdr, _hdr; + u16 gre_ver; + int offset = 0; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) goto out_bad; - /* - * Only look inside GRE if version zero and no - * routing - */ - if (hdr->flags & (GRE_VERSION | GRE_ROUTING)) + + /* Only look inside GRE without routing */ + if (hdr->flags & GRE_ROUTING) break; - proto = hdr->proto; - nhoff += 4; + /* Only look inside GRE for version 0 and 1 */ + gre_ver = ntohs(hdr->flags & GRE_VERSION); + if (gre_ver > 1) + break; + + proto = hdr->protocol; + if (gre_ver) { + /* Version1 must be PPTP, and check the flags */ + if (!(proto == GRE_PROTO_PPP && (hdr->flags & GRE_KEY))) + break; + } + + offset += sizeof(struct gre_base_hdr); + if (hdr->flags & GRE_CSUM) - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->csum) + + sizeof(((struct gre_full_hdr *)0)->reserved1); + if (hdr->flags & GRE_KEY) { const __be32 *keyid; __be32 _keyid; - keyid = __skb_header_pointer(skb, nhoff, sizeof(_keyid), + keyid = __skb_header_pointer(skb, nhoff + offset, sizeof(_keyid), data, hlen, &_keyid); - if (!keyid) goto out_bad; @@ -372,32 +409,65 @@ ip_proto_again: key_keyid = skb_flow_dissector_target(flow_dissector, FLOW_DISSECTOR_KEY_GRE_KEYID, target_container); - key_keyid->keyid = *keyid; + if (gre_ver == 0) + key_keyid->keyid = *keyid; + else + key_keyid->keyid = *keyid & GRE_PPTP_KEY_MASK; } - nhoff += 4; + offset += sizeof(((struct gre_full_hdr *)0)->key); } + if (hdr->flags & GRE_SEQ) - nhoff += 4; - if (proto == htons(ETH_P_TEB)) { - const struct ethhdr *eth; - struct ethhdr _eth; + offset += sizeof(((struct pptp_gre_header *)0)->seq); - eth = __skb_header_pointer(skb, nhoff, - sizeof(_eth), - data, hlen, &_eth); - if (!eth) + if (gre_ver == 0) { + if (proto == htons(ETH_P_TEB)) { + const struct ethhdr *eth; + struct ethhdr _eth; + + eth = __skb_header_pointer(skb, nhoff + offset, + sizeof(_eth), + data, hlen, &_eth); + if (!eth) + goto out_bad; + proto = eth->h_proto; + offset += sizeof(*eth); + + /* Cap headers that we access via pointers at the + * end of the Ethernet header as our maximum alignment + * at that point is only 2 bytes. + */ + if (NET_IP_ALIGN) + hlen = (nhoff + offset); + } + } else { /* version 1, must be PPTP */ + u8 _ppp_hdr[PPP_HDRLEN]; + u8 *ppp_hdr; + + if (hdr->flags & GRE_ACK) + offset += sizeof(((struct pptp_gre_header *)0)->ack); + + ppp_hdr = skb_header_pointer(skb, nhoff + offset, + sizeof(_ppp_hdr), _ppp_hdr); + if (!ppp_hdr) goto out_bad; - proto = eth->h_proto; - nhoff += sizeof(*eth); - /* Cap headers that we access via pointers at the - * end of the Ethernet header as our maximum alignment - * at that point is only 2 bytes. - */ - if (NET_IP_ALIGN) - hlen = nhoff; + switch (PPP_PROTOCOL(ppp_hdr)) { + case PPP_IP: + proto = htons(ETH_P_IP); + break; + case PPP_IPV6: + proto = htons(ETH_P_IPV6); + break; + default: + /* Could probably catch some more like MPLS */ + break; + } + + offset += PPP_HDRLEN; } + nhoff += offset; key_control->flags |= FLOW_DIS_ENCAPSULATION; if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) goto out_good; @@ -874,8 +944,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .offset = offsetof(struct flow_keys, ports), }, { - .key_id = FLOW_DISSECTOR_KEY_VLANID, - .offset = offsetof(struct flow_keys, tags), + .key_id = FLOW_DISSECTOR_KEY_VLAN, + .offset = offsetof(struct flow_keys, vlan), }, { .key_id = FLOW_DISSECTOR_KEY_FLOW_LABEL, diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 669ecc9f884e..e5f84c26ba1a 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -251,6 +251,41 @@ drop: } EXPORT_SYMBOL(lwtunnel_output); +int lwtunnel_xmit(struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct lwtunnel_encap_ops *ops; + struct lwtunnel_state *lwtstate; + int ret = -EINVAL; + + if (!dst) + goto drop; + + lwtstate = dst->lwtstate; + + if (lwtstate->type == LWTUNNEL_ENCAP_NONE || + lwtstate->type > LWTUNNEL_ENCAP_MAX) + return 0; + + ret = -EOPNOTSUPP; + rcu_read_lock(); + ops = rcu_dereference(lwtun_encaps[lwtstate->type]); + if (likely(ops && ops->xmit)) + ret = ops->xmit(skb); + rcu_read_unlock(); + + if (ret == -EOPNOTSUPP) + goto drop; + + return ret; + +drop: + kfree_skb(skb); + + return ret; +} +EXPORT_SYMBOL(lwtunnel_xmit); + int lwtunnel_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index cf26e04c4046..2ae929f9bd06 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1148,7 +1148,8 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } else goto out; } else { - if (lladdr == neigh->ha && new == NUD_STALE) + if (lladdr == neigh->ha && new == NUD_STALE && + !(flags & NEIGH_UPDATE_F_ADMIN)) new = old; } } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 2c2eb1b629b1..42bdda0e616b 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -37,6 +37,8 @@ struct net init_net = { }; EXPORT_SYMBOL(init_net); +static bool init_net_initialized; + #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; @@ -213,31 +215,29 @@ static void rtnl_net_notifyid(struct net *net, int cmd, int id); */ int peernet2id_alloc(struct net *net, struct net *peer) { - unsigned long flags; bool alloc; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); alloc = atomic_read(&peer->count) == 0 ? false : true; id = __peernet2id_alloc(net, peer, &alloc); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (alloc && id >= 0) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; } -EXPORT_SYMBOL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) { - unsigned long flags; int id; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); id = __peernet2id(net, peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); return id; } +EXPORT_SYMBOL(peernet2id); /* This function returns true is the peer netns has an id assigned into the * current netns. @@ -249,18 +249,17 @@ bool peernet_has_id(struct net *net, struct net *peer) struct net *get_net_ns_by_id(struct net *net, int id) { - unsigned long flags; struct net *peer; if (id < 0) return NULL; rcu_read_lock(); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); peer = idr_find(&net->netns_ids, id); if (peer) get_net(peer); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); rcu_read_unlock(); return peer; @@ -404,17 +403,17 @@ static void cleanup_net(struct work_struct *work) for_each_net(tmp) { int id; - spin_lock_irq(&tmp->nsid_lock); + spin_lock_bh(&tmp->nsid_lock); id = __peernet2id(tmp, net); if (id >= 0) idr_remove(&tmp->netns_ids, id); - spin_unlock_irq(&tmp->nsid_lock); + spin_unlock_bh(&tmp->nsid_lock); if (id >= 0) rtnl_net_notifyid(tmp, RTM_DELNSID, id); } - spin_lock_irq(&net->nsid_lock); + spin_lock_bh(&net->nsid_lock); idr_destroy(&net->netns_ids); - spin_unlock_irq(&net->nsid_lock); + spin_unlock_bh(&net->nsid_lock); } rtnl_unlock(); @@ -531,7 +530,7 @@ static struct pernet_operations __net_initdata net_ns_ops = { .exit = net_ns_net_exit, }; -static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { +static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { [NETNSA_NONE] = { .type = NLA_UNSPEC }, [NETNSA_NSID] = { .type = NLA_S32 }, [NETNSA_PID] = { .type = NLA_U32 }, @@ -542,7 +541,6 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); struct nlattr *tb[NETNSA_MAX + 1]; - unsigned long flags; struct net *peer; int nsid, err; @@ -563,15 +561,15 @@ static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) if (IS_ERR(peer)) return PTR_ERR(peer); - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); if (__peernet2id(net, peer) >= 0) { - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); err = -EEXIST; goto out; } err = alloc_netid(net, peer, nsid); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); if (err >= 0) { rtnl_net_notifyid(net, RTM_NEWNSID, err); err = 0; @@ -693,11 +691,10 @@ static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) .idx = 0, .s_idx = cb->args[0], }; - unsigned long flags; - spin_lock_irqsave(&net->nsid_lock, flags); + spin_lock_bh(&net->nsid_lock); idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); - spin_unlock_irqrestore(&net->nsid_lock, flags); + spin_unlock_bh(&net->nsid_lock); cb->args[0] = net_cb.idx; return skb->len; @@ -750,6 +747,8 @@ static int __init net_ns_init(void) if (setup_net(&init_net, &init_user_ns)) panic("Could not setup the initial network namespace"); + init_net_initialized = true; + rtnl_lock(); list_add_tail_rcu(&init_net.list, &net_namespace_list); rtnl_unlock(); @@ -811,15 +810,24 @@ static void __unregister_pernet_operations(struct pernet_operations *ops) static int __register_pernet_operations(struct list_head *list, struct pernet_operations *ops) { + if (!init_net_initialized) { + list_add_tail(&ops->list, list); + return 0; + } + return ops_init(ops, &init_net); } static void __unregister_pernet_operations(struct pernet_operations *ops) { - LIST_HEAD(net_exit_list); - list_add(&init_net.exit_list, &net_exit_list); - ops_exit_list(ops, &net_exit_list); - ops_free_list(ops, &net_exit_list); + if (!init_net_initialized) { + list_del(&ops->list); + } else { + LIST_HEAD(net_exit_list); + list_add(&init_net.exit_list, &net_exit_list); + ops_exit_list(ops, &net_exit_list); + ops_free_list(ops, &net_exit_list); + } } #endif /* CONFIG_NET_NS */ diff --git a/net/core/pktgen.c b/net/core/pktgen.c index bbd118b19aef..5219a9e2127a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2286,7 +2286,7 @@ out: static inline void set_pkt_overhead(struct pktgen_dev *pkt_dev) { - pkt_dev->pkt_overhead = LL_RESERVED_SPACE(pkt_dev->odev); + pkt_dev->pkt_overhead = 0; pkt_dev->pkt_overhead += pkt_dev->nr_labels*sizeof(u32); pkt_dev->pkt_overhead += VLAN_TAG_SIZE(pkt_dev); pkt_dev->pkt_overhead += SVLAN_TAG_SIZE(pkt_dev); @@ -2777,13 +2777,13 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, } static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, - struct pktgen_dev *pkt_dev, - unsigned int extralen) + struct pktgen_dev *pkt_dev) { + unsigned int extralen = LL_RESERVED_SPACE(dev); struct sk_buff *skb = NULL; - unsigned int size = pkt_dev->cur_pkt_size + 64 + extralen + - pkt_dev->pkt_overhead; + unsigned int size; + size = pkt_dev->cur_pkt_size + 64 + extralen + pkt_dev->pkt_overhead; if (pkt_dev->flags & F_NODE) { int node = pkt_dev->node >= 0 ? pkt_dev->node : numa_node_id(); @@ -2796,8 +2796,9 @@ static struct sk_buff *pktgen_alloc_skb(struct net_device *dev, skb = __netdev_alloc_skb(dev, size, GFP_NOWAIT); } + /* the caller pre-fetches from skb->data and reserves for the mac hdr */ if (likely(skb)) - skb_reserve(skb, LL_RESERVED_SPACE(dev)); + skb_reserve(skb, extralen - 16); return skb; } @@ -2830,16 +2831,14 @@ static struct sk_buff *fill_packet_ipv4(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - datalen = (odev->hard_header_len + 16) & ~0xf; - - skb = pktgen_alloc_skb(odev, pkt_dev, datalen); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; } prefetchw(skb->data); - skb_reserve(skb, datalen); + skb_reserve(skb, 16); /* Reserve for ethernet and IP header */ eth = (__u8 *) skb_push(skb, 14); @@ -2959,7 +2958,7 @@ static struct sk_buff *fill_packet_ipv6(struct net_device *odev, mod_cur_headers(pkt_dev); queue_map = pkt_dev->cur_queue_map; - skb = pktgen_alloc_skb(odev, pkt_dev, 16); + skb = pktgen_alloc_skb(odev, pkt_dev); if (!skb) { sprintf(pkt_dev->result, "No memory"); return NULL; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 189cc78c77eb..b06d2f46b83e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -704,6 +704,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) } else if (i == RTAX_FEATURES - 1) { u32 user_features = metrics[i] & RTAX_FEATURE_MASK; + if (!user_features) + continue; BUILD_BUG_ON(RTAX_FEATURE_MASK & DST_FEATURE_MASK); if (nla_put_u32(skb, i + 1, user_features)) goto nla_put_failure; @@ -841,7 +843,10 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev, size += nla_total_size(num_vfs * sizeof(struct nlattr)); size += num_vfs * (nla_total_size(sizeof(struct ifla_vf_mac)) + - nla_total_size(sizeof(struct ifla_vf_vlan)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct nlattr)) + + nla_total_size(MAX_VLAN_LIST_LEN * + sizeof(struct ifla_vf_vlan_info)) + nla_total_size(sizeof(struct ifla_vf_spoofchk)) + nla_total_size(sizeof(struct ifla_vf_rate)) + nla_total_size(sizeof(struct ifla_vf_link_state)) + @@ -1109,14 +1114,15 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, struct nlattr *vfinfo) { struct ifla_vf_rss_query_en vf_rss_query_en; + struct nlattr *vf, *vfstats, *vfvlanlist; struct ifla_vf_link_state vf_linkstate; + struct ifla_vf_vlan_info vf_vlan_info; struct ifla_vf_spoofchk vf_spoofchk; struct ifla_vf_tx_rate vf_tx_rate; struct ifla_vf_stats vf_stats; struct ifla_vf_trust vf_trust; struct ifla_vf_vlan vf_vlan; struct ifla_vf_rate vf_rate; - struct nlattr *vf, *vfstats; struct ifla_vf_mac vf_mac; struct ifla_vf_info ivi; @@ -1133,11 +1139,14 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, * IFLA_VF_LINK_STATE_AUTO which equals zero */ ivi.linkstate = 0; + /* VLAN Protocol by default is 802.1Q */ + ivi.vlan_proto = htons(ETH_P_8021Q); if (dev->netdev_ops->ndo_get_vf_config(dev, vfs_num, &ivi)) return 0; vf_mac.vf = vf_vlan.vf = + vf_vlan_info.vf = vf_rate.vf = vf_tx_rate.vf = vf_spoofchk.vf = @@ -1148,6 +1157,9 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac)); vf_vlan.vlan = ivi.vlan; vf_vlan.qos = ivi.qos; + vf_vlan_info.vlan = ivi.vlan; + vf_vlan_info.qos = ivi.qos; + vf_vlan_info.vlan_proto = ivi.vlan_proto; vf_tx_rate.rate = ivi.max_tx_rate; vf_rate.min_tx_rate = ivi.min_tx_rate; vf_rate.max_tx_rate = ivi.max_tx_rate; @@ -1156,10 +1168,8 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, vf_rss_query_en.setting = ivi.rss_query_en; vf_trust.setting = ivi.trusted; vf = nla_nest_start(skb, IFLA_VF_INFO); - if (!vf) { - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vf) + goto nla_put_vfinfo_failure; if (nla_put(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac) || nla_put(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan) || nla_put(skb, IFLA_VF_RATE, sizeof(vf_rate), @@ -1175,17 +1185,23 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, &vf_rss_query_en) || nla_put(skb, IFLA_VF_TRUST, sizeof(vf_trust), &vf_trust)) - return -EMSGSIZE; + goto nla_put_vf_failure; + vfvlanlist = nla_nest_start(skb, IFLA_VF_VLAN_LIST); + if (!vfvlanlist) + goto nla_put_vf_failure; + if (nla_put(skb, IFLA_VF_VLAN_INFO, sizeof(vf_vlan_info), + &vf_vlan_info)) { + nla_nest_cancel(skb, vfvlanlist); + goto nla_put_vf_failure; + } + nla_nest_end(skb, vfvlanlist); memset(&vf_stats, 0, sizeof(vf_stats)); if (dev->netdev_ops->ndo_get_vf_stats) dev->netdev_ops->ndo_get_vf_stats(dev, vfs_num, &vf_stats); vfstats = nla_nest_start(skb, IFLA_VF_STATS); - if (!vfstats) { - nla_nest_cancel(skb, vf); - nla_nest_cancel(skb, vfinfo); - return -EMSGSIZE; - } + if (!vfstats) + goto nla_put_vf_failure; if (nla_put_u64_64bit(skb, IFLA_VF_STATS_RX_PACKETS, vf_stats.rx_packets, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_TX_PACKETS, @@ -1197,11 +1213,19 @@ static noinline_for_stack int rtnl_fill_vfinfo(struct sk_buff *skb, nla_put_u64_64bit(skb, IFLA_VF_STATS_BROADCAST, vf_stats.broadcast, IFLA_VF_STATS_PAD) || nla_put_u64_64bit(skb, IFLA_VF_STATS_MULTICAST, - vf_stats.multicast, IFLA_VF_STATS_PAD)) - return -EMSGSIZE; + vf_stats.multicast, IFLA_VF_STATS_PAD)) { + nla_nest_cancel(skb, vfstats); + goto nla_put_vf_failure; + } nla_nest_end(skb, vfstats); nla_nest_end(skb, vf); return 0; + +nla_put_vf_failure: + nla_nest_cancel(skb, vf); +nla_put_vfinfo_failure: + nla_nest_cancel(skb, vfinfo); + return -EMSGSIZE; } static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) @@ -1446,6 +1470,7 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = { [IFLA_VF_MAC] = { .len = sizeof(struct ifla_vf_mac) }, [IFLA_VF_VLAN] = { .len = sizeof(struct ifla_vf_vlan) }, + [IFLA_VF_VLAN_LIST] = { .type = NLA_NESTED }, [IFLA_VF_TX_RATE] = { .len = sizeof(struct ifla_vf_tx_rate) }, [IFLA_VF_SPOOFCHK] = { .len = sizeof(struct ifla_vf_spoofchk) }, [IFLA_VF_RATE] = { .len = sizeof(struct ifla_vf_rate) }, @@ -1702,7 +1727,37 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) err = -EOPNOTSUPP; if (ops->ndo_set_vf_vlan) err = ops->ndo_set_vf_vlan(dev, ivv->vf, ivv->vlan, - ivv->qos); + ivv->qos, + htons(ETH_P_8021Q)); + if (err < 0) + return err; + } + + if (tb[IFLA_VF_VLAN_LIST]) { + struct ifla_vf_vlan_info *ivvl[MAX_VLAN_LIST_LEN]; + struct nlattr *attr; + int rem, len = 0; + + err = -EOPNOTSUPP; + if (!ops->ndo_set_vf_vlan) + return err; + + nla_for_each_nested(attr, tb[IFLA_VF_VLAN_LIST], rem) { + if (nla_type(attr) != IFLA_VF_VLAN_INFO || + nla_len(attr) < NLA_HDRLEN) { + return -EINVAL; + } + if (len >= MAX_VLAN_LIST_LEN) + return -EOPNOTSUPP; + ivvl[len] = nla_data(attr); + + len++; + } + if (len == 0) + return -EINVAL; + + err = ops->ndo_set_vf_vlan(dev, ivvl[0]->vf, ivvl[0]->vlan, + ivvl[0]->qos, ivvl[0]->vlan_proto); if (err < 0) return err; } @@ -3066,7 +3121,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { - if (*idx < cb->args[0]) + if (*idx < cb->args[2]) goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, 0, @@ -3093,19 +3148,18 @@ int ndo_dflt_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, struct net_device *filter_dev, - int idx) + int *idx) { int err; netif_addr_lock_bh(dev); - err = nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->uc); + err = nlmsg_populate_fdb(skb, cb, dev, idx, &dev->uc); if (err) goto out; - nlmsg_populate_fdb(skb, cb, dev, &idx, &dev->mc); + nlmsg_populate_fdb(skb, cb, dev, idx, &dev->mc); out: netif_addr_unlock_bh(dev); - cb->args[1] = err; - return idx; + return err; } EXPORT_SYMBOL(ndo_dflt_fdb_dump); @@ -3118,9 +3172,13 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) const struct net_device_ops *cops = NULL; struct ifinfomsg *ifm = nlmsg_data(cb->nlh); struct net *net = sock_net(skb->sk); + struct hlist_head *head; int brport_idx = 0; int br_idx = 0; - int idx = 0; + int h, s_h; + int idx = 0, s_idx; + int err = 0; + int fidx = 0; if (nlmsg_parse(cb->nlh, sizeof(struct ifinfomsg), tb, IFLA_MAX, ifla_policy) == 0) { @@ -3138,49 +3196,71 @@ static int rtnl_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb) ops = br_dev->netdev_ops; } - cb->args[1] = 0; - for_each_netdev(net, dev) { - if (brport_idx && (dev->ifindex != brport_idx)) - continue; + s_h = cb->args[0]; + s_idx = cb->args[1]; - if (!br_idx) { /* user did not specify a specific bridge */ - if (dev->priv_flags & IFF_BRIDGE_PORT) { - br_dev = netdev_master_upper_dev_get(dev); - cops = br_dev->netdev_ops; + for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { + idx = 0; + head = &net->dev_index_head[h]; + hlist_for_each_entry(dev, head, index_hlist) { + + if (brport_idx && (dev->ifindex != brport_idx)) + continue; + + if (!br_idx) { /* user did not specify a specific bridge */ + if (dev->priv_flags & IFF_BRIDGE_PORT) { + br_dev = netdev_master_upper_dev_get(dev); + cops = br_dev->netdev_ops; + } + } else { + if (dev != br_dev && + !(dev->priv_flags & IFF_BRIDGE_PORT)) + continue; + + if (br_dev != netdev_master_upper_dev_get(dev) && + !(dev->priv_flags & IFF_EBRIDGE)) + continue; + cops = ops; } - } else { - if (dev != br_dev && - !(dev->priv_flags & IFF_BRIDGE_PORT)) - continue; + if (idx < s_idx) + goto cont; - if (br_dev != netdev_master_upper_dev_get(dev) && - !(dev->priv_flags & IFF_EBRIDGE)) - continue; + if (dev->priv_flags & IFF_BRIDGE_PORT) { + if (cops && cops->ndo_fdb_dump) { + err = cops->ndo_fdb_dump(skb, cb, + br_dev, dev, + &fidx); + if (err == -EMSGSIZE) + goto out; + } + } - cops = ops; + if (dev->netdev_ops->ndo_fdb_dump) + err = dev->netdev_ops->ndo_fdb_dump(skb, cb, + dev, NULL, + &fidx); + else + err = ndo_dflt_fdb_dump(skb, cb, dev, NULL, + &fidx); + if (err == -EMSGSIZE) + goto out; + + cops = NULL; + + /* reset fdb offset to 0 for rest of the interfaces */ + cb->args[2] = 0; + fidx = 0; +cont: + idx++; } - - if (dev->priv_flags & IFF_BRIDGE_PORT) { - if (cops && cops->ndo_fdb_dump) - idx = cops->ndo_fdb_dump(skb, cb, br_dev, dev, - idx); - } - if (cb->args[1] == -EMSGSIZE) - break; - - if (dev->netdev_ops->ndo_fdb_dump) - idx = dev->netdev_ops->ndo_fdb_dump(skb, cb, dev, NULL, - idx); - else - idx = ndo_dflt_fdb_dump(skb, cb, dev, NULL, idx); - if (cb->args[1] == -EMSGSIZE) - break; - - cops = NULL; } - cb->args[0] = idx; +out: + cb->args[0] = h; + cb->args[1] = idx; + cb->args[2] = fidx; + return skb->len; } @@ -3550,6 +3630,91 @@ static bool stats_attr_valid(unsigned int mask, int attrid, int idxattr) (!idxattr || idxattr == attrid); } +#define IFLA_OFFLOAD_XSTATS_FIRST (IFLA_OFFLOAD_XSTATS_UNSPEC + 1) +static int rtnl_get_offload_stats_attr_size(int attr_id) +{ + switch (attr_id) { + case IFLA_OFFLOAD_XSTATS_CPU_HIT: + return sizeof(struct rtnl_link_stats64); + } + + return 0; +} + +static int rtnl_get_offload_stats(struct sk_buff *skb, struct net_device *dev, + int *prividx) +{ + struct nlattr *attr = NULL; + int attr_id, size; + void *attr_data; + int err; + + if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return -ENODATA; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (attr_id < *prividx) + continue; + + size = rtnl_get_offload_stats_attr_size(attr_id); + if (!size) + continue; + + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + continue; + + attr = nla_reserve_64bit(skb, attr_id, size, + IFLA_OFFLOAD_XSTATS_UNSPEC); + if (!attr) + goto nla_put_failure; + + attr_data = nla_data(attr); + memset(attr_data, 0, size); + err = dev->netdev_ops->ndo_get_offload_stats(attr_id, dev, + attr_data); + if (err) + goto get_offload_stats_failure; + } + + if (!attr) + return -ENODATA; + + *prividx = 0; + return 0; + +nla_put_failure: + err = -EMSGSIZE; +get_offload_stats_failure: + *prividx = attr_id; + return err; +} + +static int rtnl_get_offload_stats_size(const struct net_device *dev) +{ + int nla_size = 0; + int attr_id; + int size; + + if (!(dev->netdev_ops && dev->netdev_ops->ndo_has_offload_stats && + dev->netdev_ops->ndo_get_offload_stats)) + return 0; + + for (attr_id = IFLA_OFFLOAD_XSTATS_FIRST; + attr_id <= IFLA_OFFLOAD_XSTATS_MAX; attr_id++) { + if (!dev->netdev_ops->ndo_has_offload_stats(attr_id)) + continue; + size = rtnl_get_offload_stats_attr_size(attr_id); + nla_size += nla_total_size_64bit(size); + } + + if (nla_size != 0) + nla_size += nla_total_size(0); + + return nla_size; +} + static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, unsigned int filter_mask, @@ -3559,6 +3724,7 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, struct nlmsghdr *nlh; struct nlattr *attr; int s_prividx = *prividx; + int err; ASSERT_RTNL(); @@ -3587,8 +3753,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, const struct rtnl_link_ops *ops = dev->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS; attr = nla_nest_start(skb, IFLA_STATS_LINK_XSTATS); @@ -3612,8 +3776,6 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (master) ops = master->rtnl_link_ops; if (ops && ops->fill_linkxstats) { - int err; - *idxattr = IFLA_STATS_LINK_XSTATS_SLAVE; attr = nla_nest_start(skb, IFLA_STATS_LINK_XSTATS_SLAVE); @@ -3628,6 +3790,24 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, + *idxattr)) { + *idxattr = IFLA_STATS_LINK_OFFLOAD_XSTATS; + attr = nla_nest_start(skb, IFLA_STATS_LINK_OFFLOAD_XSTATS); + if (!attr) + goto nla_put_failure; + + err = rtnl_get_offload_stats(skb, dev, prividx); + if (err == -ENODATA) + nla_nest_cancel(skb, attr); + else + nla_nest_end(skb, attr); + + if (err && err != -ENODATA) + goto nla_put_failure; + *idxattr = 0; + } + nlmsg_end(skb, nlh); return 0; @@ -3642,10 +3822,6 @@ nla_put_failure: return -EMSGSIZE; } -static const struct nla_policy ifla_stats_policy[IFLA_STATS_MAX + 1] = { - [IFLA_STATS_LINK_64] = { .len = sizeof(struct rtnl_link_stats64) }, -}; - static size_t if_nlmsg_stats_size(const struct net_device *dev, u32 filter_mask) { @@ -3685,6 +3861,9 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, } } + if (stats_attr_valid(filter_mask, IFLA_STATS_LINK_OFFLOAD_XSTATS, 0)) + size += rtnl_get_offload_stats_size(dev); + return size; } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3864b4b68fa1..cbd19d250947 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -2444,6 +2444,25 @@ void skb_queue_purge(struct sk_buff_head *list) } EXPORT_SYMBOL(skb_queue_purge); +/** + * skb_rbtree_purge - empty a skb rbtree + * @root: root of the rbtree to empty + * + * Delete all buffers on an &sk_buff rbtree. Each buffer is removed from + * the list and one reference dropped. This function does not take + * any lock. Synchronization should be handled by the caller (e.g., TCP + * out-of-order queue is protected by the socket lock). + */ +void skb_rbtree_purge(struct rb_root *root) +{ + struct sk_buff *skb, *next; + + rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) + kfree_skb(skb); + + *root = RB_ROOT; +} + /** * skb_queue_head - queue a buffer at the list head * @list: list to use @@ -3078,11 +3097,31 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, sg = !!(features & NETIF_F_SG); csum = !!can_checksum_protocol(features, proto); - /* GSO partial only requires that we trim off any excess that - * doesn't fit into an MSS sized block, so take care of that - * now. - */ - if (sg && csum && (features & NETIF_F_GSO_PARTIAL)) { + if (sg && csum && (mss != GSO_BY_FRAGS)) { + if (!(features & NETIF_F_GSO_PARTIAL)) { + struct sk_buff *iter; + + if (!list_skb || + !net_gso_ok(features, skb_shinfo(head_skb)->gso_type)) + goto normal; + + /* Split the buffer at the frag_list pointer. + * This is based on the assumption that all + * buffers in the chain excluding the last + * containing the same amount of data. + */ + skb_walk_frags(head_skb, iter) { + if (skb_headlen(iter)) + goto normal; + + len -= iter->len; + } + } + + /* GSO partial only requires that we trim off any excess that + * doesn't fit into an MSS sized block, so take care of that + * now. + */ partial_segs = len / mss; if (partial_segs > 1) mss *= partial_segs; @@ -3090,6 +3129,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb, partial_segs = 0; } +normal: headroom = skb_headroom(head_skb); pos = skb_headlen(head_skb); @@ -3281,21 +3321,29 @@ perform_csum_check: */ segs->prev = tail; - /* Update GSO info on first skb in partial sequence. */ if (partial_segs) { + struct sk_buff *iter; int type = skb_shinfo(head_skb)->gso_type; + unsigned short gso_size = skb_shinfo(head_skb)->gso_size; /* Update type to add partial and then remove dodgy if set */ - type |= SKB_GSO_PARTIAL; + type |= (features & NETIF_F_GSO_PARTIAL) / NETIF_F_GSO_PARTIAL * SKB_GSO_PARTIAL; type &= ~SKB_GSO_DODGY; /* Update GSO info and prepare to start updating headers on * our way back down the stack of protocols. */ - skb_shinfo(segs)->gso_size = skb_shinfo(head_skb)->gso_size; - skb_shinfo(segs)->gso_segs = partial_segs; - skb_shinfo(segs)->gso_type = type; - SKB_GSO_CB(segs)->data_offset = skb_headroom(segs) + doffset; + for (iter = segs; iter; iter = iter->next) { + skb_shinfo(iter)->gso_size = gso_size; + skb_shinfo(iter)->gso_segs = partial_segs; + skb_shinfo(iter)->gso_type = type; + SKB_GSO_CB(iter)->data_offset = skb_headroom(iter) + doffset; + } + + if (tail->len - doffset <= gso_size) + skb_shinfo(tail)->gso_size = 0; + else if (tail != segs) + skb_shinfo(tail)->gso_segs = DIV_ROUND_UP(tail->len - doffset, gso_size); } /* Following permits correct backpressure, for protocols @@ -4474,17 +4522,24 @@ int skb_ensure_writable(struct sk_buff *skb, int write_len) } EXPORT_SYMBOL(skb_ensure_writable); -/* remove VLAN header from packet and update csum accordingly. */ -static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) +/* remove VLAN header from packet and update csum accordingly. + * expects a non skb_vlan_tag_present skb with a vlan tag payload + */ +int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) { struct vlan_hdr *vhdr; - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "__skb_vlan_pop got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = skb_ensure_writable(skb, VLAN_ETH_HLEN); if (unlikely(err)) - goto pull; + return err; skb_postpull_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); @@ -4501,12 +4556,14 @@ static int __skb_vlan_pop(struct sk_buff *skb, u16 *vlan_tci) skb_set_network_header(skb, ETH_HLEN); skb_reset_mac_len(skb); -pull: - __skb_pull(skb, offset); return err; } +EXPORT_SYMBOL(__skb_vlan_pop); +/* Pop a vlan tag either from hwaccel or from payload. + * Expects skb->data at mac header. + */ int skb_vlan_pop(struct sk_buff *skb) { u16 vlan_tci; @@ -4516,9 +4573,7 @@ int skb_vlan_pop(struct sk_buff *skb) if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { - if (unlikely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (unlikely(!eth_type_vlan(skb->protocol))) return 0; err = __skb_vlan_pop(skb, &vlan_tci); @@ -4526,9 +4581,7 @@ int skb_vlan_pop(struct sk_buff *skb) return err; } /* move next vlan tag to hw accel tag */ - if (likely((skb->protocol != htons(ETH_P_8021Q) && - skb->protocol != htons(ETH_P_8021AD)) || - skb->len < VLAN_ETH_HLEN)) + if (likely(!eth_type_vlan(skb->protocol))) return 0; vlan_proto = skb->protocol; @@ -4541,29 +4594,30 @@ int skb_vlan_pop(struct sk_buff *skb) } EXPORT_SYMBOL(skb_vlan_pop); +/* Push a vlan tag either into hwaccel or into payload (if hwaccel tag present). + * Expects skb->data at mac header. + */ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { if (skb_vlan_tag_present(skb)) { - unsigned int offset = skb->data - skb_mac_header(skb); + int offset = skb->data - skb_mac_header(skb); int err; - /* __vlan_insert_tag expect skb->data pointing to mac header. - * So change skb->data before calling it and change back to - * original position later - */ - __skb_push(skb, offset); + if (WARN_ONCE(offset, + "skb_vlan_push got skb with skb->data not at mac header (offset %d)\n", + offset)) { + return -EINVAL; + } + err = __vlan_insert_tag(skb, skb->vlan_proto, skb_vlan_tag_get(skb)); - if (err) { - __skb_pull(skb, offset); + if (err) return err; - } skb->protocol = skb->vlan_proto; skb->mac_len += VLAN_HLEN; skb_postpush_rcsum(skb, skb->data + (2 * ETH_ALEN), VLAN_HLEN); - __skb_pull(skb, offset); } __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); return 0; diff --git a/net/core/sock.c b/net/core/sock.c index fd7b41edf1ce..038e660ef844 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1315,24 +1315,6 @@ static void sock_copy(struct sock *nsk, const struct sock *osk) #endif } -void sk_prot_clear_portaddr_nulls(struct sock *sk, int size) -{ - unsigned long nulls1, nulls2; - - nulls1 = offsetof(struct sock, __sk_common.skc_node.next); - nulls2 = offsetof(struct sock, __sk_common.skc_portaddr_node.next); - if (nulls1 > nulls2) - swap(nulls1, nulls2); - - if (nulls1 != 0) - memset((char *)sk, 0, nulls1); - memset((char *)sk + nulls1 + sizeof(void *), 0, - nulls2 - nulls1 - sizeof(void *)); - memset((char *)sk + nulls2 + sizeof(void *), 0, - size - nulls2 - sizeof(void *)); -} -EXPORT_SYMBOL(sk_prot_clear_portaddr_nulls); - static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, int family) { @@ -1344,12 +1326,8 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmem_cache_alloc(slab, priority & ~__GFP_ZERO); if (!sk) return sk; - if (priority & __GFP_ZERO) { - if (prot->clear_sk) - prot->clear_sk(sk, prot->obj_size); - else - sk_prot_clear_nulls(sk, prot->obj_size); - } + if (priority & __GFP_ZERO) + sk_prot_clear_nulls(sk, prot->obj_size); } else sk = kmalloc(prot->obj_size, priority); diff --git a/net/core/stream.c b/net/core/stream.c index 159516a11b7e..1086c8b280a8 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -43,7 +43,6 @@ void sk_stream_write_space(struct sock *sk) rcu_read_unlock(); } } -EXPORT_SYMBOL(sk_stream_write_space); /** * sk_stream_wait_connect - Wait for a socket to get into the connected state diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index ff7736f7ff42..96e47c539bee 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -38,4 +38,7 @@ config NET_DSA_TAG_EDSA config NET_DSA_TAG_TRAILER bool +config NET_DSA_TAG_QCA + bool + endif diff --git a/net/dsa/Makefile b/net/dsa/Makefile index 8af4ded70f1c..a3380ed0e0be 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -7,3 +7,4 @@ dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_TRAILER) += tag_trailer.o +dsa_core-$(CONFIG_NET_DSA_TAG_QCA) += tag_qca.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 7e68bc6bc853..a6902c1e2f28 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -53,6 +53,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #endif #ifdef CONFIG_NET_DSA_TAG_BRCM [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, +#endif +#ifdef CONFIG_NET_DSA_TAG_QCA + [DSA_TAG_PROTO_QCA] = &qca_netdev_ops, #endif [DSA_TAG_PROTO_NONE] = &none_ops, }; @@ -61,27 +64,27 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { static DEFINE_MUTEX(dsa_switch_drivers_mutex); static LIST_HEAD(dsa_switch_drivers); -void register_switch_driver(struct dsa_switch_driver *drv) +void register_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_add_tail(&drv->list, &dsa_switch_drivers); + list_add_tail(&ops->list, &dsa_switch_drivers); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(register_switch_driver); -void unregister_switch_driver(struct dsa_switch_driver *drv) +void unregister_switch_driver(struct dsa_switch_ops *ops) { mutex_lock(&dsa_switch_drivers_mutex); - list_del_init(&drv->list); + list_del_init(&ops->list); mutex_unlock(&dsa_switch_drivers_mutex); } EXPORT_SYMBOL_GPL(unregister_switch_driver); -static struct dsa_switch_driver * +static struct dsa_switch_ops * dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, const char **_name, void **priv) { - struct dsa_switch_driver *ret; + struct dsa_switch_ops *ret; struct list_head *list; const char *name; @@ -90,13 +93,13 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, mutex_lock(&dsa_switch_drivers_mutex); list_for_each(list, &dsa_switch_drivers) { - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; - drv = list_entry(list, struct dsa_switch_driver, list); + ops = list_entry(list, struct dsa_switch_ops, list); - name = drv->probe(parent, host_dev, sw_addr, priv); + name = ops->probe(parent, host_dev, sw_addr, priv); if (name != NULL) { - ret = drv; + ret = ops; break; } } @@ -117,7 +120,7 @@ static ssize_t temp1_input_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp(ds, &temp); + ret = ds->ops->get_temp(ds, &temp); if (ret < 0) return ret; @@ -131,7 +134,7 @@ static ssize_t temp1_max_show(struct device *dev, struct dsa_switch *ds = dev_get_drvdata(dev); int temp, ret; - ret = ds->drv->get_temp_limit(ds, &temp); + ret = ds->ops->get_temp_limit(ds, &temp); if (ret < 0) return ret; @@ -149,7 +152,7 @@ static ssize_t temp1_max_store(struct device *dev, if (ret < 0) return ret; - ret = ds->drv->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); + ret = ds->ops->set_temp_limit(ds, DIV_ROUND_CLOSEST(temp, 1000)); if (ret < 0) return ret; @@ -164,7 +167,7 @@ static ssize_t temp1_max_alarm_show(struct device *dev, bool alarm; int ret; - ret = ds->drv->get_temp_alarm(ds, &alarm); + ret = ds->ops->get_temp_alarm(ds, &alarm); if (ret < 0) return ret; @@ -184,15 +187,15 @@ static umode_t dsa_hwmon_attrs_visible(struct kobject *kobj, { struct device *dev = container_of(kobj, struct device, kobj); struct dsa_switch *ds = dev_get_drvdata(dev); - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; umode_t mode = attr->mode; if (index == 1) { - if (!drv->get_temp_limit) + if (!ops->get_temp_limit) mode = 0; - else if (!drv->set_temp_limit) + else if (!ops->set_temp_limit) mode &= ~S_IWUSR; - } else if (index == 2 && !drv->get_temp_alarm) { + } else if (index == 2 && !ops->get_temp_alarm) { mode = 0; } return mode; @@ -228,8 +231,8 @@ int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, genphy_config_init(phydev); genphy_read_status(phydev); - if (ds->drv->adjust_link) - ds->drv->adjust_link(ds, port, phydev); + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); } return 0; @@ -303,7 +306,7 @@ void dsa_cpu_port_ethtool_restore(struct dsa_switch *ds) static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) { - struct dsa_switch_driver *drv = ds->drv; + struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; @@ -354,7 +357,10 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * switch. */ if (dst->cpu_switch == index) { - dst->tag_ops = dsa_resolve_tag_protocol(drv->tag_protocol); + enum dsa_tag_protocol tag_protocol; + + tag_protocol = ops->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { ret = PTR_ERR(dst->tag_ops); goto out; @@ -368,15 +374,17 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) /* * Do basic register setup. */ - ret = drv->setup(ds); + ret = ops->setup(ds); if (ret < 0) goto out; - ret = drv->set_addr(ds, dst->master_netdev->dev_addr); - if (ret < 0) - goto out; + if (ops->set_addr) { + ret = ops->set_addr(ds, dst->master_netdev->dev_addr); + if (ret < 0) + goto out; + } - if (!ds->slave_mii_bus && drv->phy_read) { + if (!ds->slave_mii_bus && ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(parent); if (!ds->slave_mii_bus) { ret = -ENOMEM; @@ -423,7 +431,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct device *parent) * register with hardware monitoring subsystem. * Treat registration error as non-fatal and ignore it. */ - if (drv->get_temp) { + if (ops->get_temp) { const char *netname = netdev_name(dst->master_netdev); char hname[IFNAMSIZ + 1]; int i, j; @@ -454,7 +462,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, struct device *parent, struct device *host_dev) { struct dsa_chip_data *cd = dst->pd->chip + index; - struct dsa_switch_driver *drv; + struct dsa_switch_ops *ops; struct dsa_switch *ds; int ret; const char *name; @@ -463,8 +471,8 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Probe for switch model. */ - drv = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); - if (drv == NULL) { + ops = dsa_switch_probe(parent, host_dev, cd->sw_addr, &name, &priv); + if (!ops) { netdev_err(dst->master_netdev, "[%d]: could not detect attached switch\n", index); return ERR_PTR(-EINVAL); @@ -483,7 +491,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->dst = dst; ds->index = index; ds->cd = cd; - ds->drv = drv; + ds->ops = ops; ds->priv = priv; ds->dev = parent; @@ -538,12 +546,12 @@ static void dsa_switch_destroy(struct dsa_switch *ds) ds->dsa_port_mask |= ~(1 << port); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } #ifdef CONFIG_PM_SLEEP -static int dsa_switch_suspend(struct dsa_switch *ds) +int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; @@ -557,18 +565,19 @@ static int dsa_switch_suspend(struct dsa_switch *ds) return ret; } - if (ds->drv->suspend) - ret = ds->drv->suspend(ds); + if (ds->ops->suspend) + ret = ds->ops->suspend(ds); return ret; } +EXPORT_SYMBOL_GPL(dsa_switch_suspend); -static int dsa_switch_resume(struct dsa_switch *ds) +int dsa_switch_resume(struct dsa_switch *ds) { int i, ret = 0; - if (ds->drv->resume) - ret = ds->drv->resume(ds); + if (ds->ops->resume) + ret = ds->ops->resume(ds); if (ret) return ret; @@ -585,6 +594,7 @@ static int dsa_switch_resume(struct dsa_switch *ds) return 0; } +EXPORT_SYMBOL_GPL(dsa_switch_resume); #endif /* platform driver init and cleanup *****************************************/ @@ -1086,7 +1096,6 @@ static int dsa_resume(struct device *d) static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); static const struct of_device_id dsa_of_match_table[] = { - { .compatible = "brcm,bcm7445-switch-v4.0" }, { .compatible = "marvell,dsa", }, {} }; diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index f30bad9678f0..f8a7d9aab437 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -294,25 +294,23 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus - * driver and before drv->setup() has run, since the switch drivers and + * driver and before ops->setup() has run, since the switch drivers and * the slave MDIO bus driver rely on these values for probing PHY * devices or not */ ds->phys_mii_mask = ds->enabled_port_mask; - err = ds->drv->setup(ds); + err = ds->ops->setup(ds); if (err < 0) return err; - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; + if (ds->ops->set_addr) { + err = ds->ops->set_addr(ds, dst->master_netdev->dev_addr); + if (err < 0) + return err; + } - err = ds->drv->set_addr(ds, dst->master_netdev->dev_addr); - if (err < 0) - return err; - - if (!ds->slave_mii_bus && ds->drv->phy_read) { + if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) return -ENOMEM; @@ -374,7 +372,7 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) dsa_user_port_unapply(port, index, ds); } - if (ds->slave_mii_bus && ds->drv->phy_read) + if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); } @@ -443,6 +441,7 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, struct dsa_switch_tree *dst, struct dsa_switch *ds) { + enum dsa_tag_protocol tag_protocol; struct net_device *ethernet_dev; struct device_node *ethernet; @@ -465,7 +464,8 @@ static int dsa_cpu_parse(struct device_node *port, u32 index, dst->cpu_port = index; } - dst->tag_ops = dsa_resolve_tag_protocol(ds->drv->tag_protocol); + tag_protocol = ds->ops->get_tag_protocol(ds); + dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); return PTR_ERR(dst->tag_ops); @@ -541,7 +541,7 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) ds->ports[reg].dn = port; - /* Initialize enabled_port_mask now for drv->setup() + /* Initialize enabled_port_mask now for ops->setup() * to have access to a correct value, just like what * net/dsa/dsa.c::dsa_switch_setup_one does. */ diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 00077a9c97f4..6cfd7388834e 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -81,5 +81,7 @@ extern const struct dsa_device_ops trailer_netdev_ops; /* tag_brcm.c */ extern const struct dsa_device_ops brcm_netdev_ops; +/* tag_qca.c */ +extern const struct dsa_device_ops qca_netdev_ops; #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index fc9196745225..6b1282c006b1 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -28,7 +28,7 @@ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_read(ds, addr, reg); + return ds->ops->phy_read(ds, addr, reg); return 0xffff; } @@ -38,7 +38,7 @@ static int dsa_slave_phy_write(struct mii_bus *bus, int addr, int reg, u16 val) struct dsa_switch *ds = bus->priv; if (ds->phys_mii_mask & (1 << addr)) - return ds->drv->phy_write(ds, addr, reg, val); + return ds->ops->phy_write(ds, addr, reg, val); return 0; } @@ -69,6 +69,30 @@ static inline bool dsa_port_is_bridged(struct dsa_slave_priv *p) return !!p->bridge_dev; } +static void dsa_port_set_stp_state(struct dsa_switch *ds, int port, u8 state) +{ + struct dsa_port *dp = &ds->ports[port]; + + if (ds->ops->port_stp_state_set) + ds->ops->port_stp_state_set(ds, port, state); + + if (ds->ops->port_fast_age) { + /* Fast age FDB entries or flush appropriate forwarding database + * for the given port, if we are moving it from Learning or + * Forwarding state, to Disabled or Blocking or Listening state. + */ + + if ((dp->stp_state == BR_STATE_LEARNING || + dp->stp_state == BR_STATE_FORWARDING) && + (state == BR_STATE_DISABLED || + state == BR_STATE_BLOCKING || + state == BR_STATE_LISTENING)) + ds->ops->port_fast_age(ds, port); + } + + dp->stp_state = state; +} + static int dsa_slave_open(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -98,14 +122,13 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->drv->port_enable) { - err = ds->drv->port_enable(ds, p->port, p->phy); + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, p->port, p->phy); if (err) goto clear_promisc; } - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, stp_state); + dsa_port_set_stp_state(ds, p->port, stp_state); if (p->phy) phy_start(p->phy); @@ -144,11 +167,10 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->drv->port_disable) - ds->drv->port_disable(ds, p->port, p->phy); + if (ds->ops->port_disable) + ds->ops->port_disable(ds, p->port, p->phy); - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_DISABLED); + dsa_port_set_stp_state(ds, p->port, BR_STATE_DISABLED); return 0; } @@ -209,13 +231,13 @@ static int dsa_slave_port_vlan_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_vlan_prepare || !ds->drv->port_vlan_add) + if (!ds->ops->port_vlan_prepare || !ds->ops->port_vlan_add) return -EOPNOTSUPP; - return ds->drv->port_vlan_prepare(ds, p->port, vlan, trans); + return ds->ops->port_vlan_prepare(ds, p->port, vlan, trans); } - ds->drv->port_vlan_add(ds, p->port, vlan, trans); + ds->ops->port_vlan_add(ds, p->port, vlan, trans); return 0; } @@ -226,10 +248,10 @@ static int dsa_slave_port_vlan_del(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (!ds->drv->port_vlan_del) + if (!ds->ops->port_vlan_del) return -EOPNOTSUPP; - return ds->drv->port_vlan_del(ds, p->port, vlan); + return ds->ops->port_vlan_del(ds, p->port, vlan); } static int dsa_slave_port_vlan_dump(struct net_device *dev, @@ -239,8 +261,8 @@ static int dsa_slave_port_vlan_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_vlan_dump) - return ds->drv->port_vlan_dump(ds, p->port, vlan, cb); + if (ds->ops->port_vlan_dump) + return ds->ops->port_vlan_dump(ds, p->port, vlan, cb); return -EOPNOTSUPP; } @@ -253,13 +275,13 @@ static int dsa_slave_port_fdb_add(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) { - if (!ds->drv->port_fdb_prepare || !ds->drv->port_fdb_add) + if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) return -EOPNOTSUPP; - return ds->drv->port_fdb_prepare(ds, p->port, fdb, trans); + return ds->ops->port_fdb_prepare(ds, p->port, fdb, trans); } - ds->drv->port_fdb_add(ds, p->port, fdb, trans); + ds->ops->port_fdb_add(ds, p->port, fdb, trans); return 0; } @@ -271,8 +293,8 @@ static int dsa_slave_port_fdb_del(struct net_device *dev, struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->port_fdb_del) - ret = ds->drv->port_fdb_del(ds, p->port, fdb); + if (ds->ops->port_fdb_del) + ret = ds->ops->port_fdb_del(ds, p->port, fdb); return ret; } @@ -284,8 +306,52 @@ static int dsa_slave_port_fdb_dump(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->port_fdb_dump) - return ds->drv->port_fdb_dump(ds, p->port, fdb, cb); + if (ds->ops->port_fdb_dump) + return ds->ops->port_fdb_dump(ds, p->port, fdb, cb); + + return -EOPNOTSUPP; +} + +static int dsa_slave_port_mdb_add(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb, + struct switchdev_trans *trans) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (switchdev_trans_ph_prepare(trans)) { + if (!ds->ops->port_mdb_prepare || !ds->ops->port_mdb_add) + return -EOPNOTSUPP; + + return ds->ops->port_mdb_prepare(ds, p->port, mdb, trans); + } + + ds->ops->port_mdb_add(ds, p->port, mdb, trans); + + return 0; +} + +static int dsa_slave_port_mdb_del(struct net_device *dev, + const struct switchdev_obj_port_mdb *mdb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_del) + return ds->ops->port_mdb_del(ds, p->port, mdb); + + return -EOPNOTSUPP; +} + +static int dsa_slave_port_mdb_dump(struct net_device *dev, + struct switchdev_obj_port_mdb *mdb, + switchdev_obj_dump_cb_t *cb) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + + if (ds->ops->port_mdb_dump) + return ds->ops->port_mdb_dump(ds, p->port, mdb, cb); return -EOPNOTSUPP; } @@ -308,9 +374,9 @@ static int dsa_slave_stp_state_set(struct net_device *dev, struct dsa_switch *ds = p->parent; if (switchdev_trans_ph_prepare(trans)) - return ds->drv->port_stp_state_set ? 0 : -EOPNOTSUPP; + return ds->ops->port_stp_state_set ? 0 : -EOPNOTSUPP; - ds->drv->port_stp_state_set(ds, p->port, attr->u.stp_state); + dsa_port_set_stp_state(ds, p->port, attr->u.stp_state); return 0; } @@ -326,8 +392,8 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, if (switchdev_trans_ph_prepare(trans)) return 0; - if (ds->drv->port_vlan_filtering) - return ds->drv->port_vlan_filtering(ds, p->port, + if (ds->ops->port_vlan_filtering) + return ds->ops->port_vlan_filtering(ds, p->port, attr->u.vlan_filtering); return 0; @@ -365,8 +431,8 @@ static int dsa_slave_ageing_time(struct net_device *dev, ds->ports[p->port].ageing_time = ageing_time; ageing_time = dsa_fastest_ageing_time(ds, ageing_time); - if (ds->drv->set_ageing_time) - return ds->drv->set_ageing_time(ds, ageing_time); + if (ds->ops->set_ageing_time) + return ds->ops->set_ageing_time(ds, ageing_time); return 0; } @@ -412,6 +478,10 @@ static int dsa_slave_port_obj_add(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_add(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_add(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), @@ -435,6 +505,9 @@ static int dsa_slave_port_obj_del(struct net_device *dev, err = dsa_slave_port_fdb_del(dev, SWITCHDEV_OBJ_PORT_FDB(obj)); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_del(dev, SWITCHDEV_OBJ_PORT_MDB(obj)); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_del(dev, SWITCHDEV_OBJ_PORT_VLAN(obj)); @@ -459,6 +532,10 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, SWITCHDEV_OBJ_PORT_FDB(obj), cb); break; + case SWITCHDEV_OBJ_ID_PORT_MDB: + err = dsa_slave_port_mdb_dump(dev, SWITCHDEV_OBJ_PORT_MDB(obj), + cb); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_slave_port_vlan_dump(dev, SWITCHDEV_OBJ_PORT_VLAN(obj), @@ -481,8 +558,8 @@ static int dsa_slave_bridge_port_join(struct net_device *dev, p->bridge_dev = br; - if (ds->drv->port_bridge_join) - ret = ds->drv->port_bridge_join(ds, p->port, br); + if (ds->ops->port_bridge_join) + ret = ds->ops->port_bridge_join(ds, p->port, br); return ret == -EOPNOTSUPP ? 0 : ret; } @@ -493,16 +570,15 @@ static void dsa_slave_bridge_port_leave(struct net_device *dev) struct dsa_switch *ds = p->parent; - if (ds->drv->port_bridge_leave) - ds->drv->port_bridge_leave(ds, p->port); + if (ds->ops->port_bridge_leave) + ds->ops->port_bridge_leave(ds, p->port); p->bridge_dev = NULL; /* Port left the bridge, put in BR_STATE_DISABLED by the bridge layer, * so allow it to be in BR_STATE_FORWARDING to be kept functional */ - if (ds->drv->port_stp_state_set) - ds->drv->port_stp_state_set(ds, p->port, BR_STATE_FORWARDING); + dsa_port_set_stp_state(ds, p->port, BR_STATE_FORWARDING); } static int dsa_slave_port_attr_get(struct net_device *dev, @@ -605,8 +681,8 @@ static int dsa_slave_get_regs_len(struct net_device *dev) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs_len) - return ds->drv->get_regs_len(ds, p->port); + if (ds->ops->get_regs_len) + return ds->ops->get_regs_len(ds, p->port); return -EOPNOTSUPP; } @@ -617,8 +693,8 @@ dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_regs) - ds->drv->get_regs(ds, p->port, regs, _p); + if (ds->ops->get_regs) + ds->ops->get_regs(ds, p->port, regs, _p); } static int dsa_slave_nway_reset(struct net_device *dev) @@ -651,8 +727,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; - if (ds->drv->get_eeprom_len) - return ds->drv->get_eeprom_len(ds); + if (ds->ops->get_eeprom_len) + return ds->ops->get_eeprom_len(ds); return 0; } @@ -663,8 +739,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_eeprom) - return ds->drv->get_eeprom(ds, eeprom, data); + if (ds->ops->get_eeprom) + return ds->ops->get_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -675,8 +751,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev, struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->set_eeprom) - return ds->drv->set_eeprom(ds, eeprom, data); + if (ds->ops->set_eeprom) + return ds->ops->set_eeprom(ds, eeprom, data); return -EOPNOTSUPP; } @@ -694,8 +770,8 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + len, "tx_bytes", len); strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); - if (ds->drv->get_strings != NULL) - ds->drv->get_strings(ds, p->port, data + 4 * len); + if (ds->ops->get_strings) + ds->ops->get_strings(ds, p->port, data + 4 * len); } } @@ -714,8 +790,8 @@ static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, dst->master_ethtool_ops.get_ethtool_stats(dev, stats, data); } - if (ds->drv->get_ethtool_stats) - ds->drv->get_ethtool_stats(ds, cpu_port, data + count); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, cpu_port, data + count); } static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) @@ -727,8 +803,8 @@ static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) if (dst->master_ethtool_ops.get_sset_count) count += dst->master_ethtool_ops.get_sset_count(dev, sset); - if (sset == ETH_SS_STATS && ds->drv->get_sset_count) - count += ds->drv->get_sset_count(ds); + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -755,14 +831,14 @@ static void dsa_cpu_port_get_strings(struct net_device *dev, dst->master_ethtool_ops.get_strings(dev, stringset, data); } - if (stringset == ETH_SS_STATS && ds->drv->get_strings) { + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { ndata = data + mcount * len; /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle * the output after to prepend our CPU port prefix we * constructed earlier */ - ds->drv->get_strings(ds, cpu_port, ndata); - count = ds->drv->get_sset_count(ds); + ds->ops->get_strings(ds, cpu_port, ndata); + count = ds->ops->get_sset_count(ds); for (i = 0; i < count; i++) { memmove(ndata + (i * len + sizeof(pfx)), ndata + i * len, len - sizeof(pfx)); @@ -782,8 +858,8 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, data[1] = dev->stats.tx_bytes; data[2] = dev->stats.rx_packets; data[3] = dev->stats.rx_bytes; - if (ds->drv->get_ethtool_stats != NULL) - ds->drv->get_ethtool_stats(ds, p->port, data + 4); + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, p->port, data + 4); } static int dsa_slave_get_sset_count(struct net_device *dev, int sset) @@ -795,8 +871,8 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) int count; count = 4; - if (ds->drv->get_sset_count != NULL) - count += ds->drv->get_sset_count(ds); + if (ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); return count; } @@ -809,8 +885,8 @@ static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_switch *ds = p->parent; - if (ds->drv->get_wol) - ds->drv->get_wol(ds, p->port, w); + if (ds->ops->get_wol) + ds->ops->get_wol(ds, p->port, w); } static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) @@ -819,8 +895,8 @@ static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) struct dsa_switch *ds = p->parent; int ret = -EOPNOTSUPP; - if (ds->drv->set_wol) - ret = ds->drv->set_wol(ds, p->port, w); + if (ds->ops->set_wol) + ret = ds->ops->set_wol(ds, p->port, w); return ret; } @@ -831,10 +907,10 @@ static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->set_eee) + if (!ds->ops->set_eee) return -EOPNOTSUPP; - ret = ds->drv->set_eee(ds, p->port, p->phy, e); + ret = ds->ops->set_eee(ds, p->port, p->phy, e); if (ret) return ret; @@ -850,10 +926,10 @@ static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) struct dsa_switch *ds = p->parent; int ret; - if (!ds->drv->get_eee) + if (!ds->ops->get_eee) return -EOPNOTSUPP; - ret = ds->drv->get_eee(ds, p->port, e); + ret = ds->ops->get_eee(ds, p->port, e); if (ret) return ret; @@ -988,8 +1064,8 @@ static void dsa_slave_adjust_link(struct net_device *dev) p->old_pause = p->phy->pause; } - if (ds->drv->adjust_link && status_changed) - ds->drv->adjust_link(ds, p->port, p->phy); + if (ds->ops->adjust_link && status_changed) + ds->ops->adjust_link(ds, p->port, p->phy); if (status_changed) phy_print_status(p->phy); @@ -1004,8 +1080,8 @@ static int dsa_slave_fixed_link_update(struct net_device *dev, if (dev) { p = netdev_priv(dev); ds = p->parent; - if (ds->drv->fixed_link_update) - ds->drv->fixed_link_update(ds, p->port, status); + if (ds->ops->fixed_link_update) + ds->ops->fixed_link_update(ds, p->port, status); } return 0; @@ -1062,8 +1138,8 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, phy_dn = port_dn; } - if (ds->drv->get_phy_flags) - phy_flags = ds->drv->get_phy_flags(ds, p->port); + if (ds->ops->get_phy_flags) + phy_flags = ds->ops->get_phy_flags(ds, p->port); if (phy_dn) { int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c new file mode 100644 index 000000000000..0c90cacee7aa --- /dev/null +++ b/net/dsa/tag_qca.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include "dsa_priv.h" + +#define QCA_HDR_LEN 2 +#define QCA_HDR_VERSION 0x2 + +#define QCA_HDR_RECV_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_RECV_VERSION_S 14 +#define QCA_HDR_RECV_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_RECV_PRIORITY_S 11 +#define QCA_HDR_RECV_TYPE_MASK GENMASK(10, 6) +#define QCA_HDR_RECV_TYPE_S 6 +#define QCA_HDR_RECV_FRAME_IS_TAGGED BIT(3) +#define QCA_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) + +#define QCA_HDR_XMIT_VERSION_MASK GENMASK(15, 14) +#define QCA_HDR_XMIT_VERSION_S 14 +#define QCA_HDR_XMIT_PRIORITY_MASK GENMASK(13, 11) +#define QCA_HDR_XMIT_PRIORITY_S 11 +#define QCA_HDR_XMIT_CONTROL_MASK GENMASK(10, 8) +#define QCA_HDR_XMIT_CONTROL_S 8 +#define QCA_HDR_XMIT_FROM_CPU BIT(7) +#define QCA_HDR_XMIT_DP_BIT_MASK GENMASK(6, 0) + +static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + u16 *phdr, hdr; + + dev->stats.tx_packets++; + dev->stats.tx_bytes += skb->len; + + if (skb_cow_head(skb, 0) < 0) + goto out_free; + + skb_push(skb, QCA_HDR_LEN); + + memmove(skb->data, skb->data + QCA_HDR_LEN, 2 * ETH_ALEN); + phdr = (u16 *)(skb->data + 2 * ETH_ALEN); + + /* Set the version field, and set destination port information */ + hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | + QCA_HDR_XMIT_FROM_CPU | + BIT(p->port); + + *phdr = htons(hdr); + + return skb; + +out_free: + kfree_skb(skb); + return NULL; +} + +static int qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_switch *ds; + u8 ver; + int port; + __be16 *phdr, hdr; + + if (unlikely(!dst)) + goto out_drop; + + skb = skb_unshare(skb, GFP_ATOMIC); + if (!skb) + goto out; + + if (unlikely(!pskb_may_pull(skb, QCA_HDR_LEN))) + goto out_drop; + + /* The QCA header is added by the switch between src addr and Ethertype + * At this point, skb->data points to ethertype so header should be + * right before + */ + phdr = (__be16 *)(skb->data - 2); + hdr = ntohs(*phdr); + + /* Make sure the version is correct */ + ver = (hdr & QCA_HDR_RECV_VERSION_MASK) >> QCA_HDR_RECV_VERSION_S; + if (unlikely(ver != QCA_HDR_VERSION)) + goto out_drop; + + /* Remove QCA tag and recalculate checksum */ + skb_pull_rcsum(skb, QCA_HDR_LEN); + memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, + ETH_HLEN - QCA_HDR_LEN); + + /* This protocol doesn't support cascading multiple switches so it's + * safe to assume the switch is first in the tree + */ + ds = dst->ds[0]; + if (!ds) + goto out_drop; + + /* Get source port information */ + port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); + if (!ds->ports[port].netdev) + goto out_drop; + + /* Update skb & forward the frame accordingly */ + skb_push(skb, ETH_HLEN); + skb->pkt_type = PACKET_HOST; + skb->dev = ds->ports[port].netdev; + skb->protocol = eth_type_trans(skb, skb->dev); + + skb->dev->stats.rx_packets++; + skb->dev->stats.rx_bytes += skb->len; + + netif_receive_skb(skb); + + return 0; + +out_drop: + kfree_skb(skb); +out: + return 0; +} + +const struct dsa_device_ops qca_netdev_ops = { + .xmit = qca_tag_xmit, + .rcv = qca_tag_rcv, +}; diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 50d6a9b49f6c..300b06888fdf 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -640,6 +640,21 @@ config TCP_CONG_CDG D.A. Hayes and G. Armitage. "Revisiting TCP congestion control using delay gradients." In Networking 2011. Preprint: http://goo.gl/No3vdg +config TCP_CONG_BBR + tristate "BBR TCP" + default n + ---help--- + + BBR (Bottleneck Bandwidth and RTT) TCP congestion control aims to + maximize network utilization and minimize queues. It builds an explicit + model of the the bottleneck delivery rate and path round-trip + propagation delay. It tolerates packet loss and delay unrelated to + congestion. It can operate over LAN, WAN, cellular, wifi, or cable + modem links. It can coexist with flows that use loss-based congestion + control, and can operate with shallow buffers, deep buffers, + bufferbloat, policers, or AQM schemes that do not provide a delay + signal. It requires the fq ("Fair Queue") pacing packet scheduler. + choice prompt "Default TCP congestion control" default DEFAULT_CUBIC @@ -674,6 +689,9 @@ choice config DEFAULT_CDG bool "CDG" if TCP_CONG_CDG=y + config DEFAULT_BBR + bool "BBR" if TCP_CONG_BBR=y + config DEFAULT_RENO bool "Reno" endchoice diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 24629b6f57cc..bc6a6c8b9bcd 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - tcp_recovery.o \ + tcp_rate.o tcp_recovery.o \ tcp_offload.o datagram.o raw.o udp.o udplite.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ @@ -41,6 +41,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_INET_UDP_DIAG) += udp_diag.o obj-$(CONFIG_NET_TCPPROBE) += tcp_probe.o +obj-$(CONFIG_TCP_CONG_BBR) += tcp_bbr.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o obj-$(CONFIG_TCP_CONG_CDG) += tcp_cdg.o obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 55513e654d79..1effc986739e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -211,24 +211,19 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { - /* Check special setups for testing purpose to enable TFO w/o - * requiring TCP_FASTOPEN sockopt. + /* Enable TFO w/o requiring TCP_FASTOPEN socket option. * Note that only TCP sockets (SOCK_STREAM) will reach here. - * Also fastopenq may already been allocated because this - * socket was in TCP_LISTEN state previously but was - * shutdown() (rather than close()). + * Also fastopen backlog may already been set via the option + * because the socket was in TCP_LISTEN state previously but + * was shutdown() rather than close(). */ - if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) - fastopen_queue_tune(sk, backlog); - else if ((sysctl_tcp_fastopen & - TFO_SERVER_WO_SOCKOPT2) != 0) - fastopen_queue_tune(sk, - ((uint)sysctl_tcp_fastopen) >> 16); - + fastopen_queue_tune(sk, backlog); tcp_fastopen_init_key_once(true); } + err = inet_csk_listen_start(sk, backlog); if (err) goto out; @@ -921,6 +916,8 @@ const struct proto_ops inet_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, @@ -1195,7 +1192,7 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, encap; + bool udpfrag = false, fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; unsigned int offset = 0; @@ -1248,6 +1245,8 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, if (IS_ERR_OR_NULL(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); @@ -1262,9 +1261,13 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; } - tot_len = skb_shinfo(skb)->gso_size + - SKB_GSO_CB(skb)->data_offset + - skb->head - (unsigned char *)iph; + + if (gso_partial) + tot_len = skb_shinfo(skb)->gso_size + + SKB_GSO_CB(skb)->data_offset + + skb->head - (unsigned char *)iph; + else + tot_len = skb->len - nhoff; } else { if (!fixedid) iph->id = htons(id++); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 1b25daf8c7f1..c3b80478226e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -93,9 +93,6 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return NULL; switch (id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, tb); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, tb); break; @@ -137,9 +134,6 @@ static void fib_replace_table(struct net *net, struct fib_table *old, { #ifdef CONFIG_IP_MULTIPLE_TABLES switch (new->tb_id) { - case RT_TABLE_LOCAL: - rcu_assign_pointer(net->ipv4.fib_local, new); - break; case RT_TABLE_MAIN: rcu_assign_pointer(net->ipv4.fib_main, new); break; @@ -188,26 +182,13 @@ static void fib_flush(struct net *net) struct fib_table *tb; hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) - flushed += fib_table_flush(tb); + flushed += fib_table_flush(net, tb); } if (flushed) rt_cache_flush(net); } -void fib_flush_external(struct net *net) -{ - struct fib_table *tb; - struct hlist_head *head; - unsigned int h; - - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - head = &net->ipv4.fib_table_hash[h]; - hlist_for_each_entry(tb, head, tb_hlist) - fib_table_flush_external(tb); - } -} - /* * Find address type as if only "dev" was present in the system. If * on_dev is NULL then all interfaces are taken into consideration. @@ -596,13 +577,13 @@ int ip_rt_ioctl(struct net *net, unsigned int cmd, void __user *arg) if (cmd == SIOCDELRT) { tb = fib_get_table(net, cfg.fc_table); if (tb) - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); else err = -ESRCH; } else { tb = fib_new_table(net, cfg.fc_table); if (tb) - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); else err = -ENOBUFS; } @@ -725,7 +706,7 @@ static int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_delete(tb, &cfg); + err = fib_table_delete(net, tb, &cfg); errout: return err; } @@ -747,7 +728,7 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh) goto errout; } - err = fib_table_insert(tb, &cfg); + err = fib_table_insert(net, tb, &cfg); errout: return err; } @@ -834,9 +815,9 @@ static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifad cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - fib_table_insert(tb, &cfg); + fib_table_insert(net, tb, &cfg); else - fib_table_delete(tb, &cfg); + fib_table_delete(net, tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -1250,7 +1231,6 @@ static void ip_fib_net_exit(struct net *net) rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES - RCU_INIT_POINTER(net->ipv4.fib_local, NULL); RCU_INIT_POINTER(net->ipv4.fib_main, NULL); RCU_INIT_POINTER(net->ipv4.fib_default, NULL); #endif @@ -1261,7 +1241,7 @@ static void ip_fib_net_exit(struct net *net) hlist_for_each_entry_safe(tb, tmp, head, tb_hlist) { hlist_del(&tb->tb_hlist); - fib_table_flush(tb); + fib_table_flush(net, tb); fib_free_table(tb); } } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 6e9ea69e5f75..2e50062f642d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -56,6 +56,9 @@ int __fib_lookup(struct net *net, struct flowi4 *flp, }; int err; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi4_to_flowi(flp)); + err = fib_rules_lookup(net->ipv4.rules_ops, flowi4_to_flowi(flp), 0, &arg); #ifdef CONFIG_IP_ROUTE_CLASSID if (arg.rule) @@ -161,6 +164,14 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } +static int call_fib_rule_notifiers(struct net *net, + enum fib_event_type event_type) +{ + struct fib_notifier_info info; + + return call_fib_notifiers(net, event_type, &info); +} + static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, @@ -217,7 +228,7 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD); err = 0; errout: @@ -239,7 +250,7 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - fib_flush_external(rule->fr_net); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL); errout: return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index e9f56225e53f..388d3e21629b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1580,7 +1580,8 @@ static bool fib_good_nh(const struct fib_nh *nh) rcu_read_lock_bh(); - n = __ipv4_neigh_lookup_noref(nh->nh_dev, nh->nh_gw); + n = __ipv4_neigh_lookup_noref(nh->nh_dev, + (__force u32)nh->nh_gw); if (n) state = n->nud_state; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index e2ffc2a5c7db..31cef3602585 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -80,10 +81,47 @@ #include #include #include -#include #include #include "fib_lookup.h" +static BLOCKING_NOTIFIER_HEAD(fib_chain); + +int register_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_register(&fib_chain, nb); +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return blocking_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return blocking_notifier_call_chain(&fib_chain, event_type, info); +} + +static int call_fib_entry_notifiers(struct net *net, + enum fib_event_type event_type, u32 dst, + int dst_len, struct fib_info *fi, + u8 tos, u8 type, u32 tb_id, u32 nlflags) +{ + struct fib_entry_notifier_info info = { + .dst = dst, + .dst_len = dst_len, + .fi = fi, + .tos = tos, + .type = type, + .tb_id = tb_id, + .nlflags = nlflags, + }; + return call_fib_notifiers(net, event_type, &info.info); +} + #define MAX_STAT_DEPTH 32 #define KEYLENGTH (8*sizeof(t_key)) @@ -1076,12 +1114,13 @@ static int fib_insert_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. */ -int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) +int fib_table_insert(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *)tb->tb_data; struct fib_alias *fa, *new_fa; struct key_vector *l, *tp; - unsigned int nlflags = 0; + u16 nlflags = NLM_F_EXCL; struct fib_info *fi; u8 plen = cfg->fc_dst_len; u8 slen = KEYLENGTH - plen; @@ -1126,6 +1165,8 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (cfg->fc_nlflags & NLM_F_EXCL) goto out; + nlflags &= ~NLM_F_EXCL; + /* We have 2 goals: * 1. Find exact match for type, scope, fib_info to avoid * duplicate routes @@ -1151,6 +1192,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) struct fib_info *fi_drop; u8 state; + nlflags |= NLM_F_REPLACE; fa = fa_first; if (fa_match) { if (fa == fa_match) @@ -1172,17 +1214,6 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - err = switchdev_fib_ipv4_add(key, plen, fi, - new_fa->fa_tos, - cfg->fc_type, - cfg->fc_nlflags, - tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - kmem_cache_free(fn_alias_kmem, new_fa); - goto out; - } - hlist_replace_rcu(&fa->fa_list, &new_fa->fa_list); alias_free_mem_rcu(fa); @@ -1190,8 +1221,13 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) fib_release_info(fi_drop); if (state & FA_S_ACCESSED) rt_cache_flush(cfg->fc_nlinfo.nl_net); + + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, + key, plen, fi, + new_fa->fa_tos, cfg->fc_type, + tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, - tb->tb_id, &cfg->fc_nlinfo, NLM_F_REPLACE); + tb->tb_id, &cfg->fc_nlinfo, nlflags); goto succeeded; } @@ -1203,7 +1239,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) goto out; if (cfg->fc_nlflags & NLM_F_APPEND) - nlflags = NLM_F_APPEND; + nlflags |= NLM_F_APPEND; else fa = fa_first; } @@ -1211,6 +1247,7 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; + nlflags |= NLM_F_CREATE; err = -ENOBUFS; new_fa = kmem_cache_alloc(fn_alias_kmem, GFP_KERNEL); if (!new_fa) @@ -1224,30 +1261,22 @@ int fib_table_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->tb_id = tb->tb_id; new_fa->fa_default = -1; - /* (Optionally) offload fib entry to switch hardware. */ - err = switchdev_fib_ipv4_add(key, plen, fi, tos, cfg->fc_type, - cfg->fc_nlflags, tb->tb_id); - if (err) { - switchdev_fib_ipv4_abort(fi); - goto out_free_new_fa; - } - /* Insert new entry to the list. */ err = fib_insert_alias(t, tp, l, new_fa, fa, key); if (err) - goto out_sw_fib_del; + goto out_free_new_fa; if (!plen) tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, key, plen, fi, tos, + cfg->fc_type, tb->tb_id, cfg->fc_nlflags); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: return 0; -out_sw_fib_del: - switchdev_fib_ipv4_del(key, plen, fi, tos, cfg->fc_type, tb->tb_id); out_free_new_fa: kmem_cache_free(fn_alias_kmem, new_fa); out: @@ -1486,7 +1515,8 @@ static void fib_remove_alias(struct trie *t, struct key_vector *tp, } /* Caller must hold RTNL. */ -int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) +int fib_table_delete(struct net *net, struct fib_table *tb, + struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *fa_to_delete; @@ -1539,9 +1569,9 @@ int fib_table_delete(struct fib_table *tb, struct fib_config *cfg) if (!fa_to_delete) return -ESRCH; - switchdev_fib_ipv4_del(key, plen, fa_to_delete->fa_info, tos, - cfg->fc_type, tb->tb_id); - + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, + fa_to_delete->fa_info, tos, cfg->fc_type, + tb->tb_id, 0); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1730,82 +1760,8 @@ out: return NULL; } -/* Caller must hold RTNL */ -void fib_table_flush_external(struct fib_table *tb) -{ - struct trie *t = (struct trie *)tb->tb_data; - struct key_vector *pn = t->kv; - unsigned long cindex = 1; - struct hlist_node *tmp; - struct fib_alias *fa; - - /* walk trie in reverse order */ - for (;;) { - unsigned char slen = 0; - struct key_vector *n; - - if (!(cindex--)) { - t_key pkey = pn->key; - - /* cannot resize the trie vector */ - if (IS_TRIE(pn)) - break; - - /* resize completed node */ - pn = resize(t, pn); - cindex = get_index(pkey, pn); - - continue; - } - - /* grab the next available node */ - n = get_child(pn, cindex); - if (!n) - continue; - - if (IS_TNODE(n)) { - /* record pn and cindex for leaf walking */ - pn = n; - cindex = 1ul << n->bits; - - continue; - } - - hlist_for_each_entry_safe(fa, tmp, &n->leaf, fa_list) { - struct fib_info *fi = fa->fa_info; - - /* if alias was cloned to local then we just - * need to remove the local copy from main - */ - if (tb->tb_id != fa->tb_id) { - hlist_del_rcu(&fa->fa_list); - alias_free_mem_rcu(fa); - continue; - } - - /* record local slen */ - slen = fa->fa_slen; - - if (!fi || !(fi->fib_flags & RTNH_F_OFFLOAD)) - continue; - - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); - } - - /* update leaf slen */ - n->slen = slen; - - if (hlist_empty(&n->leaf)) { - put_child_root(pn, n->key, NULL); - node_free(n); - } - } -} - /* Caller must hold RTNL. */ -int fib_table_flush(struct fib_table *tb) +int fib_table_flush(struct net *net, struct fib_table *tb) { struct trie *t = (struct trie *)tb->tb_data; struct key_vector *pn = t->kv; @@ -1854,9 +1810,11 @@ int fib_table_flush(struct fib_table *tb) continue; } - switchdev_fib_ipv4_del(n->key, KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); + call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, + n->key, + KEYLENGTH - fa->fa_slen, + fi, fa->fa_tos, fa->fa_type, + tb->tb_id, 0); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 321d57f825ce..cf50f7e2b012 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -631,7 +631,7 @@ static struct genl_family fou_nl_family = { .netnsok = true, }; -static struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { +static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = { [FOU_ATTR_PORT] = { .type = NLA_U16, }, [FOU_ATTR_AF] = { .type = NLA_U8, }, [FOU_ATTR_IPPROTO] = { .type = NLA_U8, }, diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index ecd1e09dbbf1..96e0efecefa6 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo; + bool need_csum, ufo, gso_partial; if (!skb->encapsulation) goto out; @@ -69,6 +69,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); gre_offset = outer_hlen - tnl_hlen; skb = segs; @@ -96,7 +98,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (skb_is_gso(skb)) { + if (gso_partial) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 9b4ca87f70ba..606cc3e85d2b 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -472,6 +472,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, continue; } + /* Based on RFC3376 5.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->crcount)) && + (type == IGMPV3_ALLOW_NEW_SOURCES || + type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -499,6 +508,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ip_mc_list *pmc, scount++; stotal++; if ((type == IGMPV3_ALLOW_NEW_SOURCES || type == IGMPV3_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 38c2c47fe0e8..e4d16fc5bbb3 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -45,6 +45,7 @@ struct inet_diag_entry { u16 family; u16 userlocks; u32 ifindex; + u32 mark; }; static DEFINE_MUTEX(inet_diag_table_mutex); @@ -98,6 +99,7 @@ static size_t inet_sk_attr_size(void) + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ + nla_total_size(sizeof(struct inet_diag_meminfo)) + nla_total_size(sizeof(struct inet_diag_msg)) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) @@ -108,7 +110,8 @@ static size_t inet_sk_attr_size(void) int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns) + struct user_namespace *user_ns, + bool net_admin) { const struct inet_sock *inet = inet_sk(sk); @@ -135,6 +138,9 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, } #endif + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, sk->sk_mark)) + goto errout; + r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk)); r->idiag_inode = sock_i_ino(sk); @@ -148,7 +154,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { const struct tcp_congestion_ops *ca_ops; const struct inet_diag_handler *handler; @@ -174,7 +181,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, r->idiag_timer = 0; r->idiag_retrans = 0; - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; if (ext & (1 << (INET_DIAG_MEMINFO - 1))) { @@ -273,10 +280,11 @@ static int inet_csk_diag_fill(struct sock *sk, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { - return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, - user_ns, portid, seq, nlmsg_flags, unlh); + return inet_sk_diag_fill(sk, inet_csk(sk), skb, req, user_ns, + portid, seq, nlmsg_flags, unlh, net_admin); } static int inet_twsk_diag_fill(struct sock *sk, @@ -318,8 +326,9 @@ static int inet_twsk_diag_fill(struct sock *sk, static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, bool net_admin) { + struct request_sock *reqsk = inet_reqsk(sk); struct inet_diag_msg *r; struct nlmsghdr *nlh; long tmo; @@ -333,7 +342,7 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, inet_diag_msg_common_fill(r, sk); r->idiag_state = TCP_SYN_RECV; r->idiag_timer = 1; - r->idiag_retrans = inet_reqsk(sk)->num_retrans; + r->idiag_retrans = reqsk->num_retrans; BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != offsetof(struct sock, sk_cookie)); @@ -345,6 +354,10 @@ static int inet_req_diag_fill(struct sock *sk, struct sk_buff *skb, r->idiag_uid = 0; r->idiag_inode = 0; + if (net_admin && nla_put_u32(skb, INET_DIAG_MARK, + inet_rsk(reqsk)->ir_mark)) + return -EMSGSIZE; + nlmsg_end(skb, nlh); return 0; } @@ -353,7 +366,7 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, const struct inet_diag_req_v2 *r, struct user_namespace *user_ns, u32 portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, bool net_admin) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill(sk, skb, portid, seq, @@ -361,10 +374,10 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, if (sk->sk_state == TCP_NEW_SYN_RECV) return inet_req_diag_fill(sk, skb, portid, seq, - nlmsg_flags, unlh); + nlmsg_flags, unlh, net_admin); return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, - nlmsg_flags, unlh); + nlmsg_flags, unlh, net_admin); } struct sock *inet_diag_find_one_icsk(struct net *net, @@ -434,7 +447,8 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); @@ -580,6 +594,14 @@ static int inet_diag_bc_run(const struct nlattr *_bc, yes = 0; break; } + case INET_DIAG_BC_MARK_COND: { + struct inet_diag_markcond *cond; + + cond = (struct inet_diag_markcond *)(op + 1); + if ((entry->mark & cond->mask) != cond->mark) + yes = 0; + break; + } } if (yes) { @@ -624,6 +646,12 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk) entry.dport = ntohs(inet->inet_dport); entry.ifindex = sk->sk_bound_dev_if; entry.userlocks = sk_fullsock(sk) ? sk->sk_userlocks : 0; + if (sk_fullsock(sk)) + entry.mark = sk->sk_mark; + else if (sk->sk_state == TCP_NEW_SYN_RECV) + entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark; + else + entry.mark = 0; return inet_diag_bc_run(bc, &entry); } @@ -706,10 +734,25 @@ static bool valid_port_comparison(const struct inet_diag_bc_op *op, return true; } -static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) +static bool valid_markcond(const struct inet_diag_bc_op *op, int len, + int *min_len) { - const void *bc = bytecode; - int len = bytecode_len; + *min_len += sizeof(struct inet_diag_markcond); + return len >= *min_len; +} + +static int inet_diag_bc_audit(const struct nlattr *attr, + const struct sk_buff *skb) +{ + bool net_admin = netlink_net_capable(skb, CAP_NET_ADMIN); + const void *bytecode, *bc; + int bytecode_len, len; + + if (!attr || nla_len(attr) < sizeof(struct inet_diag_bc_op)) + return -EINVAL; + + bytecode = bc = nla_data(attr); + len = bytecode_len = nla_len(attr); while (len > 0) { int min_len = sizeof(struct inet_diag_bc_op); @@ -732,6 +775,12 @@ static int inet_diag_bc_audit(const void *bytecode, int bytecode_len) if (!valid_port_comparison(bc, len, &min_len)) return -EINVAL; break; + case INET_DIAG_BC_MARK_COND: + if (!net_admin) + return -EPERM; + if (!valid_markcond(bc, len, &min_len)) + return -EINVAL; + break; case INET_DIAG_BC_AUTO: case INET_DIAG_BC_JMP: case INET_DIAG_BC_NOP: @@ -760,7 +809,8 @@ static int inet_csk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, - const struct nlattr *bc) + const struct nlattr *bc, + bool net_admin) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -768,7 +818,8 @@ static int inet_csk_diag_dump(struct sock *sk, return inet_csk_diag_fill(sk, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, + net_admin); } static void twsk_build_assert(void) @@ -804,6 +855,7 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct net *net = sock_net(skb->sk); int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -844,7 +896,8 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, cb->args[3] > 0) goto next_listen; - if (inet_csk_diag_dump(sk, skb, cb, r, bc) < 0) { + if (inet_csk_diag_dump(sk, skb, cb, r, + bc, net_admin) < 0) { spin_unlock_bh(&ilb->lock); goto done; } @@ -912,7 +965,7 @@ skip_listen_ht: sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh); + cb->nlh, net_admin); if (res < 0) { spin_unlock_bh(lock); goto done; @@ -1020,13 +1073,13 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) if (nlh->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(nlh, hdrlen)) { struct nlattr *attr; + int err; attr = nlmsg_find_attr(nlh, hdrlen, INET_DIAG_REQ_BYTECODE); - if (!attr || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; + err = inet_diag_bc_audit(attr, skb); + if (err) + return err; } { struct netlink_dump_control c = { @@ -1051,13 +1104,13 @@ static int inet_diag_handler_cmd(struct sk_buff *skb, struct nlmsghdr *h) h->nlmsg_flags & NLM_F_DUMP) { if (nlmsg_attrlen(h, hdrlen)) { struct nlattr *attr; + int err; attr = nlmsg_find_attr(h, hdrlen, INET_DIAG_REQ_BYTECODE); - if (!attr || - nla_len(attr) < sizeof(struct inet_diag_bc_op) || - inet_diag_bc_audit(nla_data(attr), nla_len(attr))) - return -EINVAL; + err = inet_diag_bc_audit(attr, skb); + if (err) + return err; } { struct netlink_dump_control c = { diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 113cc43df789..576f705d8180 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -246,25 +246,6 @@ static void gre_err(struct sk_buff *skb, u32 info) ipgre_err(skb, info, &tpi); } -static __be64 key_to_tunnel_id(__be32 key) -{ -#ifdef __BIG_ENDIAN - return (__force __be64)((__force u32)key); -#else - return (__force __be64)((__force u64)key << 32); -#endif -} - -/* Returns the least-significant 32 bits of a __be64. */ -static __be32 tunnel_id_to_key(__be64 x) -{ -#ifdef __BIG_ENDIAN - return (__force __be32)x; -#else - return (__force __be32)((__force u64)x >> 32); -#endif -} - static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) { @@ -290,7 +271,7 @@ static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, __be64 tun_id; flags = tpi->flags & (TUNNEL_CSUM | TUNNEL_KEY); - tun_id = key_to_tunnel_id(tpi->key); + tun_id = key32_to_tunnel_id(tpi->key); tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0); if (!tun_dst) return PACKET_REJECT; @@ -446,7 +427,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, flags = tun_info->key.tun_flags & (TUNNEL_CSUM | TUNNEL_KEY); gre_build_header(skb, tunnel_hlen, flags, proto, - tunnel_id_to_key(tun_info->key.tun_id), 0); + tunnel_id_to_key32(tun_info->key.tun_id), 0); df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index dde37fb340bf..05d105832bdb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include @@ -98,6 +99,14 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(skb->len); ip_send_check(iph); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV4, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); @@ -197,6 +206,13 @@ static int ip_finish_output2(struct net *net, struct sock *sk, struct sk_buff *s skb = skb2; } + if (lwtunnel_xmit_redirect(dst->lwtstate)) { + int res = lwtunnel_xmit(skb); + + if (res < 0 || res == LWTUNNEL_XMIT_DONE) + return res; + } + rcu_read_lock_bh(); nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); @@ -482,7 +498,7 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_IP_VS) || defined(CONFIG_IP_VS_MODULE) +#if IS_ENABLED(CONFIG_IP_VS) to->ipvs_property = from->ipvs_property; #endif skb_copy_secmark(to, from); @@ -1566,8 +1582,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, } oif = arg->bound_dev_if; - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; + oif = oif ? : skb->skb_iif; flowi4_init_output(&fl4, oif, IP4_REPLY_MARK(net, skb->mark), diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 71a52f4d4cff..af4919792b6a 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -284,9 +284,12 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, ipc->ttl = val; break; case IP_TOS: - if (cmsg->cmsg_len != CMSG_LEN(sizeof(int))) + if (cmsg->cmsg_len == CMSG_LEN(sizeof(int))) + val = *(int *)CMSG_DATA(cmsg); + else if (cmsg->cmsg_len == CMSG_LEN(sizeof(u8))) + val = *(u8 *)CMSG_DATA(cmsg); + else return -EINVAL; - val = *(int *)CMSG_DATA(cmsg); if (val < 0 || val > 255) return -EINVAL; ipc->tos = val; diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 95649ebd2874..5719d6ba0824 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -55,6 +55,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include @@ -546,6 +547,81 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb, return 0; } +void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + u32 headroom = sizeof(struct iphdr); + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + const struct iphdr *inner_iph; + struct rtable *rt; + struct flowi4 fl4; + __be16 df = 0; + u8 tos, ttl; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET)) + goto tx_error; + key = &tun_info->key; + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + inner_iph = (const struct iphdr *)skb_inner_network_header(skb); + tos = key->tos; + if (tos == 1) { + if (skb->protocol == htons(ETH_P_IP)) + tos = inner_iph->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); + } + init_tunnel_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0, + RT_TOS(tos), tunnel->parms.link); + if (tunnel->encap.type != TUNNEL_ENCAP_NONE) + goto tx_error; + rt = ip_route_output_key(tunnel->net, &fl4); + if (IS_ERR(rt)) { + dev->stats.tx_carrier_errors++; + goto tx_error; + } + if (rt->dst.dev == dev) { + ip_rt_put(rt); + dev->stats.collisions++; + goto tx_error; + } + tos = ip_tunnel_ecn_encap(tos, inner_iph, skb); + ttl = key->ttl; + if (ttl == 0) { + if (skb->protocol == htons(ETH_P_IP)) + ttl = inner_iph->ttl; + else if (skb->protocol == htons(ETH_P_IPV6)) + ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit; + else + ttl = ip4_dst_hoplimit(&rt->dst); + } + if (key->tun_flags & TUNNEL_DONT_FRAGMENT) + df = htons(IP_DF); + else if (skb->protocol == htons(ETH_P_IP)) + df = inner_iph->frag_off & htons(IP_DF); + headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len; + if (headroom > dev->needed_headroom) + dev->needed_headroom = headroom; + + if (skb_cow_head(skb, dev->needed_headroom)) { + ip_rt_put(rt); + goto tx_dropped; + } + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos, + key->ttl, df, !net_eq(tunnel->net, dev_net(dev))); + return; +tx_error: + dev->stats.tx_errors++; + goto kfree; +tx_dropped: + dev->stats.tx_dropped++; +kfree: + kfree_skb(skb); +} +EXPORT_SYMBOL_GPL(ip_md_tunnel_xmit); + void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, const struct iphdr *tnl_params, u8 protocol) { diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 0f227db0e9ac..777bc1883870 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -69,7 +69,7 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_scrub_packet(skb, xnet); - skb_clear_hash(skb); + skb_clear_hash_if_not_l4(skb); skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 1d71c40eaaf3..071a785c65eb 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -85,7 +85,6 @@ /* Define the timeout for waiting for a DHCP/BOOTP/RARP reply */ #define CONF_OPEN_RETRIES 2 /* (Re)open devices twice */ #define CONF_SEND_RETRIES 6 /* Send six requests per open */ -#define CONF_INTER_TIMEOUT (HZ) /* Inter-device timeout: 1 second */ #define CONF_BASE_TIMEOUT (HZ*2) /* Initial timeout: 2 seconds */ #define CONF_TIMEOUT_RANDOM (HZ) /* Maximum amount of randomization */ #define CONF_TIMEOUT_MULT *7/4 /* Rate of timeout growth */ @@ -188,7 +187,7 @@ struct ic_device { }; static struct ic_device *ic_first_dev __initdata; /* List of open device */ -static struct net_device *ic_dev __initdata; /* Selected device */ +static struct ic_device *ic_dev __initdata; /* Selected device */ static bool __init ic_is_init_dev(struct net_device *dev) { @@ -307,7 +306,7 @@ static void __init ic_close_devs(void) while ((d = next)) { next = d->next; dev = d->dev; - if (dev != ic_dev && !netdev_uses_dsa(dev)) { + if ((!ic_dev || dev != ic_dev->dev) && !netdev_uses_dsa(dev)) { pr_debug("IP-Config: Downing %s\n", dev->name); dev_change_flags(dev, d->flags); } @@ -372,7 +371,7 @@ static int __init ic_setup_if(void) int err; memset(&ir, 0, sizeof(ir)); - strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->name); + strcpy(ir.ifr_ifrn.ifrn_name, ic_dev->dev->name); set_sockaddr(sin, ic_myaddr, 0); if ((err = ic_devinet_ioctl(SIOCSIFADDR, &ir)) < 0) { pr_err("IP-Config: Unable to set interface address (%d)\n", @@ -396,7 +395,7 @@ static int __init ic_setup_if(void) * out, we'll try to muddle along. */ if (ic_dev_mtu != 0) { - strcpy(ir.ifr_name, ic_dev->name); + strcpy(ir.ifr_name, ic_dev->dev->name); ir.ifr_mtu = ic_dev_mtu; if ((err = ic_dev_ioctl(SIOCSIFMTU, &ir)) < 0) pr_err("IP-Config: Unable to set interface mtu to %d (%d)\n", @@ -568,7 +567,7 @@ ic_rarp_recv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt goto drop_unlock; /* We have a winner! */ - ic_dev = dev; + ic_dev = d; if (ic_myaddr == NONE) ic_myaddr = tip; ic_servaddr = sip; @@ -655,8 +654,6 @@ static struct packet_type bootp_packet_type __initdata = { .func = ic_bootp_recv, }; -static __be32 ic_dev_xid; /* Device under configuration */ - /* * Initialize DHCP/BOOTP extension fields in the request. */ @@ -666,14 +663,14 @@ static const u8 ic_bootp_cookie[4] = { 99, 130, 83, 99 }; #ifdef IPCONFIG_DHCP static void __init -ic_dhcp_init_options(u8 *options) +ic_dhcp_init_options(u8 *options, struct ic_device *d) { u8 mt = ((ic_servaddr == NONE) ? DHCPDISCOVER : DHCPREQUEST); u8 *e = options; int len; - pr_debug("DHCP: Sending message type %d\n", mt); + pr_debug("DHCP: Sending message type %d (%s)\n", mt, d->dev->name); memcpy(e, ic_bootp_cookie, 4); /* RFC1048 Magic Cookie */ e += 4; @@ -857,7 +854,7 @@ static void __init ic_bootp_send_if(struct ic_device *d, unsigned long jiffies_d /* add DHCP options or BOOTP extensions */ #ifdef IPCONFIG_DHCP if (ic_proto_enabled & IC_USE_DHCP) - ic_dhcp_init_options(b->exten); + ic_dhcp_init_options(b->exten, d); else #endif ic_bootp_init_ext(b->exten); @@ -1033,14 +1030,8 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str /* Is it a reply to our BOOTP request? */ if (b->op != BOOTP_REPLY || b->xid != d->xid) { - net_err_ratelimited("DHCP/BOOTP: Reply not for us, op[%x] xid[%x]\n", - b->op, b->xid); - goto drop_unlock; - } - - /* Is it a reply for the device we are configuring? */ - if (b->xid != ic_dev_xid) { - net_err_ratelimited("DHCP/BOOTP: Ignoring delayed packet\n"); + net_err_ratelimited("DHCP/BOOTP: Reply not for us on %s, op[%x] xid[%x]\n", + d->dev->name, b->op, b->xid); goto drop_unlock; } @@ -1075,7 +1066,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } } - pr_debug("DHCP: Got message type %d\n", mt); + pr_debug("DHCP: Got message type %d (%s)\n", mt, d->dev->name); switch (mt) { case DHCPOFFER: @@ -1130,7 +1121,7 @@ static int __init ic_bootp_recv(struct sk_buff *skb, struct net_device *dev, str } /* We have a winner! */ - ic_dev = dev; + ic_dev = d; ic_myaddr = b->your_ip; ic_servaddr = b->server_ip; ic_addrservaddr = b->iph.saddr; @@ -1225,9 +1216,6 @@ static int __init ic_dynamic(void) timeout = CONF_BASE_TIMEOUT + (timeout % (unsigned int) CONF_TIMEOUT_RANDOM); for (;;) { #ifdef IPCONFIG_BOOTP - /* Track the device we are configuring */ - ic_dev_xid = d->xid; - if (do_bootp && (d->able & IC_BOOTP)) ic_bootp_send_if(d, jiffies - start_jiffies); #endif @@ -1236,15 +1224,19 @@ static int __init ic_dynamic(void) ic_rarp_send_if(d); #endif - jiff = jiffies + (d->next ? CONF_INTER_TIMEOUT : timeout); - while (time_before(jiffies, jiff) && !ic_got_reply) - schedule_timeout_uninterruptible(1); + if (!d->next) { + jiff = jiffies + timeout; + while (time_before(jiffies, jiff) && !ic_got_reply) + schedule_timeout_uninterruptible(1); + } #ifdef IPCONFIG_DHCP /* DHCP isn't done until we get a DHCPACK. */ if ((ic_got_reply & IC_BOOTP) && (ic_proto_enabled & IC_USE_DHCP) && ic_dhcp_msgtype != DHCPACK) { ic_got_reply = 0; + /* continue on device that got the reply */ + d = ic_dev; pr_cont(","); continue; } @@ -1487,7 +1479,7 @@ static int __init ip_auto_config(void) #endif /* IPCONFIG_DYNAMIC */ } else { /* Device selected manually or only one device -> use it */ - ic_dev = ic_first_dev->dev; + ic_dev = ic_first_dev; } addr = root_nfs_parse_addr(root_server_path); @@ -1500,14 +1492,6 @@ static int __init ip_auto_config(void) if (ic_defaults() < 0) return -1; - /* - * Close all network devices except the device we've - * autoconfigured and set up routes. - */ - ic_close_devs(); - if (ic_setup_if() < 0 || ic_setup_routes() < 0) - return -1; - /* * Record which protocol was actually used. */ @@ -1522,7 +1506,7 @@ static int __init ip_auto_config(void) pr_info("IP-Config: Complete:\n"); pr_info(" device=%s, hwaddr=%*phC, ipaddr=%pI4, mask=%pI4, gw=%pI4\n", - ic_dev->name, ic_dev->addr_len, ic_dev->dev_addr, + ic_dev->dev->name, ic_dev->dev->addr_len, ic_dev->dev->dev_addr, &ic_myaddr, &ic_netmask, &ic_gateway); pr_info(" host=%s, domain=%s, nis-domain=%s\n", utsname()->nodename, ic_domain, utsname()->domainname); @@ -1542,7 +1526,18 @@ static int __init ip_auto_config(void) pr_cont("\n"); #endif /* !SILENT */ - return 0; + /* + * Close all network devices except the device we've + * autoconfigured and set up routes. + */ + if (ic_setup_if() < 0 || ic_setup_routes() < 0) + err = -1; + else + err = 0; + + ic_close_devs(); + + return err; } late_initcall(ip_auto_config); diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 4ae3f8e6c6cc..c9392589c415 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -115,6 +115,7 @@ #include #include #include +#include static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -193,6 +194,7 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); + struct metadata_dst *tun_dst = NULL; struct ip_tunnel *tunnel; const struct iphdr *iph; @@ -216,7 +218,12 @@ static int ipip_tunnel_rcv(struct sk_buff *skb, u8 ipproto) tpi = &ipip_tpi; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - return ip_tunnel_rcv(tunnel, skb, tpi, NULL, log_ecn_error); + if (tunnel->collect_md) { + tun_dst = ip_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + return ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); } return -1; @@ -270,7 +277,10 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, ipproto); - ip_tunnel_xmit(skb, dev, tiph, ipproto); + if (tunnel->collect_md) + ip_md_tunnel_xmit(skb, dev, ipproto); + else + ip_tunnel_xmit(skb, dev, tiph, ipproto); return NETDEV_TX_OK; tx_error: @@ -380,13 +390,14 @@ static int ipip_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) } static void ipip_netlink_parms(struct nlattr *data[], - struct ip_tunnel_parm *parms) + struct ip_tunnel_parm *parms, bool *collect_md) { memset(parms, 0, sizeof(*parms)); parms->iph.version = 4; parms->iph.protocol = IPPROTO_IPIP; parms->iph.ihl = 5; + *collect_md = false; if (!data) return; @@ -414,6 +425,9 @@ static void ipip_netlink_parms(struct nlattr *data[], if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + *collect_md = true; } /* This function returns true when ENCAP attributes are present in the nl msg */ @@ -453,18 +467,18 @@ static bool ipip_netlink_encap_parms(struct nlattr *data[], static int ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { + struct ip_tunnel *t = netdev_priv(dev); struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; if (ipip_netlink_encap_parms(data, &ipencap)) { - struct ip_tunnel *t = netdev_priv(dev); int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &t->collect_md); return ip_tunnel_newlink(dev, tb, &p); } @@ -473,6 +487,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], { struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; + bool collect_md; if (ipip_netlink_encap_parms(data, &ipencap)) { struct ip_tunnel *t = netdev_priv(dev); @@ -482,7 +497,9 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], return err; } - ipip_netlink_parms(data, &p); + ipip_netlink_parms(data, &p, &collect_md); + if (collect_md) + return -EINVAL; if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) @@ -516,6 +533,8 @@ static size_t ipip_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -544,6 +563,9 @@ static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) tunnel->encap.flags)) goto nla_put_failure; + if (tunnel->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -562,6 +584,7 @@ static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ipip_link_ops __read_mostly = { diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index c187c60e3e0c..d613309e3e5d 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -25,17 +25,6 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. -config NF_CONNTRACK_PROC_COMPAT - bool "proc/sysctl compatibility with old connection tracking" - depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 - default y - help - This option enables /proc and sysctl compatibility with the old - layer 3 dependent connection tracking. This is needed to keep - old programs that have not been adapted to the new names working. - - If unsure, say Y. - if NF_TABLES config NF_TABLES_IPV4 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index 87b073da14c9..853328f8fd05 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -4,11 +4,6 @@ # objects for l3 independent conntrack nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o -ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) -ifeq ($(CONFIG_PROC_FS),y) -nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o -endif -endif # connection tracking obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f993545a3373..7c00ce90adb8 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -156,7 +156,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = 4, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index ae1a71a97132..713c09a74b90 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -110,7 +110,7 @@ static unsigned int ipv4_helper(void *priv, if (!help) return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; @@ -202,47 +202,6 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { }, }; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; - -static struct ctl_table ip_ct_sysctl_table[] = { - { - .procname = "ip_conntrack_max", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_count", - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_buckets", - .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_checksum", - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_log_invalid", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &log_invalid_proto_min, - .extra2 = &log_invalid_proto_max, - }, - { } -}; -#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ - /* Fast function for those who don't want to parse /proc (and I don't blame them). */ /* Reversing the socket's dst/src point of view gives us the reply @@ -350,20 +309,6 @@ static struct nf_sockopt_ops so_getorigdst = { static int ipv4_init_net(struct net *net) { -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - struct nf_ip_net *in = &net->ct.nf_ct_proto; - in->ctl_table = kmemdup(ip_ct_sysctl_table, - sizeof(ip_ct_sysctl_table), - GFP_KERNEL); - if (!in->ctl_table) - return -ENOMEM; - - in->ctl_table[0].data = &nf_conntrack_max; - in->ctl_table[1].data = &net->ct.count; - in->ctl_table[2].data = &nf_conntrack_htable_size; - in->ctl_table[3].data = &net->ct.sysctl_checksum; - in->ctl_table[4].data = &net->ct.sysctl_log_invalid; -#endif return 0; } @@ -379,9 +324,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .nlattr_tuple_size = ipv4_nlattr_tuple_size, .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, -#endif -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - .ctl_table_path = "net/ipv4/netfilter", #endif .init_net = ipv4_init_net, .me = THIS_MODULE, @@ -492,16 +434,7 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) goto cleanup_icmpv4; } -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - ret = nf_conntrack_ipv4_compat_init(); - if (ret < 0) - goto cleanup_proto; -#endif return ret; -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - cleanup_proto: - nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); -#endif cleanup_icmpv4: nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); cleanup_udp4: @@ -520,9 +453,6 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) static void __exit nf_conntrack_l3proto_ipv4_fini(void) { synchronize_net(); -#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - nf_conntrack_ipv4_compat_fini(); -#endif nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c deleted file mode 100644 index 63923710f325..000000000000 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ /dev/null @@ -1,492 +0,0 @@ -/* ip_conntrack proc compat - based on ip_conntrack_standalone.c - * - * (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * (C) 2006-2010 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct ct_iter_state { - struct seq_net_private p; - struct hlist_nulls_head *hash; - unsigned int htable_size; - unsigned int bucket; -}; - -static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) -{ - struct ct_iter_state *st = seq->private; - struct hlist_nulls_node *n; - - for (st->bucket = 0; - st->bucket < st->htable_size; - st->bucket++) { - n = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - if (!is_a_nulls(n)) - return n; - } - return NULL; -} - -static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, - struct hlist_nulls_node *head) -{ - struct ct_iter_state *st = seq->private; - - head = rcu_dereference(hlist_nulls_next_rcu(head)); - while (is_a_nulls(head)) { - if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= st->htable_size) - return NULL; - } - head = rcu_dereference( - hlist_nulls_first_rcu(&st->hash[st->bucket])); - } - return head; -} - -static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_nulls_node *head = ct_get_first(seq); - - if (head) - while (pos && (head = ct_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *ct_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - struct ct_iter_state *st = seq->private; - - rcu_read_lock(); - - nf_conntrack_get_ht(&st->hash, &st->htable_size); - return ct_get_idx(seq, *pos); -} - -static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - return ct_get_next(s, v); -} - -static void ct_seq_stop(struct seq_file *s, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -#ifdef CONFIG_NF_CONNTRACK_SECMARK -static void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ - int ret; - u32 len; - char *secctx; - - ret = security_secid_to_secctx(ct->secmark, &secctx, &len); - if (ret) - return; - - seq_printf(s, "secctx=%s ", secctx); - - security_release_secctx(secctx, len); -} -#else -static inline void ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) -{ -} -#endif - -static bool ct_seq_should_skip(const struct nf_conn *ct, - const struct net *net, - const struct nf_conntrack_tuple_hash *hash) -{ - /* we only want to print DIR_ORIGINAL */ - if (NF_CT_DIRECTION(hash)) - return true; - - if (nf_ct_l3num(ct) != AF_INET) - return true; - - if (!net_eq(nf_ct_net(ct), net)) - return true; - - return false; -} - -static int ct_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_tuple_hash *hash = v; - struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); - const struct nf_conntrack_l3proto *l3proto; - const struct nf_conntrack_l4proto *l4proto; - int ret = 0; - - NF_CT_ASSERT(ct); - if (ct_seq_should_skip(ct, seq_file_net(s), hash)) - return 0; - - if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) - return 0; - - /* check if we raced w. object reuse */ - if (!nf_ct_is_confirmed(ct) || - ct_seq_should_skip(ct, seq_file_net(s), hash)) - goto release; - - l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); - NF_CT_ASSERT(l3proto); - l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - NF_CT_ASSERT(l4proto); - - ret = -ENOSPC; - seq_printf(s, "%-8s %u %ld ", - l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0); - - if (l4proto->print_conntrack) - l4proto->print_conntrack(s, ct); - - if (seq_has_overflowed(s)) - goto release; - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) - goto release; - - if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) - seq_printf(s, "[UNREPLIED] "); - - print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, - l3proto, l4proto); - - if (seq_has_overflowed(s)) - goto release; - - if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) - goto release; - - if (test_bit(IPS_ASSURED_BIT, &ct->status)) - seq_printf(s, "[ASSURED] "); - -#ifdef CONFIG_NF_CONNTRACK_MARK - seq_printf(s, "mark=%u ", ct->mark); -#endif - - ct_show_secctx(s, ct); - - seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)); - - if (seq_has_overflowed(s)) - goto release; - - ret = 0; -release: - nf_ct_put(ct); - return ret; -} - -static const struct seq_operations ct_seq_ops = { - .start = ct_seq_start, - .next = ct_seq_next, - .stop = ct_seq_stop, - .show = ct_seq_show -}; - -static int ct_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_seq_ops, - sizeof(struct ct_iter_state)); -} - -static const struct file_operations ct_file_ops = { - .owner = THIS_MODULE, - .open = ct_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -/* expects */ -struct ct_expect_iter_state { - struct seq_net_private p; - unsigned int bucket; -}; - -static struct hlist_node *ct_expect_get_first(struct seq_file *seq) -{ - struct ct_expect_iter_state *st = seq->private; - struct hlist_node *n; - - for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { - n = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - if (n) - return n; - } - return NULL; -} - -static struct hlist_node *ct_expect_get_next(struct seq_file *seq, - struct hlist_node *head) -{ - struct ct_expect_iter_state *st = seq->private; - - head = rcu_dereference(hlist_next_rcu(head)); - while (head == NULL) { - if (++st->bucket >= nf_ct_expect_hsize) - return NULL; - head = rcu_dereference( - hlist_first_rcu(&nf_ct_expect_hash[st->bucket])); - } - return head; -} - -static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) -{ - struct hlist_node *head = ct_expect_get_first(seq); - - if (head) - while (pos && (head = ct_expect_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *exp_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(RCU) -{ - rcu_read_lock(); - return ct_expect_get_idx(seq, *pos); -} - -static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - (*pos)++; - return ct_expect_get_next(seq, v); -} - -static void exp_seq_stop(struct seq_file *seq, void *v) - __releases(RCU) -{ - rcu_read_unlock(); -} - -static int exp_seq_show(struct seq_file *s, void *v) -{ - struct nf_conntrack_expect *exp; - const struct hlist_node *n = v; - - exp = hlist_entry(n, struct nf_conntrack_expect, hnode); - - if (!net_eq(nf_ct_net(exp->master), seq_file_net(s))) - return 0; - - if (exp->tuple.src.l3num != AF_INET) - return 0; - - if (exp->timeout.function) - seq_printf(s, "%ld ", timer_pending(&exp->timeout) - ? (long)(exp->timeout.expires - jiffies)/HZ : 0); - else - seq_printf(s, "- "); - - seq_printf(s, "proto=%u ", exp->tuple.dst.protonum); - - print_tuple(s, &exp->tuple, - __nf_ct_l3proto_find(exp->tuple.src.l3num), - __nf_ct_l4proto_find(exp->tuple.src.l3num, - exp->tuple.dst.protonum)); - seq_putc(s, '\n'); - - return 0; -} - -static const struct seq_operations exp_seq_ops = { - .start = exp_seq_start, - .next = exp_seq_next, - .stop = exp_seq_stop, - .show = exp_seq_show -}; - -static int exp_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &exp_seq_ops, - sizeof(struct ct_expect_iter_state)); -} - -static const struct file_operations ip_exp_file_ops = { - .owner = THIS_MODULE, - .open = exp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - if (*pos == 0) - return SEQ_START_TOKEN; - - for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct net *net = seq_file_net(seq); - int cpu; - - for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return per_cpu_ptr(net->ct.stat, cpu); - } - - return NULL; -} - -static void ct_cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int ct_cpu_seq_show(struct seq_file *seq, void *v) -{ - struct net *net = seq_file_net(seq); - unsigned int nr_conntracks = atomic_read(&net->ct.count); - const struct ip_conntrack_stat *st = v; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); - return 0; - } - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " - "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", - nr_conntracks, - st->searched, - st->found, - st->new, - st->invalid, - st->ignore, - st->delete, - st->delete_list, - st->insert, - st->insert_failed, - st->drop, - st->early_drop, - st->error, - - st->expect_new, - st->expect_create, - st->expect_delete, - st->search_restart - ); - return 0; -} - -static const struct seq_operations ct_cpu_seq_ops = { - .start = ct_cpu_seq_start, - .next = ct_cpu_seq_next, - .stop = ct_cpu_seq_stop, - .show = ct_cpu_seq_show, -}; - -static int ct_cpu_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_net(inode, file, &ct_cpu_seq_ops, - sizeof(struct seq_net_private)); -} - -static const struct file_operations ct_cpu_seq_fops = { - .owner = THIS_MODULE, - .open = ct_cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_net, -}; - -static int __net_init ip_conntrack_net_init(struct net *net) -{ - struct proc_dir_entry *proc, *proc_exp, *proc_stat; - - proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); - if (!proc) - goto err1; - - proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, - &ip_exp_file_ops); - if (!proc_exp) - goto err2; - - proc_stat = proc_create("ip_conntrack", S_IRUGO, - net->proc_net_stat, &ct_cpu_seq_fops); - if (!proc_stat) - goto err3; - return 0; - -err3: - remove_proc_entry("ip_conntrack_expect", net->proc_net); -err2: - remove_proc_entry("ip_conntrack", net->proc_net); -err1: - return -ENOMEM; -} - -static void __net_exit ip_conntrack_net_exit(struct net *net) -{ - remove_proc_entry("ip_conntrack", net->proc_net_stat); - remove_proc_entry("ip_conntrack_expect", net->proc_net); - remove_proc_entry("ip_conntrack", net->proc_net); -} - -static struct pernet_operations ip_conntrack_net_ops = { - .init = ip_conntrack_net_init, - .exit = ip_conntrack_net_exit, -}; - -int __init nf_conntrack_ipv4_compat_init(void) -{ - return register_pernet_subsys(&ip_conntrack_net_ops); -} - -void __exit nf_conntrack_ipv4_compat_fini(void) -{ - unregister_pernet_subsys(&ip_conntrack_net_ops); -} diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index c567e1b5d799..d075b3cf2400 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -149,7 +149,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's @@ -327,17 +327,6 @@ static struct ctl_table icmp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table icmp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_icmp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -355,40 +344,14 @@ static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_icmp_net *in) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, - sizeof(icmp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &in->timeout; -#endif -#endif - return 0; -} - static int icmp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_icmp_net *in = icmp_pernet(net); struct nf_proto_net *pn = &in->pn; in->timeout = nf_ct_icmp_timeout; - ret = icmp_kmemdup_compat_sysctl_table(pn, in); - if (ret < 0) - return ret; - - ret = icmp_kmemdup_sysctl_table(pn, in); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return icmp_kmemdup_sysctl_table(pn, in); } static struct nf_proto_net *icmp_get_net_proto(struct net *net) diff --git a/net/ipv4/netfilter/nf_dup_ipv4.c b/net/ipv4/netfilter/nf_dup_ipv4.c index ceb187308120..cf986e1c7bbd 100644 --- a/net/ipv4/netfilter/nf_dup_ipv4.c +++ b/net/ipv4/netfilter/nf_dup_ipv4.c @@ -74,21 +74,19 @@ void nf_dup_ipv4(struct net *net, struct sk_buff *skb, unsigned int hooknum, nf_conntrack_get(skb->nfct); #endif /* - * If we are in PREROUTING/INPUT, the checksum must be recalculated - * since the length could have changed as a result of defragmentation. - * - * We also decrease the TTL to mitigate potential loops between two - * hosts. + * If we are in PREROUTING/INPUT, decrease the TTL to mitigate potential + * loops between two hosts. * * Set %IP_DF so that the original source is notified of a potentially * decreased MTU on the clone route. IPv6 does this too. + * + * IP header checksum will be recalculated at ip_local_out. */ iph = ip_hdr(skb); iph->frag_off |= htons(IP_DF); if (hooknum == NF_INET_PRE_ROUTING || hooknum == NF_INET_LOCAL_IN) --iph->ttl; - ip_send_check(iph); if (nf_dup_ipv4_route(net, skb, gw, oif)) { __this_cpu_write(nf_skb_duplicated, true); diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index e7ad950cf9ef..b24795e2ee6d 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -62,7 +62,7 @@ static void dump_arp_packet(struct nf_log_buf *m, /* If it's for Ethernet and the lengths are OK, then log the ARP * payload. */ - if (ah->ar_hrd != htons(1) || + if (ah->ar_hrd != htons(ARPHRD_ETHER) || ah->ar_hln != ETH_ALEN || ah->ar_pln != sizeof(__be32)) return; @@ -111,8 +111,7 @@ static struct nf_logger nf_arp_logger __read_mostly = { static int __net_init nf_log_arp_net_init(struct net *net) { - nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); - return 0; + return nf_log_set(net, NFPROTO_ARP, &nf_arp_logger); } static void __net_exit nf_log_arp_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index 076aadda0473..856648966f4c 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -29,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -46,7 +46,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); if (ih == NULL) { @@ -76,7 +76,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, if (ntohs(ih->frag_off) & IP_OFFSET) nf_log_buf_add(m, "FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - if ((logflags & XT_LOG_IPOPT) && + if ((logflags & NF_LOG_IPOPT) && ih->ihl * 4 > sizeof(struct iphdr)) { const unsigned char *op; unsigned char _opt[4 * 15 - sizeof(struct iphdr)]; @@ -250,7 +250,7 @@ static void dump_ipv4_packet(struct nf_log_buf *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && !iphoff) + if ((logflags & NF_LOG_UID) && !iphoff) nf_log_dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ @@ -282,7 +282,7 @@ static void dump_ipv4_mac_header(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; - if (!(logflags & XT_LOG_MACDECODE)) + if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { @@ -347,8 +347,7 @@ static struct nf_logger nf_ip_logger __read_mostly = { static int __net_init nf_log_ipv4_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV4, &nf_ip_logger); } static void __net_exit nf_log_ipv4_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index 9414923f1e15..edf05002d674 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -88,8 +88,8 @@ gre_manip_pkt(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, enum nf_nat_manip_type maniptype) { - const struct gre_hdr *greh; - struct gre_hdr_pptp *pgreh; + const struct gre_base_hdr *greh; + struct pptp_gre_header *pgreh; /* pgreh includes two optional 32bit fields which are not required * to be there. That's where the magic '8' comes from */ @@ -97,18 +97,19 @@ gre_manip_pkt(struct sk_buff *skb, return false; greh = (void *)skb->data + hdroff; - pgreh = (struct gre_hdr_pptp *)greh; + pgreh = (struct pptp_gre_header *)greh; /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype != NF_NAT_MANIP_DST) return true; - switch (greh->version) { - case GRE_VERSION_1701: + + switch (greh->flags & GRE_VERSION) { + case GRE_VERSION_0: /* We do not currently NAT any GREv0 packets. * Try to behave like "nf_nat_proto_unknown" */ break; - case GRE_VERSION_PPTP: + case GRE_VERSION_1: pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c index cd84d4295a20..805c8ddfe860 100644 --- a/net/ipv4/netfilter/nf_tables_arp.c +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -21,7 +21,7 @@ nft_do_chain_arp(void *priv, { struct nft_pktinfo pkt; - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); return nft_do_chain(&pkt, priv); } @@ -80,7 +80,10 @@ static int __init nf_tables_arp_init(void) { int ret; - nft_register_chain_type(&filter_arp); + ret = nft_register_chain_type(&filter_arp); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_arp_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_arp); diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index e44ba3b12fbb..2840a29b2e04 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -103,7 +103,10 @@ static int __init nf_tables_ipv4_init(void) { int ret; - nft_register_chain_type(&filter_ipv4); + ret = nft_register_chain_type(&filter_ipv4); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_ipv4); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 9f665b63a927..7143ca1a6af9 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -46,6 +46,8 @@ #include #include +#define TCPUDP_MIB_MAX max_t(u32, UDP_MIB_MAX, TCP_MIB_MAX) + /* * Report socket allocation statistics [mea@utu.fi] */ @@ -257,6 +259,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSpuriousRTOs", LINUX_MIB_TCPSPURIOUSRTOS), SNMP_MIB_ITEM("TCPMD5NotFound", LINUX_MIB_TCPMD5NOTFOUND), SNMP_MIB_ITEM("TCPMD5Unexpected", LINUX_MIB_TCPMD5UNEXPECTED), + SNMP_MIB_ITEM("TCPMD5Failure", LINUX_MIB_TCPMD5FAILURE), SNMP_MIB_ITEM("TCPSackShifted", LINUX_MIB_SACKSHIFTED), SNMP_MIB_ITEM("TCPSackMerged", LINUX_MIB_SACKMERGED), SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK), @@ -355,22 +358,22 @@ static void icmp_put(struct seq_file *seq) atomic_long_t *ptr = net->mib.icmpmsg_statistics->mibs; seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " In%s", icmpmibmap[i].name); seq_puts(seq, " OutMsgs OutErrors"); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " Out%s", icmpmibmap[i].name); seq_printf(seq, "\nIcmp: %lu %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_INERRORS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_CSUMERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + icmpmibmap[i].index)); seq_printf(seq, " %lu %lu", snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS), snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS)); - for (i = 0; icmpmibmap[i].name != NULL; i++) + for (i = 0; icmpmibmap[i].name; i++) seq_printf(seq, " %lu", atomic_long_read(ptr + (icmpmibmap[i].index | 0x100))); } @@ -378,14 +381,16 @@ static void icmp_put(struct seq_file *seq) /* * Called from the PROCfs module. This outputs /proc/net/snmp. */ -static int snmp_seq_show(struct seq_file *seq, void *v) +static int snmp_seq_show_ipstats(struct seq_file *seq, void *v) { - int i; struct net *net = seq->private; + u64 buff64[IPSTATS_MIB_MAX]; + int i; + + memset(buff64, 0, IPSTATS_MIB_MAX * sizeof(u64)); seq_puts(seq, "Ip: Forwarding DefaultTTL"); - - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipstats_list[i].name); seq_printf(seq, "\nIp: %d %d", @@ -393,54 +398,74 @@ static int snmp_seq_show(struct seq_file *seq, void *v) net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); - for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) - seq_printf(seq, " %llu", - snmp_fold_field64(net->mib.ip_statistics, - snmp4_ipstats_list[i].entry, - offsetof(struct ipstats_mib, syncp))); + snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list, + net->mib.ip_statistics, + offsetof(struct ipstats_mib, syncp)); + for (i = 0; snmp4_ipstats_list[i].name; i++) + seq_printf(seq, " %llu", buff64[i]); + + return 0; +} + +static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) +{ + unsigned long buff[TCPUDP_MIB_MAX]; + struct net *net = seq->private; + int i; + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + seq_puts(seq, "\nTcp:"); + for (i = 0; snmp4_tcp_list[i].name; i++) + seq_printf(seq, " %s", snmp4_tcp_list[i].name); + + seq_puts(seq, "\nTcp:"); + snmp_get_cpu_field_batch(buff, snmp4_tcp_list, + net->mib.tcp_statistics); + for (i = 0; snmp4_tcp_list[i].name; i++) { + /* MaxConn field is signed, RFC 2012 */ + if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) + seq_printf(seq, " %ld", buff[i]); + else + seq_printf(seq, " %lu", buff[i]); + } + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udp_statistics); + seq_puts(seq, "\nUdp:"); + for (i = 0; snmp4_udp_list[i].name; i++) + seq_printf(seq, " %s", snmp4_udp_list[i].name); + seq_puts(seq, "\nUdp:"); + for (i = 0; snmp4_udp_list[i].name; i++) + seq_printf(seq, " %lu", buff[i]); + + memset(buff, 0, TCPUDP_MIB_MAX * sizeof(unsigned long)); + + /* the UDP and UDP-Lite MIBs are the same */ + seq_puts(seq, "\nUdpLite:"); + snmp_get_cpu_field_batch(buff, snmp4_udp_list, + net->mib.udplite_statistics); + for (i = 0; snmp4_udp_list[i].name; i++) + seq_printf(seq, " %s", snmp4_udp_list[i].name); + seq_puts(seq, "\nUdpLite:"); + for (i = 0; snmp4_udp_list[i].name; i++) + seq_printf(seq, " %lu", buff[i]); + + seq_putc(seq, '\n'); + return 0; +} + +static int snmp_seq_show(struct seq_file *seq, void *v) +{ + snmp_seq_show_ipstats(seq, v); icmp_put(seq); /* RFC 2011 compatibility */ icmpmsg_put(seq); - seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) - seq_printf(seq, " %s", snmp4_tcp_list[i].name); + snmp_seq_show_tcp_udp(seq, v); - seq_puts(seq, "\nTcp:"); - for (i = 0; snmp4_tcp_list[i].name != NULL; i++) { - /* MaxConn field is signed, RFC 2012 */ - if (snmp4_tcp_list[i].entry == TCP_MIB_MAXCONN) - seq_printf(seq, " %ld", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); - else - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.tcp_statistics, - snmp4_tcp_list[i].entry)); - } - - seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %s", snmp4_udp_list[i].name); - - seq_puts(seq, "\nUdp:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udp_statistics, - snmp4_udp_list[i].entry)); - - /* the UDP and UDP-Lite MIBs are the same */ - seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %s", snmp4_udp_list[i].name); - - seq_puts(seq, "\nUdpLite:"); - for (i = 0; snmp4_udp_list[i].name != NULL; i++) - seq_printf(seq, " %lu", - snmp_fold_field(net->mib.udplite_statistics, - snmp4_udp_list[i].entry)); - - seq_putc(seq, '\n'); return 0; } @@ -468,21 +493,21 @@ static int netstat_seq_show(struct seq_file *seq, void *v) struct net *net = seq->private; seq_puts(seq, "TcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %s", snmp4_net_list[i].name); seq_puts(seq, "\nTcpExt:"); - for (i = 0; snmp4_net_list[i].name != NULL; i++) + for (i = 0; snmp4_net_list[i].name; i++) seq_printf(seq, " %lu", snmp_fold_field(net->mib.net_statistics, snmp4_net_list[i].entry)); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %s", snmp4_ipextstats_list[i].name); seq_puts(seq, "\nIpExt:"); - for (i = 0; snmp4_ipextstats_list[i].name != NULL; i++) + for (i = 0; snmp4_ipextstats_list[i].name; i++) seq_printf(seq, " %llu", snmp_fold_field64(net->mib.ip_statistics, snmp4_ipextstats_list[i].entry, diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 438f50c1a676..90a85c955872 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -606,12 +606,6 @@ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) (inet->hdrincl ? FLOWI_FLAG_KNOWN_NH : 0), daddr, saddr, 0, 0); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, &fl4); - if (err < 0) - goto done; - } - if (!inet->hdrincl) { rfv.msg = msg; rfv.hlen = 0; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 62c3ed0b7556..f2be689a6c85 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1252,7 +1252,9 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = 576; } - return min_t(unsigned int, mtu, IP_MAX_MTU); + mtu = min_t(unsigned int, mtu, IP_MAX_MTU); + + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static struct fib_nh_exception *find_exception(struct fib_nh *nh, __be32 daddr) @@ -1835,7 +1837,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, * Now we are ready to route packet. */ fl4.flowi4_oif = 0; - fl4.flowi4_iif = l3mdev_fib_oif_rcu(dev); + fl4.flowi4_iif = dev->ifindex; fl4.flowi4_mark = skb->mark; fl4.flowi4_tos = tos; fl4.flowi4_scope = RT_SCOPE_UNIVERSE; @@ -2022,7 +2024,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res, return ERR_PTR(-EINVAL); if (likely(!IN_DEV_ROUTE_LOCALNET(in_dev))) - if (ipv4_is_loopback(fl4->saddr) && !(dev_out->flags & IFF_LOOPBACK)) + if (ipv4_is_loopback(fl4->saddr) && + !(dev_out->flags & IFF_LOOPBACK) && + !netif_is_l3_master(dev_out)) return ERR_PTR(-EINVAL); if (ipv4_is_lbcast(fl4->daddr)) @@ -2152,7 +2156,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, unsigned int flags = 0; struct fib_result res; struct rtable *rth; - int master_idx; int orig_oif; int err = -ENETUNREACH; @@ -2162,9 +2165,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, orig_oif = fl4->flowi4_oif; - master_idx = l3mdev_master_ifindex_by_index(net, fl4->flowi4_oif); - if (master_idx) - fl4->flowi4_oif = master_idx; fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? @@ -2248,10 +2248,6 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, fl4->saddr = inet_select_addr(dev_out, 0, RT_SCOPE_HOST); } - - rth = l3mdev_get_rtable(dev_out, fl4); - if (rth) - goto out; } if (!fl4->daddr) { @@ -2269,8 +2265,7 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, if (err) { res.fi = NULL; res.table = NULL; - if (fl4->flowi4_oif && - !netif_index_is_l3_master(net, fl4->flowi4_oif)) { + if (fl4->flowi4_oif) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -2306,7 +2301,9 @@ struct rtable *__ip_route_output_key_hash(struct net *net, struct flowi4 *fl4, else fl4->saddr = fl4->daddr; } - dev_out = net->loopback_dev; + + /* L3 master device is the loopback for that domain */ + dev_out = l3mdev_master_dev_rcu(dev_out) ? : net->loopback_dev; fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; @@ -2582,9 +2579,6 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) fl4.flowi4_oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0; fl4.flowi4_mark = mark; - if (netif_index_is_l3_master(net, fl4.flowi4_oif)) - fl4.flowi4_flags = FLOWI_FLAG_L3MDEV_SRC | FLOWI_FLAG_SKIP_NH_OIF; - if (iif) { struct net_device *dev; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index ffbb218de520..f253e5019d22 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -380,14 +380,14 @@ void tcp_init_sock(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - __skb_queue_head_init(&tp->out_of_order_queue); + tp->out_of_order_queue = RB_ROOT; tcp_init_xmit_timers(sk); tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - tp->rtt_min[0].rtt = ~0U; + minmax_reset(&tp->rtt_min, tcp_time_stamp, ~0U); /* So many TCP implementations out there (incorrectly) count the * initial SYN frame in their delayed-ACK and congestion control @@ -396,6 +396,9 @@ void tcp_init_sock(struct sock *sk) */ tp->snd_cwnd = TCP_INIT_CWND; + /* There's a bubble in the pipe until at least the first ACK. */ + tp->app_limited = ~0U; + /* See draft-stevens-tcpca-spec-01 for discussion of the * initialization of these values. */ @@ -1014,23 +1017,40 @@ int tcp_sendpage(struct sock *sk, struct page *page, int offset, flags); lock_sock(sk); + + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + res = do_tcp_sendpages(sk, page, offset, size, flags); release_sock(sk); return res; } EXPORT_SYMBOL(tcp_sendpage); -static inline int select_size(const struct sock *sk, bool sg) +/* Do not bother using a page frag for very small frames. + * But use this heuristic only for the first skb in write queue. + * + * Having no payload in skb->head allows better SACK shifting + * in tcp_shift_skb_data(), reducing sack/rack overhead, because + * write queue has less skbs. + * Each skb can hold up to MAX_SKB_FRAGS * 32Kbytes, or ~0.5 MB. + * This also speeds up tso_fragment(), since it wont fallback + * to tcp_fragment(). + */ +static int linear_payload_sz(bool first_skb) +{ + if (first_skb) + return SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + return 0; +} + +static int select_size(const struct sock *sk, bool sg, bool first_skb) { const struct tcp_sock *tp = tcp_sk(sk); int tmp = tp->mss_cache; if (sg) { if (sk_can_gso(sk)) { - /* Small frames wont use a full page: - * Payload will immediately follow tcp header. - */ - tmp = SKB_WITH_OVERHEAD(2048 - MAX_TCP_HEADER); + tmp = linear_payload_sz(first_skb); } else { int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER); @@ -1101,6 +1121,8 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); + tcp_rate_check_app_limited(sk); /* is sending application-limited? */ + /* Wait for a connection to finish. One exception is TCP Fast Open * (passive side) where data is allowed to be sent before a connection * is fully established. @@ -1161,6 +1183,8 @@ restart: } if (copy <= 0 || !tcp_skb_can_collapse_to(skb)) { + bool first_skb; + new_segment: /* Allocate new segment. If the interface is SG, * allocate skb fitting to single page. @@ -1172,10 +1196,11 @@ new_segment: process_backlog = false; goto restart; } + first_skb = skb_queue_empty(&sk->sk_write_queue); skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), + select_size(sk, sg, first_skb), sk->sk_allocation, - skb_queue_empty(&sk->sk_write_queue)); + first_skb); if (!skb) goto wait_for_memory; @@ -1570,6 +1595,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc, } EXPORT_SYMBOL(tcp_read_sock); +int tcp_peek_len(struct socket *sock) +{ + return tcp_inq(sock->sk); +} +EXPORT_SYMBOL(tcp_peek_len); + /* * This routine copies from a sock struct into the user buffer. * @@ -2237,7 +2268,7 @@ int tcp_disconnect(struct sock *sk, int flags) tcp_clear_xmit_timers(sk); __skb_queue_purge(&sk->sk_receive_queue); tcp_write_queue_purge(sk); - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); inet->inet_dport = 0; @@ -2681,7 +2712,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now = tcp_time_stamp; + u32 now = tcp_time_stamp, intv; unsigned int start; int notsent_bytes; u64 rate64; @@ -2771,6 +2802,15 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; + + info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; + rate = READ_ONCE(tp->rate_delivered); + intv = READ_ONCE(tp->rate_interval_us); + if (rate && intv) { + rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; + do_div(rate64, intv); + put_unaligned(rate64, &info->tcpi_delivery_rate); + } } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -3092,23 +3132,6 @@ struct tcp_md5sig_pool *tcp_get_md5sig_pool(void) } EXPORT_SYMBOL(tcp_get_md5sig_pool); -int tcp_md5_hash_header(struct tcp_md5sig_pool *hp, - const struct tcphdr *th) -{ - struct scatterlist sg; - struct tcphdr hdr; - - /* We are not allowed to change tcphdr, make a local copy */ - memcpy(&hdr, th, sizeof(hdr)); - hdr.check = 0; - - /* options aren't included in the hash */ - sg_init_one(&sg, &hdr, sizeof(hdr)); - ahash_request_set_crypt(hp->md5_req, &sg, NULL, sizeof(hdr)); - return crypto_ahash_update(hp->md5_req); -} -EXPORT_SYMBOL(tcp_md5_hash_header); - int tcp_md5_hash_skb_data(struct tcp_md5sig_pool *hp, const struct sk_buff *skb, unsigned int header_len) { @@ -3255,11 +3278,12 @@ static void __init tcp_init_mem(void) void __init tcp_init(void) { - unsigned long limit; int max_rshare, max_wshare, cnt; + unsigned long limit; unsigned int i; - sock_skb_cb_check_size(sizeof(struct tcp_skb_cb)); + BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c new file mode 100644 index 000000000000..0ea66c2c9344 --- /dev/null +++ b/net/ipv4/tcp_bbr.c @@ -0,0 +1,896 @@ +/* Bottleneck Bandwidth and RTT (BBR) congestion control + * + * BBR congestion control computes the sending rate based on the delivery + * rate (throughput) estimated from ACKs. In a nutshell: + * + * On each ACK, update our model of the network path: + * bottleneck_bandwidth = windowed_max(delivered / elapsed, 10 round trips) + * min_rtt = windowed_min(rtt, 10 seconds) + * pacing_rate = pacing_gain * bottleneck_bandwidth + * cwnd = max(cwnd_gain * bottleneck_bandwidth * min_rtt, 4) + * + * The core algorithm does not react directly to packet losses or delays, + * although BBR may adjust the size of next send per ACK when loss is + * observed, or adjust the sending rate if it estimates there is a + * traffic policer, in order to keep the drop rate reasonable. + * + * BBR is described in detail in: + * "BBR: Congestion-Based Congestion Control", + * Neal Cardwell, Yuchung Cheng, C. Stephen Gunn, Soheil Hassas Yeganeh, + * Van Jacobson. ACM Queue, Vol. 14 No. 5, September-October 2016. + * + * There is a public e-mail list for discussing BBR development and testing: + * https://groups.google.com/forum/#!forum/bbr-dev + * + * NOTE: BBR *must* be used with the fq qdisc ("man tc-fq") with pacing enabled, + * since pacing is integral to the BBR design and implementation. + * BBR without pacing would not function properly, and may incur unnecessary + * high packet loss rates. + */ +#include +#include +#include +#include +#include +#include + +/* Scale factor for rate in pkt/uSec unit to avoid truncation in bandwidth + * estimation. The rate unit ~= (1500 bytes / 1 usec / 2^24) ~= 715 bps. + * This handles bandwidths from 0.06pps (715bps) to 256Mpps (3Tbps) in a u32. + * Since the minimum window is >=4 packets, the lower bound isn't + * an issue. The upper bound isn't an issue with existing technologies. + */ +#define BW_SCALE 24 +#define BW_UNIT (1 << BW_SCALE) + +#define BBR_SCALE 8 /* scaling factor for fractions in BBR (e.g. gains) */ +#define BBR_UNIT (1 << BBR_SCALE) + +/* BBR has the following modes for deciding how fast to send: */ +enum bbr_mode { + BBR_STARTUP, /* ramp up sending rate rapidly to fill pipe */ + BBR_DRAIN, /* drain any queue created during startup */ + BBR_PROBE_BW, /* discover, share bw: pace around estimated bw */ + BBR_PROBE_RTT, /* cut cwnd to min to probe min_rtt */ +}; + +/* BBR congestion control block */ +struct bbr { + u32 min_rtt_us; /* min RTT in min_rtt_win_sec window */ + u32 min_rtt_stamp; /* timestamp of min_rtt_us */ + u32 probe_rtt_done_stamp; /* end time for BBR_PROBE_RTT mode */ + struct minmax bw; /* Max recent delivery rate in pkts/uS << 24 */ + u32 rtt_cnt; /* count of packet-timed rounds elapsed */ + u32 next_rtt_delivered; /* scb->tx.delivered at end of round */ + struct skb_mstamp cycle_mstamp; /* time of this cycle phase start */ + u32 mode:3, /* current bbr_mode in state machine */ + prev_ca_state:3, /* CA state on previous ACK */ + packet_conservation:1, /* use packet conservation? */ + restore_cwnd:1, /* decided to revert cwnd to old value */ + round_start:1, /* start of packet-timed tx->ack round? */ + tso_segs_goal:7, /* segments we want in each skb we send */ + idle_restart:1, /* restarting after idle? */ + probe_rtt_round_done:1, /* a BBR_PROBE_RTT round at 4 pkts? */ + unused:5, + lt_is_sampling:1, /* taking long-term ("LT") samples now? */ + lt_rtt_cnt:7, /* round trips in long-term interval */ + lt_use_bw:1; /* use lt_bw as our bw estimate? */ + u32 lt_bw; /* LT est delivery rate in pkts/uS << 24 */ + u32 lt_last_delivered; /* LT intvl start: tp->delivered */ + u32 lt_last_stamp; /* LT intvl start: tp->delivered_mstamp */ + u32 lt_last_lost; /* LT intvl start: tp->lost */ + u32 pacing_gain:10, /* current gain for setting pacing rate */ + cwnd_gain:10, /* current gain for setting cwnd */ + full_bw_cnt:3, /* number of rounds without large bw gains */ + cycle_idx:3, /* current index in pacing_gain cycle array */ + unused_b:6; + u32 prior_cwnd; /* prior cwnd upon entering loss recovery */ + u32 full_bw; /* recent bw, to estimate if pipe is full */ +}; + +#define CYCLE_LEN 8 /* number of phases in a pacing gain cycle */ + +/* Window length of bw filter (in rounds): */ +static const int bbr_bw_rtts = CYCLE_LEN + 2; +/* Window length of min_rtt filter (in sec): */ +static const u32 bbr_min_rtt_win_sec = 10; +/* Minimum time (in ms) spent at bbr_cwnd_min_target in BBR_PROBE_RTT mode: */ +static const u32 bbr_probe_rtt_mode_ms = 200; +/* Skip TSO below the following bandwidth (bits/sec): */ +static const int bbr_min_tso_rate = 1200000; + +/* We use a high_gain value of 2/ln(2) because it's the smallest pacing gain + * that will allow a smoothly increasing pacing rate that will double each RTT + * and send the same number of packets per RTT that an un-paced, slow-starting + * Reno or CUBIC flow would: + */ +static const int bbr_high_gain = BBR_UNIT * 2885 / 1000 + 1; +/* The pacing gain of 1/high_gain in BBR_DRAIN is calculated to typically drain + * the queue created in BBR_STARTUP in a single round: + */ +static const int bbr_drain_gain = BBR_UNIT * 1000 / 2885; +/* The gain for deriving steady-state cwnd tolerates delayed/stretched ACKs: */ +static const int bbr_cwnd_gain = BBR_UNIT * 2; +/* The pacing_gain values for the PROBE_BW gain cycle, to discover/share bw: */ +static const int bbr_pacing_gain[] = { + BBR_UNIT * 5 / 4, /* probe for more available bw */ + BBR_UNIT * 3 / 4, /* drain queue and/or yield bw to other flows */ + BBR_UNIT, BBR_UNIT, BBR_UNIT, /* cruise at 1.0*bw to utilize pipe, */ + BBR_UNIT, BBR_UNIT, BBR_UNIT /* without creating excess queue... */ +}; +/* Randomize the starting gain cycling phase over N phases: */ +static const u32 bbr_cycle_rand = 7; + +/* Try to keep at least this many packets in flight, if things go smoothly. For + * smooth functioning, a sliding window protocol ACKing every other packet + * needs at least 4 packets in flight: + */ +static const u32 bbr_cwnd_min_target = 4; + +/* To estimate if BBR_STARTUP mode (i.e. high_gain) has filled pipe... */ +/* If bw has increased significantly (1.25x), there may be more bw available: */ +static const u32 bbr_full_bw_thresh = BBR_UNIT * 5 / 4; +/* But after 3 rounds w/o significant bw growth, estimate pipe is full: */ +static const u32 bbr_full_bw_cnt = 3; + +/* "long-term" ("LT") bandwidth estimator parameters... */ +/* The minimum number of rounds in an LT bw sampling interval: */ +static const u32 bbr_lt_intvl_min_rtts = 4; +/* If lost/delivered ratio > 20%, interval is "lossy" and we may be policed: */ +static const u32 bbr_lt_loss_thresh = 50; +/* If 2 intervals have a bw ratio <= 1/8, their bw is "consistent": */ +static const u32 bbr_lt_bw_ratio = BBR_UNIT / 8; +/* If 2 intervals have a bw diff <= 4 Kbit/sec their bw is "consistent": */ +static const u32 bbr_lt_bw_diff = 4000 / 8; +/* If we estimate we're policed, use lt_bw for this many round trips: */ +static const u32 bbr_lt_bw_max_rtts = 48; + +/* Do we estimate that STARTUP filled the pipe? */ +static bool bbr_full_bw_reached(const struct sock *sk) +{ + const struct bbr *bbr = inet_csk_ca(sk); + + return bbr->full_bw_cnt >= bbr_full_bw_cnt; +} + +/* Return the windowed max recent bandwidth sample, in pkts/uS << BW_SCALE. */ +static u32 bbr_max_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return minmax_get(&bbr->bw); +} + +/* Return the estimated bandwidth of the path, in pkts/uS << BW_SCALE. */ +static u32 bbr_bw(const struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->lt_use_bw ? bbr->lt_bw : bbr_max_bw(sk); +} + +/* Return rate in bytes per second, optionally with a gain. + * The order here is chosen carefully to avoid overflow of u64. This should + * work for input rates of up to 2.9Tbit/sec and gain of 2.89x. + */ +static u64 bbr_rate_bytes_per_sec(struct sock *sk, u64 rate, int gain) +{ + rate *= tcp_mss_to_mtu(sk, tcp_sk(sk)->mss_cache); + rate *= gain; + rate >>= BBR_SCALE; + rate *= USEC_PER_SEC; + return rate >> BW_SCALE; +} + +/* Pace using current bw estimate and a gain factor. In order to help drive the + * network toward lower queues while maintaining high utilization and low + * latency, the average pacing rate aims to be slightly (~1%) lower than the + * estimated bandwidth. This is an important aspect of the design. In this + * implementation this slightly lower pacing rate is achieved implicitly by not + * including link-layer headers in the packet size used for the pacing rate. + */ +static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u64 rate = bw; + + rate = bbr_rate_bytes_per_sec(sk, rate, gain); + rate = min_t(u64, rate, sk->sk_max_pacing_rate); + if (bbr->mode != BBR_STARTUP || rate > sk->sk_pacing_rate) + sk->sk_pacing_rate = rate; +} + +/* Return count of segments we want in the skbs we send, or 0 for default. */ +static u32 bbr_tso_segs_goal(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + return bbr->tso_segs_goal; +} + +static void bbr_set_tso_segs_goal(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 min_segs; + + min_segs = sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2; + bbr->tso_segs_goal = min(tcp_tso_autosize(sk, tp->mss_cache, min_segs), + 0x7FU); +} + +/* Save "last known good" cwnd so we can restore it after losses or PROBE_RTT */ +static void bbr_save_cwnd(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->prev_ca_state < TCP_CA_Recovery && bbr->mode != BBR_PROBE_RTT) + bbr->prior_cwnd = tp->snd_cwnd; /* this cwnd is good enough */ + else /* loss recovery or BBR_PROBE_RTT have temporarily cut cwnd */ + bbr->prior_cwnd = max(bbr->prior_cwnd, tp->snd_cwnd); +} + +static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + if (event == CA_EVENT_TX_START && tp->app_limited) { + bbr->idle_restart = 1; + /* Avoid pointless buffer overflows: pace at est. bw if we don't + * need more speed (we're restarting from idle and app-limited). + */ + if (bbr->mode == BBR_PROBE_BW) + bbr_set_pacing_rate(sk, bbr_bw(sk), BBR_UNIT); + } +} + +/* Find target cwnd. Right-size the cwnd based on min RTT and the + * estimated bottleneck bandwidth: + * + * cwnd = bw * min_rtt * gain = BDP * gain + * + * The key factor, gain, controls the amount of queue. While a small gain + * builds a smaller queue, it becomes more vulnerable to noise in RTT + * measurements (e.g., delayed ACKs or other ACK compression effects). This + * noise may cause BBR to under-estimate the rate. + * + * To achieve full performance in high-speed paths, we budget enough cwnd to + * fit full-sized skbs in-flight on both end hosts to fully utilize the path: + * - one skb in sending host Qdisc, + * - one skb in sending host TSO/GSO engine + * - one skb being received by receiver host LRO/GRO/delayed-ACK engine + * Don't worry, at low rates (bbr_min_tso_rate) this won't bloat cwnd because + * in such cases tso_segs_goal is 1. The minimum cwnd is 4 packets, + * which allows 2 outstanding 2-packet sequences, to try to keep pipe + * full even with ACK-every-other-packet delayed ACKs. + */ +static u32 bbr_target_cwnd(struct sock *sk, u32 bw, int gain) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd; + u64 w; + + /* If we've never had a valid RTT sample, cap cwnd at the initial + * default. This should only happen when the connection is not using TCP + * timestamps and has retransmitted all of the SYN/SYNACK/data packets + * ACKed so far. In this case, an RTO can cut cwnd to 1, in which + * case we need to slow-start up toward something safe: TCP_INIT_CWND. + */ + if (unlikely(bbr->min_rtt_us == ~0U)) /* no valid RTT samples yet? */ + return TCP_INIT_CWND; /* be safe: cap at default initial cwnd*/ + + w = (u64)bw * bbr->min_rtt_us; + + /* Apply a gain to the given value, then remove the BW_SCALE shift. */ + cwnd = (((w * gain) >> BBR_SCALE) + BW_UNIT - 1) / BW_UNIT; + + /* Allow enough full-sized skbs in flight to utilize end systems. */ + cwnd += 3 * bbr->tso_segs_goal; + + /* Reduce delayed ACKs by rounding up cwnd to the next even number. */ + cwnd = (cwnd + 1) & ~1U; + + return cwnd; +} + +/* An optimization in BBR to reduce losses: On the first round of recovery, we + * follow the packet conservation principle: send P packets per P packets acked. + * After that, we slow-start and send at most 2*P packets per P packets acked. + * After recovery finishes, or upon undo, we restore the cwnd we had when + * recovery started (capped by the target cwnd based on estimated BDP). + * + * TODO(ycheng/ncardwell): implement a rate-based approach. + */ +static bool bbr_set_cwnd_to_recover_or_restore( + struct sock *sk, const struct rate_sample *rs, u32 acked, u32 *new_cwnd) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u8 prev_state = bbr->prev_ca_state, state = inet_csk(sk)->icsk_ca_state; + u32 cwnd = tp->snd_cwnd; + + /* An ACK for P pkts should release at most 2*P packets. We do this + * in two steps. First, here we deduct the number of lost packets. + * Then, in bbr_set_cwnd() we slow start up toward the target cwnd. + */ + if (rs->losses > 0) + cwnd = max_t(s32, cwnd - rs->losses, 1); + + if (state == TCP_CA_Recovery && prev_state != TCP_CA_Recovery) { + /* Starting 1st round of Recovery, so do packet conservation. */ + bbr->packet_conservation = 1; + bbr->next_rtt_delivered = tp->delivered; /* start round now */ + /* Cut unused cwnd from app behavior, TSQ, or TSO deferral: */ + cwnd = tcp_packets_in_flight(tp) + acked; + } else if (prev_state >= TCP_CA_Recovery && state < TCP_CA_Recovery) { + /* Exiting loss recovery; restore cwnd saved before recovery. */ + bbr->restore_cwnd = 1; + bbr->packet_conservation = 0; + } + bbr->prev_ca_state = state; + + if (bbr->restore_cwnd) { + /* Restore cwnd after exiting loss recovery or PROBE_RTT. */ + cwnd = max(cwnd, bbr->prior_cwnd); + bbr->restore_cwnd = 0; + } + + if (bbr->packet_conservation) { + *new_cwnd = max(cwnd, tcp_packets_in_flight(tp) + acked); + return true; /* yes, using packet conservation */ + } + *new_cwnd = cwnd; + return false; +} + +/* Slow-start up toward target cwnd (if bw estimate is growing, or packet loss + * has drawn us down below target), or snap down to target if we're above it. + */ +static void bbr_set_cwnd(struct sock *sk, const struct rate_sample *rs, + u32 acked, u32 bw, int gain) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 cwnd = 0, target_cwnd = 0; + + if (!acked) + return; + + if (bbr_set_cwnd_to_recover_or_restore(sk, rs, acked, &cwnd)) + goto done; + + /* If we're below target cwnd, slow start cwnd toward target cwnd. */ + target_cwnd = bbr_target_cwnd(sk, bw, gain); + if (bbr_full_bw_reached(sk)) /* only cut cwnd if we filled the pipe */ + cwnd = min(cwnd + acked, target_cwnd); + else if (cwnd < target_cwnd || tp->delivered < TCP_INIT_CWND) + cwnd = cwnd + acked; + cwnd = max(cwnd, bbr_cwnd_min_target); + +done: + tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp); /* apply global cap */ + if (bbr->mode == BBR_PROBE_RTT) /* drain queue, refresh min_rtt */ + tp->snd_cwnd = min(tp->snd_cwnd, bbr_cwnd_min_target); +} + +/* End cycle phase if it's time and/or we hit the phase's in-flight target. */ +static bool bbr_is_next_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool is_full_length = + skb_mstamp_us_delta(&tp->delivered_mstamp, &bbr->cycle_mstamp) > + bbr->min_rtt_us; + u32 inflight, bw; + + /* The pacing_gain of 1.0 paces at the estimated bw to try to fully + * use the pipe without increasing the queue. + */ + if (bbr->pacing_gain == BBR_UNIT) + return is_full_length; /* just use wall clock time */ + + inflight = rs->prior_in_flight; /* what was in-flight before ACK? */ + bw = bbr_max_bw(sk); + + /* A pacing_gain > 1.0 probes for bw by trying to raise inflight to at + * least pacing_gain*BDP; this may take more than min_rtt if min_rtt is + * small (e.g. on a LAN). We do not persist if packets are lost, since + * a path with small buffers may not hold that much. + */ + if (bbr->pacing_gain > BBR_UNIT) + return is_full_length && + (rs->losses || /* perhaps pacing_gain*BDP won't fit */ + inflight >= bbr_target_cwnd(sk, bw, bbr->pacing_gain)); + + /* A pacing_gain < 1.0 tries to drain extra queue we added if bw + * probing didn't find more bw. If inflight falls to match BDP then we + * estimate queue is drained; persisting would underutilize the pipe. + */ + return is_full_length || + inflight <= bbr_target_cwnd(sk, bw, BBR_UNIT); +} + +static void bbr_advance_cycle_phase(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->cycle_idx = (bbr->cycle_idx + 1) & (CYCLE_LEN - 1); + bbr->cycle_mstamp = tp->delivered_mstamp; + bbr->pacing_gain = bbr_pacing_gain[bbr->cycle_idx]; +} + +/* Gain cycling: cycle pacing gain to converge to fair share of available bw. */ +static void bbr_update_cycle_phase(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if ((bbr->mode == BBR_PROBE_BW) && !bbr->lt_use_bw && + bbr_is_next_cycle_phase(sk, rs)) + bbr_advance_cycle_phase(sk); +} + +static void bbr_reset_startup_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_STARTUP; + bbr->pacing_gain = bbr_high_gain; + bbr->cwnd_gain = bbr_high_gain; +} + +static void bbr_reset_probe_bw_mode(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->mode = BBR_PROBE_BW; + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = bbr_cwnd_gain; + bbr->cycle_idx = CYCLE_LEN - 1 - prandom_u32_max(bbr_cycle_rand); + bbr_advance_cycle_phase(sk); /* flip to next phase of gain cycle */ +} + +static void bbr_reset_mode(struct sock *sk) +{ + if (!bbr_full_bw_reached(sk)) + bbr_reset_startup_mode(sk); + else + bbr_reset_probe_bw_mode(sk); +} + +/* Start a new long-term sampling interval. */ +static void bbr_reset_lt_bw_sampling_interval(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_last_stamp = tp->delivered_mstamp.stamp_jiffies; + bbr->lt_last_delivered = tp->delivered; + bbr->lt_last_lost = tp->lost; + bbr->lt_rtt_cnt = 0; +} + +/* Completely reset long-term bandwidth sampling. */ +static void bbr_reset_lt_bw_sampling(struct sock *sk) +{ + struct bbr *bbr = inet_csk_ca(sk); + + bbr->lt_bw = 0; + bbr->lt_use_bw = 0; + bbr->lt_is_sampling = false; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Long-term bw sampling interval is done. Estimate whether we're policed. */ +static void bbr_lt_bw_interval_done(struct sock *sk, u32 bw) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 diff; + + if (bbr->lt_bw) { /* do we have bw from a previous interval? */ + /* Is new bw close to the lt_bw from the previous interval? */ + diff = abs(bw - bbr->lt_bw); + if ((diff * BBR_UNIT <= bbr_lt_bw_ratio * bbr->lt_bw) || + (bbr_rate_bytes_per_sec(sk, diff, BBR_UNIT) <= + bbr_lt_bw_diff)) { + /* All criteria are met; estimate we're policed. */ + bbr->lt_bw = (bw + bbr->lt_bw) >> 1; /* avg 2 intvls */ + bbr->lt_use_bw = 1; + bbr->pacing_gain = BBR_UNIT; /* try to avoid drops */ + bbr->lt_rtt_cnt = 0; + return; + } + } + bbr->lt_bw = bw; + bbr_reset_lt_bw_sampling_interval(sk); +} + +/* Token-bucket traffic policers are common (see "An Internet-Wide Analysis of + * Traffic Policing", SIGCOMM 2016). BBR detects token-bucket policers and + * explicitly models their policed rate, to reduce unnecessary losses. We + * estimate that we're policed if we see 2 consecutive sampling intervals with + * consistent throughput and high packet loss. If we think we're being policed, + * set lt_bw to the "long-term" average delivery rate from those 2 intervals. + */ +static void bbr_lt_bw_sampling(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u32 lost, delivered; + u64 bw; + s32 t; + + if (bbr->lt_use_bw) { /* already using long-term rate, lt_bw? */ + if (bbr->mode == BBR_PROBE_BW && bbr->round_start && + ++bbr->lt_rtt_cnt >= bbr_lt_bw_max_rtts) { + bbr_reset_lt_bw_sampling(sk); /* stop using lt_bw */ + bbr_reset_probe_bw_mode(sk); /* restart gain cycling */ + } + return; + } + + /* Wait for the first loss before sampling, to let the policer exhaust + * its tokens and estimate the steady-state rate allowed by the policer. + * Starting samples earlier includes bursts that over-estimate the bw. + */ + if (!bbr->lt_is_sampling) { + if (!rs->losses) + return; + bbr_reset_lt_bw_sampling_interval(sk); + bbr->lt_is_sampling = true; + } + + /* To avoid underestimates, reset sampling if we run out of data. */ + if (rs->is_app_limited) { + bbr_reset_lt_bw_sampling(sk); + return; + } + + if (bbr->round_start) + bbr->lt_rtt_cnt++; /* count round trips in this interval */ + if (bbr->lt_rtt_cnt < bbr_lt_intvl_min_rtts) + return; /* sampling interval needs to be longer */ + if (bbr->lt_rtt_cnt > 4 * bbr_lt_intvl_min_rtts) { + bbr_reset_lt_bw_sampling(sk); /* interval is too long */ + return; + } + + /* End sampling interval when a packet is lost, so we estimate the + * policer tokens were exhausted. Stopping the sampling before the + * tokens are exhausted under-estimates the policed rate. + */ + if (!rs->losses) + return; + + /* Calculate packets lost and delivered in sampling interval. */ + lost = tp->lost - bbr->lt_last_lost; + delivered = tp->delivered - bbr->lt_last_delivered; + /* Is loss rate (lost/delivered) >= lt_loss_thresh? If not, wait. */ + if (!delivered || (lost << BBR_SCALE) < bbr_lt_loss_thresh * delivered) + return; + + /* Find average delivery rate in this sampling interval. */ + t = (s32)(tp->delivered_mstamp.stamp_jiffies - bbr->lt_last_stamp); + if (t < 1) + return; /* interval is less than one jiffy, so wait */ + t = jiffies_to_usecs(t); + /* Interval long enough for jiffies_to_usecs() to return a bogus 0? */ + if (t < 1) { + bbr_reset_lt_bw_sampling(sk); /* interval too long; reset */ + return; + } + bw = (u64)delivered * BW_UNIT; + do_div(bw, t); + bbr_lt_bw_interval_done(sk, bw); +} + +/* Estimate the bandwidth based on how fast packets are delivered */ +static void bbr_update_bw(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->round_start = 0; + if (rs->delivered < 0 || rs->interval_us <= 0) + return; /* Not a valid observation */ + + /* See if we've reached the next RTT */ + if (!before(rs->prior_delivered, bbr->next_rtt_delivered)) { + bbr->next_rtt_delivered = tp->delivered; + bbr->rtt_cnt++; + bbr->round_start = 1; + bbr->packet_conservation = 0; + } + + bbr_lt_bw_sampling(sk, rs); + + /* Divide delivered by the interval to find a (lower bound) bottleneck + * bandwidth sample. Delivered is in packets and interval_us in uS and + * ratio will be <<1 for most connections. So delivered is first scaled. + */ + bw = (u64)rs->delivered * BW_UNIT; + do_div(bw, rs->interval_us); + + /* If this sample is application-limited, it is likely to have a very + * low delivered count that represents application behavior rather than + * the available network rate. Such a sample could drag down estimated + * bw, causing needless slow-down. Thus, to continue to send at the + * last measured network rate, we filter out app-limited samples unless + * they describe the path bw at least as well as our bw model. + * + * So the goal during app-limited phase is to proceed with the best + * network rate no matter how long. We automatically leave this + * phase when app writes faster than the network can deliver :) + */ + if (!rs->is_app_limited || bw >= bbr_max_bw(sk)) { + /* Incorporate new sample into our max bw filter. */ + minmax_running_max(&bbr->bw, bbr_bw_rtts, bbr->rtt_cnt, bw); + } +} + +/* Estimate when the pipe is full, using the change in delivery rate: BBR + * estimates that STARTUP filled the pipe if the estimated bw hasn't changed by + * at least bbr_full_bw_thresh (25%) after bbr_full_bw_cnt (3) non-app-limited + * rounds. Why 3 rounds: 1: rwin autotuning grows the rwin, 2: we fill the + * higher rwin, 3: we get higher delivery rate samples. Or transient + * cross-traffic or radio noise can go away. CUBIC Hystart shares a similar + * design goal, but uses delay and inter-ACK spacing instead of bandwidth. + */ +static void bbr_check_full_bw_reached(struct sock *sk, + const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw_thresh; + + if (bbr_full_bw_reached(sk) || !bbr->round_start || rs->is_app_limited) + return; + + bw_thresh = (u64)bbr->full_bw * bbr_full_bw_thresh >> BBR_SCALE; + if (bbr_max_bw(sk) >= bw_thresh) { + bbr->full_bw = bbr_max_bw(sk); + bbr->full_bw_cnt = 0; + return; + } + ++bbr->full_bw_cnt; +} + +/* If pipe is probably full, drain the queue and then enter steady-state. */ +static void bbr_check_drain(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (bbr->mode == BBR_STARTUP && bbr_full_bw_reached(sk)) { + bbr->mode = BBR_DRAIN; /* drain queue we created */ + bbr->pacing_gain = bbr_drain_gain; /* pace slow to drain */ + bbr->cwnd_gain = bbr_high_gain; /* maintain cwnd */ + } /* fall through to check if in-flight is already small: */ + if (bbr->mode == BBR_DRAIN && + tcp_packets_in_flight(tcp_sk(sk)) <= + bbr_target_cwnd(sk, bbr_max_bw(sk), BBR_UNIT)) + bbr_reset_probe_bw_mode(sk); /* we estimate queue is drained */ +} + +/* The goal of PROBE_RTT mode is to have BBR flows cooperatively and + * periodically drain the bottleneck queue, to converge to measure the true + * min_rtt (unloaded propagation delay). This allows the flows to keep queues + * small (reducing queuing delay and packet loss) and achieve fairness among + * BBR flows. + * + * The min_rtt filter window is 10 seconds. When the min_rtt estimate expires, + * we enter PROBE_RTT mode and cap the cwnd at bbr_cwnd_min_target=4 packets. + * After at least bbr_probe_rtt_mode_ms=200ms and at least one packet-timed + * round trip elapsed with that flight size <= 4, we leave PROBE_RTT mode and + * re-enter the previous mode. BBR uses 200ms to approximately bound the + * performance penalty of PROBE_RTT's cwnd capping to roughly 2% (200ms/10s). + * + * Note that flows need only pay 2% if they are busy sending over the last 10 + * seconds. Interactive applications (e.g., Web, RPCs, video chunks) often have + * natural silences or low-rate periods within 10 seconds where the rate is low + * enough for long enough to drain its queue in the bottleneck. We pick up + * these min RTT measurements opportunistically with our min_rtt filter. :-) + */ +static void bbr_update_min_rtt(struct sock *sk, const struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + bool filter_expired; + + /* Track min RTT seen in the min_rtt_win_sec filter window: */ + filter_expired = after(tcp_time_stamp, + bbr->min_rtt_stamp + bbr_min_rtt_win_sec * HZ); + if (rs->rtt_us >= 0 && + (rs->rtt_us <= bbr->min_rtt_us || filter_expired)) { + bbr->min_rtt_us = rs->rtt_us; + bbr->min_rtt_stamp = tcp_time_stamp; + } + + if (bbr_probe_rtt_mode_ms > 0 && filter_expired && + !bbr->idle_restart && bbr->mode != BBR_PROBE_RTT) { + bbr->mode = BBR_PROBE_RTT; /* dip, drain queue */ + bbr->pacing_gain = BBR_UNIT; + bbr->cwnd_gain = BBR_UNIT; + bbr_save_cwnd(sk); /* note cwnd so we can restore it */ + bbr->probe_rtt_done_stamp = 0; + } + + if (bbr->mode == BBR_PROBE_RTT) { + /* Ignore low rate samples during this mode. */ + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; + /* Maintain min packets in flight for max(200 ms, 1 round). */ + if (!bbr->probe_rtt_done_stamp && + tcp_packets_in_flight(tp) <= bbr_cwnd_min_target) { + bbr->probe_rtt_done_stamp = tcp_time_stamp + + msecs_to_jiffies(bbr_probe_rtt_mode_ms); + bbr->probe_rtt_round_done = 0; + bbr->next_rtt_delivered = tp->delivered; + } else if (bbr->probe_rtt_done_stamp) { + if (bbr->round_start) + bbr->probe_rtt_round_done = 1; + if (bbr->probe_rtt_round_done && + after(tcp_time_stamp, bbr->probe_rtt_done_stamp)) { + bbr->min_rtt_stamp = tcp_time_stamp; + bbr->restore_cwnd = 1; /* snap to prior_cwnd */ + bbr_reset_mode(sk); + } + } + } + bbr->idle_restart = 0; +} + +static void bbr_update_model(struct sock *sk, const struct rate_sample *rs) +{ + bbr_update_bw(sk, rs); + bbr_update_cycle_phase(sk, rs); + bbr_check_full_bw_reached(sk, rs); + bbr_check_drain(sk, rs); + bbr_update_min_rtt(sk, rs); +} + +static void bbr_main(struct sock *sk, const struct rate_sample *rs) +{ + struct bbr *bbr = inet_csk_ca(sk); + u32 bw; + + bbr_update_model(sk, rs); + + bw = bbr_bw(sk); + bbr_set_pacing_rate(sk, bw, bbr->pacing_gain); + bbr_set_tso_segs_goal(sk); + bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain); +} + +static void bbr_init(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw; + + bbr->prior_cwnd = 0; + bbr->tso_segs_goal = 0; /* default segs per skb until first ACK */ + bbr->rtt_cnt = 0; + bbr->next_rtt_delivered = 0; + bbr->prev_ca_state = TCP_CA_Open; + bbr->packet_conservation = 0; + + bbr->probe_rtt_done_stamp = 0; + bbr->probe_rtt_round_done = 0; + bbr->min_rtt_us = tcp_min_rtt(tp); + bbr->min_rtt_stamp = tcp_time_stamp; + + minmax_reset(&bbr->bw, bbr->rtt_cnt, 0); /* init max bw to 0 */ + + /* Initialize pacing rate to: high_gain * init_cwnd / RTT. */ + bw = (u64)tp->snd_cwnd * BW_UNIT; + do_div(bw, (tp->srtt_us >> 3) ? : USEC_PER_MSEC); + sk->sk_pacing_rate = 0; /* force an update of sk_pacing_rate */ + bbr_set_pacing_rate(sk, bw, bbr_high_gain); + + bbr->restore_cwnd = 0; + bbr->round_start = 0; + bbr->idle_restart = 0; + bbr->full_bw = 0; + bbr->full_bw_cnt = 0; + bbr->cycle_mstamp.v64 = 0; + bbr->cycle_idx = 0; + bbr_reset_lt_bw_sampling(sk); + bbr_reset_startup_mode(sk); +} + +static u32 bbr_sndbuf_expand(struct sock *sk) +{ + /* Provision 3 * cwnd since BBR may slow-start even during recovery. */ + return 3; +} + +/* In theory BBR does not need to undo the cwnd since it does not + * always reduce cwnd on losses (see bbr_main()). Keep it for now. + */ +static u32 bbr_undo_cwnd(struct sock *sk) +{ + return tcp_sk(sk)->snd_cwnd; +} + +/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */ +static u32 bbr_ssthresh(struct sock *sk) +{ + bbr_save_cwnd(sk); + return TCP_INFINITE_SSTHRESH; /* BBR does not use ssthresh */ +} + +static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr, + union tcp_cc_info *info) +{ + if (ext & (1 << (INET_DIAG_BBRINFO - 1)) || + ext & (1 << (INET_DIAG_VEGASINFO - 1))) { + struct tcp_sock *tp = tcp_sk(sk); + struct bbr *bbr = inet_csk_ca(sk); + u64 bw = bbr_bw(sk); + + bw = bw * tp->mss_cache * USEC_PER_SEC >> BW_SCALE; + memset(&info->bbr, 0, sizeof(info->bbr)); + info->bbr.bbr_bw_lo = (u32)bw; + info->bbr.bbr_bw_hi = (u32)(bw >> 32); + info->bbr.bbr_min_rtt = bbr->min_rtt_us; + info->bbr.bbr_pacing_gain = bbr->pacing_gain; + info->bbr.bbr_cwnd_gain = bbr->cwnd_gain; + *attr = INET_DIAG_BBRINFO; + return sizeof(info->bbr); + } + return 0; +} + +static void bbr_set_state(struct sock *sk, u8 new_state) +{ + struct bbr *bbr = inet_csk_ca(sk); + + if (new_state == TCP_CA_Loss) { + struct rate_sample rs = { .losses = 1 }; + + bbr->prev_ca_state = TCP_CA_Loss; + bbr->full_bw = 0; + bbr->round_start = 1; /* treat RTO like end of a round */ + bbr_lt_bw_sampling(sk, &rs); + } +} + +static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { + .flags = TCP_CONG_NON_RESTRICTED, + .name = "bbr", + .owner = THIS_MODULE, + .init = bbr_init, + .cong_control = bbr_main, + .sndbuf_expand = bbr_sndbuf_expand, + .undo_cwnd = bbr_undo_cwnd, + .cwnd_event = bbr_cwnd_event, + .ssthresh = bbr_ssthresh, + .tso_segs_goal = bbr_tso_segs_goal, + .get_info = bbr_get_info, + .set_state = bbr_set_state, +}; + +static int __init bbr_register(void) +{ + BUILD_BUG_ON(sizeof(struct bbr) > ICSK_CA_PRIV_SIZE); + return tcp_register_congestion_control(&tcp_bbr_cong_ops); +} + +static void __exit bbr_unregister(void) +{ + tcp_unregister_congestion_control(&tcp_bbr_cong_ops); +} + +module_init(bbr_register); +module_exit(bbr_unregister); + +MODULE_AUTHOR("Van Jacobson "); +MODULE_AUTHOR("Neal Cardwell "); +MODULE_AUTHOR("Yuchung Cheng "); +MODULE_AUTHOR("Soheil Hassas Yeganeh "); +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("TCP BBR (Bottleneck Bandwidth and RTT)"); diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 03725b294286..35b280361cb2 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -56,7 +56,7 @@ MODULE_PARM_DESC(use_shadow, "use shadow window heuristic"); module_param(use_tolerance, bool, 0644); MODULE_PARM_DESC(use_tolerance, "use loss tolerance heuristic"); -struct minmax { +struct cdg_minmax { union { struct { s32 min; @@ -74,10 +74,10 @@ enum cdg_state { }; struct cdg { - struct minmax rtt; - struct minmax rtt_prev; - struct minmax *gradients; - struct minmax gsum; + struct cdg_minmax rtt; + struct cdg_minmax rtt_prev; + struct cdg_minmax *gradients; + struct cdg_minmax gsum; bool gfilled; u8 tail; u8 state; @@ -353,7 +353,7 @@ static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) { struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - struct minmax *gradients; + struct cdg_minmax *gradients; switch (ev) { case CA_EVENT_CWND_RESTART: diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 882caa4e72bc..1294af4e0127 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -69,7 +69,7 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) int ret = 0; /* all algorithms must implement ssthresh and cong_avoid ops */ - if (!ca->ssthresh || !ca->cong_avoid) { + if (!ca->ssthresh || !(ca->cong_avoid || ca->cong_control)) { pr_err("%s does not implement required ops\n", ca->name); return -EINVAL; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a756b8749a26..a27b9c0e27c0 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -289,6 +289,7 @@ static bool tcp_ecn_rcv_ecn_echo(const struct tcp_sock *tp, const struct tcphdr static void tcp_sndbuf_expand(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; int sndmem, per_mss; u32 nr_segs; @@ -309,7 +310,8 @@ static void tcp_sndbuf_expand(struct sock *sk) * Cubic needs 1.7 factor, rounded to 2 to include * extra cushion (application might react slowly to POLLOUT) */ - sndmem = 2 * nr_segs * per_mss; + sndmem = ca_ops->sndbuf_expand ? ca_ops->sndbuf_expand(sk) : 2; + sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); @@ -899,12 +901,29 @@ static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb) tp->retransmit_high = TCP_SKB_CB(skb)->end_seq; } +/* Sum the number of packets on the wire we have marked as lost. + * There are two cases we care about here: + * a) Packet hasn't been marked lost (nor retransmitted), + * and this is the first loss. + * b) Packet has been marked both lost and retransmitted, + * and this means we think it was lost again. + */ +static void tcp_sum_lost(struct tcp_sock *tp, struct sk_buff *skb) +{ + __u8 sacked = TCP_SKB_CB(skb)->sacked; + + if (!(sacked & TCPCB_LOST) || + ((sacked & TCPCB_LOST) && (sacked & TCPCB_SACKED_RETRANS))) + tp->lost += tcp_skb_pcount(skb); +} + static void tcp_skb_mark_lost(struct tcp_sock *tp, struct sk_buff *skb) { if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tcp_verify_retransmit_hint(tp, skb); tp->lost_out += tcp_skb_pcount(skb); + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; } } @@ -913,6 +932,7 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) { tcp_verify_retransmit_hint(tp, skb); + tcp_sum_lost(tp, skb); if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) { tp->lost_out += tcp_skb_pcount(skb); TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; @@ -1094,6 +1114,7 @@ struct tcp_sacktag_state { */ struct skb_mstamp first_sackt; struct skb_mstamp last_sackt; + struct rate_sample *rate; int flag; }; @@ -1261,6 +1282,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_sacktag_one(sk, state, TCP_SKB_CB(skb)->sacked, start_seq, end_seq, dup_sack, pcount, &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (skb == tp->lost_skb_hint) tp->lost_cnt_hint += pcount; @@ -1311,6 +1333,9 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, tcp_advance_highest_sack(sk, skb); tcp_skb_collapse_tstamp(prev, skb); + if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp.v64)) + TCP_SKB_CB(prev)->tx.delivered_mstamp.v64 = 0; + tcp_unlink_write_queue(skb, sk); sk_wmem_free_skb(sk, skb); @@ -1540,6 +1565,7 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, dup_sack, tcp_skb_pcount(skb), &skb->skb_mstamp); + tcp_rate_skb_delivered(sk, skb, state->rate); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) @@ -1622,8 +1648,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); - if (found_dup_sack) + if (found_dup_sack) { state->flag |= FLAG_DSACKING_ACK; + tp->delivered++; /* A spurious retransmission is delivered */ + } /* Eliminate too old ACKs, but take into * account more or less fresh ones, they can @@ -1890,6 +1918,7 @@ void tcp_enter_loss(struct sock *sk) struct sk_buff *skb; bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery; bool is_reneg; /* is receiver reneging on SACKs? */ + bool mark_lost; /* Reduce ssthresh if it has not yet been made inside this window. */ if (icsk->icsk_ca_state <= TCP_CA_Disorder || @@ -1923,8 +1952,12 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; + mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || + is_reneg); + if (mark_lost) + tcp_sum_lost(tp, skb); TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; - if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { + if (mark_lost) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; TCP_SKB_CB(skb)->sacked |= TCPCB_LOST; tp->lost_out += tcp_skb_pcount(skb); @@ -2502,6 +2535,9 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + if (inet_csk(sk)->icsk_ca_ops->cong_control) + return; + /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { @@ -2878,67 +2914,13 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, *rexmit = REXMIT_LOST; } -/* Kathleen Nichols' algorithm for tracking the minimum value of - * a data stream over some fixed time interval. (E.g., the minimum - * RTT over the past five minutes.) It uses constant space and constant - * time per update yet almost always delivers the same minimum as an - * implementation that has to keep all the data in the window. - * - * The algorithm keeps track of the best, 2nd best & 3rd best min - * values, maintaining an invariant that the measurement time of the - * n'th best >= n-1'th best. It also makes sure that the three values - * are widely separated in the time window since that bounds the worse - * case error when that data is monotonically increasing over the window. - * - * Upon getting a new min, we can forget everything earlier because it - * has no value - the new min is <= everything else in the window by - * definition and it's the most recent. So we restart fresh on every new min - * and overwrites 2nd & 3rd choices. The same property holds for 2nd & 3rd - * best. - */ static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { - const u32 now = tcp_time_stamp, wlen = sysctl_tcp_min_rtt_wlen * HZ; - struct rtt_meas *m = tcp_sk(sk)->rtt_min; - struct rtt_meas rttm = { - .rtt = likely(rtt_us) ? rtt_us : jiffies_to_usecs(1), - .ts = now, - }; - u32 elapsed; + struct tcp_sock *tp = tcp_sk(sk); + u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; - /* Check if the new measurement updates the 1st, 2nd, or 3rd choices */ - if (unlikely(rttm.rtt <= m[0].rtt)) - m[0] = m[1] = m[2] = rttm; - else if (rttm.rtt <= m[1].rtt) - m[1] = m[2] = rttm; - else if (rttm.rtt <= m[2].rtt) - m[2] = rttm; - - elapsed = now - m[0].ts; - if (unlikely(elapsed > wlen)) { - /* Passed entire window without a new min so make 2nd choice - * the new min & 3rd choice the new 2nd. So forth and so on. - */ - m[0] = m[1]; - m[1] = m[2]; - m[2] = rttm; - if (now - m[0].ts > wlen) { - m[0] = m[1]; - m[1] = rttm; - if (now - m[0].ts > wlen) - m[0] = rttm; - } - } else if (m[1].ts == m[0].ts && elapsed > wlen / 4) { - /* Passed a quarter of the window without a new min so - * take 2nd choice from the 2nd quarter of the window. - */ - m[2] = m[1] = rttm; - } else if (m[2].ts == m[1].ts && elapsed > wlen / 2) { - /* Passed half the window without a new min so take the 3rd - * choice from the last half of the window. - */ - m[2] = rttm; - } + minmax_running_min(&tp->rtt_min, wlen, tcp_time_stamp, + rtt_us ? : jiffies_to_usecs(1)); } static inline bool tcp_ack_update_rtt(struct sock *sk, const int flag, @@ -3101,10 +3083,11 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, */ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, u32 prior_snd_una, int *acked, - struct tcp_sacktag_state *sack) + struct tcp_sacktag_state *sack, + struct skb_mstamp *now) { const struct inet_connection_sock *icsk = inet_csk(sk); - struct skb_mstamp first_ackt, last_ackt, now; + struct skb_mstamp first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; u32 reord = tp->packets_out; @@ -3136,7 +3119,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, acked_pcount = tcp_tso_acked(sk, skb); if (!acked_pcount) break; - fully_acked = false; } else { /* Speedup tcp_unlink_write_queue() and next loop */ @@ -3172,6 +3154,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->packets_out -= acked_pcount; pkts_acked += acked_pcount; + tcp_rate_skb_delivered(sk, skb, sack->rate); /* Initial outgoing SYN's get put onto the write_queue * just like anything else we transmit. It is not @@ -3204,16 +3187,15 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) flag |= FLAG_SACK_RENEGING; - skb_mstamp_get(&now); if (likely(first_ackt.v64) && !(flag & FLAG_RETRANS_DATA_ACKED)) { - seq_rtt_us = skb_mstamp_us_delta(&now, &first_ackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &last_ackt); + seq_rtt_us = skb_mstamp_us_delta(now, &first_ackt); + ca_rtt_us = skb_mstamp_us_delta(now, &last_ackt); } if (sack->first_sackt.v64) { - sack_rtt_us = skb_mstamp_us_delta(&now, &sack->first_sackt); - ca_rtt_us = skb_mstamp_us_delta(&now, &sack->last_sackt); + sack_rtt_us = skb_mstamp_us_delta(now, &sack->first_sackt); + ca_rtt_us = skb_mstamp_us_delta(now, &sack->last_sackt); } - + sack->rate->rtt_us = ca_rtt_us; /* RTT of last (S)ACKed packet, or -1 */ rtt_update = tcp_ack_update_rtt(sk, flag, seq_rtt_us, sack_rtt_us, ca_rtt_us); @@ -3241,7 +3223,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, tp->fackets_out -= min(pkts_acked, tp->fackets_out); } else if (skb && rtt_update && sack_rtt_us >= 0 && - sack_rtt_us > skb_mstamp_us_delta(&now, &skb->skb_mstamp)) { + sack_rtt_us > skb_mstamp_us_delta(now, &skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. @@ -3332,8 +3314,15 @@ static inline bool tcp_may_raise_cwnd(const struct sock *sk, const int flag) * information. All transmission or retransmission are delayed afterwards. */ static void tcp_cong_control(struct sock *sk, u32 ack, u32 acked_sacked, - int flag) + int flag, const struct rate_sample *rs) { + const struct inet_connection_sock *icsk = inet_csk(sk); + + if (icsk->icsk_ca_ops->cong_control) { + icsk->icsk_ca_ops->cong_control(sk, rs); + return; + } + if (tcp_in_cwnd_reduction(sk)) { /* Reduce cwnd if state mandates */ tcp_cwnd_reduction(sk, acked_sacked, flag); @@ -3578,17 +3567,21 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); struct tcp_sacktag_state sack_state; + struct rate_sample rs = { .prior_delivered = 0 }; u32 prior_snd_una = tp->snd_una; u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; u32 prior_fackets; int prior_packets = tp->packets_out; - u32 prior_delivered = tp->delivered; + u32 delivered = tp->delivered; + u32 lost = tp->lost; int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + struct skb_mstamp now; sack_state.first_sackt.v64 = 0; + sack_state.rate = &rs; /* We very likely will need to access write queue head. */ prefetchw(sk->sk_write_queue.next); @@ -3611,6 +3604,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; + skb_mstamp_get(&now); + if (icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -3621,6 +3616,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) } prior_fackets = tp->fackets_out; + rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet * is in window. @@ -3676,7 +3672,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) /* See if we can take anything off of the retransmit queue. */ flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + &sack_state, &now); if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); @@ -3693,7 +3689,10 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (icsk->icsk_pending == ICSK_TIME_RETRANS) tcp_schedule_loss_probe(sk); - tcp_cong_control(sk, ack, tp->delivered - prior_delivered, flag); + delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ + lost = tp->lost - lost; /* freshly marked lost */ + tcp_rate_gen(sk, delivered, lost, &now, &rs); + tcp_cong_control(sk, ack, delivered, flag, &rs); tcp_xmit_recovery(sk, rexmit); return 1; @@ -4107,7 +4106,7 @@ void tcp_fin(struct sock *sk) /* It _is_ possible, that we have something out-of-order _after_ FIN. * Probably, we should reset in this case. For now drop them. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); if (tcp_is_sack(tp)) tcp_sack_reset(&tp->rx_opt); sk_mem_reclaim(sk); @@ -4267,7 +4266,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) int this_sack; /* Empty ofo queue, hence, all the SACKs are eaten. Clear. */ - if (skb_queue_empty(&tp->out_of_order_queue)) { + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tp->rx_opt.num_sacks = 0; return; } @@ -4343,10 +4342,13 @@ static void tcp_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); __u32 dsack_high = tp->rcv_nxt; + bool fin, fragstolen, eaten; struct sk_buff *skb, *tail; - bool fragstolen, eaten; + struct rb_node *p; - while ((skb = skb_peek(&tp->out_of_order_queue)) != NULL) { + p = rb_first(&tp->out_of_order_queue); + while (p) { + skb = rb_entry(p, struct sk_buff, rbnode); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4356,9 +4358,10 @@ static void tcp_ofo_queue(struct sock *sk) dsack_high = TCP_SKB_CB(skb)->end_seq; tcp_dsack_extend(sk, TCP_SKB_CB(skb)->seq, dsack); } + p = rb_next(p); + rb_erase(&skb->rbnode, &tp->out_of_order_queue); - __skb_unlink(skb, &tp->out_of_order_queue); - if (!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) { + if (unlikely(!after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt))) { SOCK_DEBUG(sk, "ofo packet was already received\n"); tcp_drop(sk, skb); continue; @@ -4370,12 +4373,19 @@ static void tcp_ofo_queue(struct sock *sk) tail = skb_peek_tail(&sk->sk_receive_queue); eaten = tail && tcp_try_coalesce(sk, tail, skb, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); + fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN; if (!eaten) __skb_queue_tail(&sk->sk_receive_queue, skb); - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) - tcp_fin(sk); - if (eaten) + else kfree_skb_partial(skb, fragstolen); + + if (unlikely(fin)) { + tcp_fin(sk); + /* tcp_fin() purges tp->out_of_order_queue, + * so we must end this loop right now. + */ + break; + } } } @@ -4391,12 +4401,9 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, if (tcp_prune_queue(sk) < 0) return -1; - if (!sk_rmem_schedule(sk, skb, size)) { + while (!sk_rmem_schedule(sk, skb, size)) { if (!tcp_prune_ofo_queue(sk)) return -1; - - if (!sk_rmem_schedule(sk, skb, size)) - return -1; } } return 0; @@ -4405,8 +4412,10 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); + struct rb_node **p, *q, *parent; struct sk_buff *skb1; u32 seq, end_seq; + bool fragstolen; tcp_ecn_check_ce(tp, skb); @@ -4421,88 +4430,92 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) inet_csk_schedule_ack(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOQUEUE); + seq = TCP_SKB_CB(skb)->seq; + end_seq = TCP_SKB_CB(skb)->end_seq; SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n", - tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + tp->rcv_nxt, seq, end_seq); - skb1 = skb_peek_tail(&tp->out_of_order_queue); - if (!skb1) { + p = &tp->out_of_order_queue.rb_node; + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) { /* Initial out of order segment, build 1 SACK. */ if (tcp_is_sack(tp)) { tp->rx_opt.num_sacks = 1; - tp->selective_acks[0].start_seq = TCP_SKB_CB(skb)->seq; - tp->selective_acks[0].end_seq = - TCP_SKB_CB(skb)->end_seq; + tp->selective_acks[0].start_seq = seq; + tp->selective_acks[0].end_seq = end_seq; } - __skb_queue_head(&tp->out_of_order_queue, skb); + rb_link_node(&skb->rbnode, NULL, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); + tp->ooo_last_skb = skb; goto end; } - seq = TCP_SKB_CB(skb)->seq; - end_seq = TCP_SKB_CB(skb)->end_seq; - - if (seq == TCP_SKB_CB(skb1)->end_seq) { - bool fragstolen; - - if (!tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - } else { - tcp_grow_window(sk, skb); - kfree_skb_partial(skb, fragstolen); - skb = NULL; - } - - if (!tp->rx_opt.num_sacks || - tp->selective_acks[0].end_seq != seq) - goto add_sack; - - /* Common case: data arrive in order after hole. */ - tp->selective_acks[0].end_seq = end_seq; - goto end; + /* In the typical case, we are adding an skb to the end of the list. + * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. + */ + if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { +coalesce_done: + tcp_grow_window(sk, skb); + kfree_skb_partial(skb, fragstolen); + skb = NULL; + goto add_sack; + } + /* Can avoid an rbtree lookup if we are adding skb after ooo_last_skb */ + if (!before(seq, TCP_SKB_CB(tp->ooo_last_skb)->end_seq)) { + parent = &tp->ooo_last_skb->rbnode; + p = &parent->rb_right; + goto insert; } - /* Find place to insert this segment. */ - while (1) { - if (!after(TCP_SKB_CB(skb1)->seq, seq)) - break; - if (skb_queue_is_first(&tp->out_of_order_queue, skb1)) { - skb1 = NULL; - break; + /* Find place to insert this segment. Handle overlaps on the way. */ + parent = NULL; + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(seq, TCP_SKB_CB(skb1)->seq)) { + p = &parent->rb_left; + continue; } - skb1 = skb_queue_prev(&tp->out_of_order_queue, skb1); + if (before(seq, TCP_SKB_CB(skb1)->end_seq)) { + if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { + /* All the bits are present. Drop. */ + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb); + skb = NULL; + tcp_dsack_set(sk, seq, end_seq); + goto add_sack; + } + if (after(seq, TCP_SKB_CB(skb1)->seq)) { + /* Partial overlap. */ + tcp_dsack_set(sk, seq, TCP_SKB_CB(skb1)->end_seq); + } else { + /* skb's seq == skb1's seq and skb covers skb1. + * Replace skb1 with skb. + */ + rb_replace_node(&skb1->rbnode, &skb->rbnode, + &tp->out_of_order_queue); + tcp_dsack_extend(sk, + TCP_SKB_CB(skb1)->seq, + TCP_SKB_CB(skb1)->end_seq); + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPOFOMERGE); + __kfree_skb(skb1); + goto merge_right; + } + } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + goto coalesce_done; + } + p = &parent->rb_right; } +insert: + /* Insert segment into RB tree. */ + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, &tp->out_of_order_queue); - /* Do skb overlap to previous one? */ - if (skb1 && before(seq, TCP_SKB_CB(skb1)->end_seq)) { - if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) { - /* All the bits are present. Drop. */ - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); - tcp_drop(sk, skb); - skb = NULL; - tcp_dsack_set(sk, seq, end_seq); - goto add_sack; - } - if (after(seq, TCP_SKB_CB(skb1)->seq)) { - /* Partial overlap. */ - tcp_dsack_set(sk, seq, - TCP_SKB_CB(skb1)->end_seq); - } else { - if (skb_queue_is_first(&tp->out_of_order_queue, - skb1)) - skb1 = NULL; - else - skb1 = skb_queue_prev( - &tp->out_of_order_queue, - skb1); - } - } - if (!skb1) - __skb_queue_head(&tp->out_of_order_queue, skb); - else - __skb_queue_after(&tp->out_of_order_queue, skb1, skb); - - /* And clean segments covered by new one as whole. */ - while (!skb_queue_is_last(&tp->out_of_order_queue, skb)) { - skb1 = skb_queue_next(&tp->out_of_order_queue, skb); +merge_right: + /* Remove other segments covered by skb. */ + while ((q = rb_next(&skb->rbnode)) != NULL) { + skb1 = rb_entry(q, struct sk_buff, rbnode); if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; @@ -4511,12 +4524,15 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) end_seq); break; } - __skb_unlink(skb1, &tp->out_of_order_queue); + rb_erase(&skb1->rbnode, &tp->out_of_order_queue); tcp_dsack_extend(sk, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPOFOMERGE); tcp_drop(sk, skb1); } + /* If there is no skb after us, we are the last_skb ! */ + if (!q) + tp->ooo_last_skb = skb; add_sack: if (tcp_is_sack(tp)) @@ -4653,13 +4669,13 @@ queue_and_out: if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) tcp_fin(sk); - if (!skb_queue_empty(&tp->out_of_order_queue)) { + if (!RB_EMPTY_ROOT(&tp->out_of_order_queue)) { tcp_ofo_queue(sk); /* RFC2581. 4.2. SHOULD send immediate ACK, when * gap in queue is filled. */ - if (skb_queue_empty(&tp->out_of_order_queue)) + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) inet_csk(sk)->icsk_ack.pingpong = 0; } @@ -4713,48 +4729,76 @@ drop: tcp_data_queue_ofo(sk, skb); } -static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *list) +static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *list) { - struct sk_buff *next = NULL; + if (list) + return !skb_queue_is_last(list, skb) ? skb->next : NULL; - if (!skb_queue_is_last(list, skb)) - next = skb_queue_next(list, skb); + return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); +} + +static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *list, + struct rb_root *root) +{ + struct sk_buff *next = tcp_skb_next(skb, list); + + if (list) + __skb_unlink(skb, list); + else + rb_erase(&skb->rbnode, root); - __skb_unlink(skb, list); __kfree_skb(skb); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRCVCOLLAPSED); return next; } +/* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ +static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) +{ + struct rb_node **p = &root->rb_node; + struct rb_node *parent = NULL; + struct sk_buff *skb1; + + while (*p) { + parent = *p; + skb1 = rb_entry(parent, struct sk_buff, rbnode); + if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node(&skb->rbnode, parent, p); + rb_insert_color(&skb->rbnode, root); +} + /* Collapse contiguous sequence of skbs head..tail with * sequence numbers start..end. * - * If tail is NULL, this means until the end of the list. + * If tail is NULL, this means until the end of the queue. * * Segments with FIN/SYN are not collapsed (only because this * simplifies code) */ static void -tcp_collapse(struct sock *sk, struct sk_buff_head *list, - struct sk_buff *head, struct sk_buff *tail, - u32 start, u32 end) +tcp_collapse(struct sock *sk, struct sk_buff_head *list, struct rb_root *root, + struct sk_buff *head, struct sk_buff *tail, u32 start, u32 end) { - struct sk_buff *skb, *n; + struct sk_buff *skb = head, *n; + struct sk_buff_head tmp; bool end_of_skbs; /* First, check that queue is collapsible and find - * the point where collapsing can be useful. */ - skb = head; + * the point where collapsing can be useful. + */ restart: - end_of_skbs = true; - skb_queue_walk_from_safe(list, skb, n) { - if (skb == tail) - break; + for (end_of_skbs = true; skb != NULL && skb != tail; skb = n) { + n = tcp_skb_next(skb, list); + /* No new bits? It is possible on ofo queue. */ if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb) break; goto restart; @@ -4772,13 +4816,10 @@ restart: break; } - if (!skb_queue_is_last(list, skb)) { - struct sk_buff *next = skb_queue_next(list, skb); - if (next != tail && - TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(next)->seq) { - end_of_skbs = false; - break; - } + if (n && n != tail && + TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(n)->seq) { + end_of_skbs = false; + break; } /* Decided to skip this, advance start seq. */ @@ -4788,17 +4829,22 @@ restart: (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) return; + __skb_queue_head_init(&tmp); + while (before(start, end)) { int copy = min_t(int, SKB_MAX_ORDER(0, 0), end - start); struct sk_buff *nskb; nskb = alloc_skb(copy, GFP_ATOMIC); if (!nskb) - return; + break; memcpy(nskb->cb, skb->cb, sizeof(skb->cb)); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start; - __skb_queue_before(list, skb, nskb); + if (list) + __skb_queue_before(list, skb, nskb); + else + __skb_queue_tail(&tmp, nskb); /* defer rbtree insertion */ skb_set_owner_r(nskb, sk); /* Copy data, releasing collapsed skbs. */ @@ -4816,14 +4862,17 @@ restart: start += size; } if (!before(start, TCP_SKB_CB(skb)->end_seq)) { - skb = tcp_collapse_one(sk, skb, list); + skb = tcp_collapse_one(sk, skb, list, root); if (!skb || skb == tail || (TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN))) - return; + goto end; } } } +end: + skb_queue_walk_safe(&tmp, skb, n) + tcp_rbtree_insert(root, skb); } /* Collapse ofo queue. Algorithm: select contiguous sequence of skbs @@ -4832,70 +4881,86 @@ restart: static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = skb_peek(&tp->out_of_order_queue); - struct sk_buff *head; + struct sk_buff *skb, *head; + struct rb_node *p; u32 start, end; - if (!skb) + p = rb_first(&tp->out_of_order_queue); + skb = rb_entry_safe(p, struct sk_buff, rbnode); +new_range: + if (!skb) { + p = rb_last(&tp->out_of_order_queue); + /* Note: This is possible p is NULL here. We do not + * use rb_entry_safe(), as ooo_last_skb is valid only + * if rbtree is not empty. + */ + tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); return; - + } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; - head = skb; - for (;;) { - struct sk_buff *next = NULL; + for (head = skb;;) { + skb = tcp_skb_next(skb, NULL); - if (!skb_queue_is_last(&tp->out_of_order_queue, skb)) - next = skb_queue_next(&tp->out_of_order_queue, skb); - skb = next; - - /* Segment is terminated when we see gap or when - * we are at the end of all the queue. */ + /* Range is terminated when we see a gap or when + * we are at the queue end. + */ if (!skb || after(TCP_SKB_CB(skb)->seq, end) || before(TCP_SKB_CB(skb)->end_seq, start)) { - tcp_collapse(sk, &tp->out_of_order_queue, + tcp_collapse(sk, NULL, &tp->out_of_order_queue, head, skb, start, end); - head = skb; - if (!skb) - break; - /* Start new segment */ - start = TCP_SKB_CB(skb)->seq; - end = TCP_SKB_CB(skb)->end_seq; - } else { - if (before(TCP_SKB_CB(skb)->seq, start)) - start = TCP_SKB_CB(skb)->seq; - if (after(TCP_SKB_CB(skb)->end_seq, end)) - end = TCP_SKB_CB(skb)->end_seq; + goto new_range; } + + if (unlikely(before(TCP_SKB_CB(skb)->seq, start))) + start = TCP_SKB_CB(skb)->seq; + if (after(TCP_SKB_CB(skb)->end_seq, end)) + end = TCP_SKB_CB(skb)->end_seq; } } /* - * Purge the out-of-order queue. - * Return true if queue was pruned. + * Clean the out-of-order queue to make room. + * We drop high sequences packets to : + * 1) Let a chance for holes to be filled. + * 2) not add too big latencies if thousands of packets sit there. + * (But if application shrinks SO_RCVBUF, we could still end up + * freeing whole queue here) + * + * Return true if queue has shrunk. */ static bool tcp_prune_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - bool res = false; + struct rb_node *node, *prev; - if (!skb_queue_empty(&tp->out_of_order_queue)) { - NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); - __skb_queue_purge(&tp->out_of_order_queue); + if (RB_EMPTY_ROOT(&tp->out_of_order_queue)) + return false; - /* Reset SACK state. A conforming SACK implementation will - * do the same at a timeout based retransmit. When a connection - * is in a sad state like this, we care only about integrity - * of the connection not performance. - */ - if (tp->rx_opt.sack_ok) - tcp_sack_reset(&tp->rx_opt); + NET_INC_STATS(sock_net(sk), LINUX_MIB_OFOPRUNED); + node = &tp->ooo_last_skb->rbnode; + do { + prev = rb_prev(node); + rb_erase(node, &tp->out_of_order_queue); + tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); sk_mem_reclaim(sk); - res = true; - } - return res; + if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + !tcp_under_memory_pressure(sk)) + break; + node = prev; + } while (node); + tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); + + /* Reset SACK state. A conforming SACK implementation will + * do the same at a timeout based retransmit. When a connection + * is in a sad state like this, we care only about integrity + * of the connection not performance. + */ + if (tp->rx_opt.sack_ok) + tcp_sack_reset(&tp->rx_opt); + return true; } /* Reduce allocated memory if we can, trying to get @@ -4920,7 +4985,7 @@ static int tcp_prune_queue(struct sock *sk) tcp_collapse_ofo_queue(sk); if (!skb_queue_empty(&sk->sk_receive_queue)) - tcp_collapse(sk, &sk->sk_receive_queue, + tcp_collapse(sk, &sk->sk_receive_queue, NULL, skb_peek(&sk->sk_receive_queue), NULL, tp->copied_seq, tp->rcv_nxt); @@ -5025,7 +5090,7 @@ static void __tcp_ack_snd_check(struct sock *sk, int ofo_possible) /* We ACK each frame or... */ tcp_in_quickack_mode(sk) || /* We have out of order data. */ - (ofo_possible && skb_peek(&tp->out_of_order_queue))) { + (ofo_possible && !RB_EMPTY_ROOT(&tp->out_of_order_queue))) { /* Then ack it now */ tcp_send_ack(sk); } else { @@ -5926,7 +5991,8 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) } else tcp_init_metrics(sk); - tcp_update_pacing_rate(sk); + if (!inet_csk(sk)->icsk_ca_ops->cong_control) + tcp_update_pacing_rate(sk); /* Prevent spurious tcp_cwnd_restart() on first data packet */ tp->lsndtime = tcp_time_stamp; @@ -6259,6 +6325,7 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, tmp_opt.tstamp_ok = tmp_opt.saw_tstamp; tcp_openreq_init(req, &tmp_opt, skb, sk); + inet_rsk(req)->no_srccheck = inet_sk(sk)->transparent; /* Note: tcp_v6_init_req() might override ir_iif for link locals */ inet_rsk(req)->ir_iif = inet_request_bound_dev_if(sk, skb); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 7158d4f8dae4..7ac37c314312 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1175,6 +1175,7 @@ static bool tcp_v4_inbound_md5_hash(const struct sock *sk, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); net_info_ratelimited("MD5 Hash failed for (%pI4, %d)->(%pI4, %d)%s\n", &iph->saddr, ntohs(th->source), &iph->daddr, ntohs(th->dest), @@ -1195,7 +1196,6 @@ static void tcp_v4_init_req(struct request_sock *req, sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->no_srccheck = inet_sk(sk_listener)->transparent; ireq->opt = tcp_v4_save_options(skb); } @@ -1537,6 +1537,34 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(tcp_prequeue); +bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) +{ + u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; + + /* Only socket owner can try to collapse/prune rx queues + * to reduce memory overhead, so add a little headroom here. + * Few sockets backlog are possibly concurrently non empty. + */ + limit += 64*1024; + + /* In case all data was pulled from skb frags (in __pskb_pull_tail()), + * we can fix skb->truesize to its real value to avoid future drops. + * This is valid because skb is not yet charged to the socket. + * It has been noticed pure SACK packets were sometimes dropped + * (if cooked by drivers without copybreak feature). + */ + if (!skb->data_len) + skb->truesize = SKB_TRUESIZE(skb_end_offset(skb)); + + if (unlikely(sk_add_backlog(sk, skb, limit))) { + bh_unlock_sock(sk); + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return true; + } + return false; +} +EXPORT_SYMBOL(tcp_add_backlog); + /* * From tcp_input.c */ @@ -1608,6 +1636,7 @@ process: sk = req->rsk_listener; if (unlikely(tcp_v4_inbound_md5_hash(sk, skb))) { + sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; } @@ -1666,10 +1695,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v4_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); @@ -1818,7 +1844,7 @@ void tcp_v4_destroy_sock(struct sock *sk) tcp_write_queue_purge(sk); /* Cleans up our, hopefully empty, out_of_order_queue. */ - __skb_queue_purge(&tp->out_of_order_queue); + skb_rbtree_purge(&tp->out_of_order_queue); #ifdef CONFIG_TCP_MD5SIG /* Clean up the MD5 key list, if any */ diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index b617826e2477..bf1f3b2b29d1 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -751,7 +751,7 @@ static struct genl_family tcp_metrics_nl_family = { .netnsok = true, }; -static struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { +static const struct nla_policy tcp_metrics_nl_policy[TCP_METRICS_ATTR_MAX + 1] = { [TCP_METRICS_ATTR_ADDR_IPV4] = { .type = NLA_U32, }, [TCP_METRICS_ATTR_ADDR_IPV6] = { .type = NLA_BINARY, .len = sizeof(struct in6_addr), }, diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 4b95ec4ed2c8..6234ebaa7db1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -464,7 +464,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->srtt_us = 0; newtp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); - newtp->rtt_min[0].rtt = ~0U; + minmax_reset(&newtp->rtt_min, tcp_time_stamp, ~0U); newicsk->icsk_rto = TCP_TIMEOUT_INIT; newtp->packets_out = 0; @@ -487,8 +487,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; + /* There's a bubble in the pipe until at least the first ACK. */ + newtp->app_limited = ~0U; + tcp_init_xmit_timers(newsk); - __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; newtp->rx_opt.saw_tstamp = 0; diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 5c5964962d0c..bc68da38ea86 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -90,12 +90,6 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, goto out; } - /* GSO partial only requires splitting the frame into an MSS - * multiple and possibly a remainder. So update the mss now. - */ - if (features & NETIF_F_GSO_PARTIAL) - mss = skb->len - (skb->len % mss); - copy_destructor = gso_skb->destructor == tcp_wfree; ooo_okay = gso_skb->ooo_okay; /* All segments but the first should have ooo_okay cleared */ @@ -108,6 +102,13 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, /* Only first segment might have ooo_okay set */ segs->ooo_okay = ooo_okay; + /* GSO partial and frag_list segmentation only requires splitting + * the frame into an MSS multiple and possibly a remainder, both + * cases return a GSO skb. So update the mss now. + */ + if (skb_is_gso(segs)) + mss *= skb_shinfo(segs)->gso_segs; + delta = htonl(oldlen + (thlen + mss)); skb = segs; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d48d5571e62a..896e9dfbdb5c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -734,9 +734,16 @@ static void tcp_tsq_handler(struct sock *sk) { if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_CLOSING | - TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) - tcp_write_xmit(sk, tcp_current_mss(sk), tcp_sk(sk)->nonagle, + TCPF_CLOSE_WAIT | TCPF_LAST_ACK)) { + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->lost_out > tp->retrans_out && + tp->snd_cwnd > tcp_packets_in_flight(tp)) + tcp_xmit_retransmit_queue(sk); + + tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); + } } /* * One tasklet per cpu tries to send more skbs. @@ -918,6 +925,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_mstamp_get(&skb->skb_mstamp); TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; + tcp_rate_skb_sent(sk, skb); if (unlikely(skb_cloned(skb))) skb = pskb_copy(skb, gfp_mask); @@ -1213,6 +1221,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, tcp_set_skb_tso_segs(skb, mss_now); tcp_set_skb_tso_segs(buff, mss_now); + /* Update delivered info for the new segment */ + TCP_SKB_CB(buff)->tx = TCP_SKB_CB(skb)->tx; + /* If this packet has been sent out already, we must * adjust the various packet counters. */ @@ -1358,6 +1369,7 @@ int tcp_mss_to_mtu(struct sock *sk, int mss) } return mtu; } +EXPORT_SYMBOL(tcp_mss_to_mtu); /* MTU probing init per socket */ void tcp_mtup_init(struct sock *sk) @@ -1545,7 +1557,8 @@ static bool tcp_nagle_check(bool partial, const struct tcp_sock *tp, /* Return how many segs we'd like on a TSO packet, * to send one TSO packet per ms */ -static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) +u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, + int min_tso_segs) { u32 bytes, segs; @@ -1557,10 +1570,23 @@ static u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now) * This preserves ACK clocking and is consistent * with tcp_tso_should_defer() heuristic. */ - segs = max_t(u32, bytes / mss_now, sysctl_tcp_min_tso_segs); + segs = max_t(u32, bytes / mss_now, min_tso_segs); return min_t(u32, segs, sk->sk_gso_max_segs); } +EXPORT_SYMBOL(tcp_tso_autosize); + +/* Return the number of segments we want in the skb we are transmitting. + * See if congestion control module wants to decide; otherwise, autosize. + */ +static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) +{ + const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; + u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; + + return tso_segs ? : + tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); +} /* Returns the portion of skb which can be sent right away */ static unsigned int tcp_mss_split_point(const struct sock *sk, @@ -2022,6 +2048,39 @@ static int tcp_mtu_probe(struct sock *sk) return -1; } +/* TCP Small Queues : + * Control number of packets in qdisc/devices to two packets / or ~1 ms. + * (These limits are doubled for retransmits) + * This allows for : + * - better RTT estimation and ACK scheduling + * - faster recovery + * - high rates + * Alas, some drivers / subsystems require a fair amount + * of queued bytes to ensure line rate. + * One example is wifi aggregation (802.11 AMPDU) + */ +static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, + unsigned int factor) +{ + unsigned int limit; + + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); + limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); + limit <<= factor; + + if (atomic_read(&sk->sk_wmem_alloc) > limit) { + set_bit(TSQ_THROTTLED, &tcp_sk(sk)->tsq_flags); + /* It is possible TX completion already happened + * before we set TSQ_THROTTLED, so we must + * test again the condition. + */ + smp_mb__after_atomic(); + if (atomic_read(&sk->sk_wmem_alloc) > limit) + return true; + } + return false; +} + /* This routine writes packets to the network. It advances the * send_head. This happens as incoming acks open up the remote * window for us. @@ -2059,7 +2118,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } } - max_segs = tcp_tso_autosize(sk, mss_now); + max_segs = tcp_tso_segs(sk, mss_now); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2108,29 +2167,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) break; - /* TCP Small Queues : - * Control number of packets in qdisc/devices to two packets / or ~1 ms. - * This allows for : - * - better RTT estimation and ACK scheduling - * - faster recovery - * - high rates - * Alas, some drivers / subsystems require a fair amount - * of queued bytes to ensure line rate. - * One example is wifi aggregation (802.11 AMPDU) - */ - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); - limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); - - if (atomic_read(&sk->sk_wmem_alloc) > limit) { - set_bit(TSQ_THROTTLED, &tp->tsq_flags); - /* It is possible TX completion already happened - * before we set TSQ_THROTTLED, so we must - * test again the condition. - */ - smp_mb__after_atomic(); - if (atomic_read(&sk->sk_wmem_alloc) > limit) - break; - } + if (tcp_small_queue_check(sk, skb, 0)) + break; if (unlikely(tcp_transmit_skb(sk, skb, 1, gfp))) break; @@ -2777,9 +2815,9 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } - max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); + max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { - __u8 sacked = TCP_SKB_CB(skb)->sacked; + __u8 sacked; int segs; if (skb == tcp_send_head(sk)) @@ -2791,6 +2829,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + sacked = TCP_SKB_CB(skb)->sacked; /* In case tcp_shift_skb_data() have aggregated large skbs, * we need to make sure not sending too bigs TSO packets */ @@ -2830,6 +2869,9 @@ begin_fwd: if (sacked & (TCPCB_SACKED_ACKED|TCPCB_SACKED_RETRANS)) continue; + if (tcp_small_queue_check(sk, skb, 1)) + return; + if (tcp_retransmit_skb(sk, skb, segs)) return; diff --git a/net/ipv4/tcp_rate.c b/net/ipv4/tcp_rate.c new file mode 100644 index 000000000000..9be1581a5a08 --- /dev/null +++ b/net/ipv4/tcp_rate.c @@ -0,0 +1,186 @@ +#include + +/* The bandwidth estimator estimates the rate at which the network + * can currently deliver outbound data packets for this flow. At a high + * level, it operates by taking a delivery rate sample for each ACK. + * + * A rate sample records the rate at which the network delivered packets + * for this flow, calculated over the time interval between the transmission + * of a data packet and the acknowledgment of that packet. + * + * Specifically, over the interval between each transmit and corresponding ACK, + * the estimator generates a delivery rate sample. Typically it uses the rate + * at which packets were acknowledged. However, the approach of using only the + * acknowledgment rate faces a challenge under the prevalent ACK decimation or + * compression: packets can temporarily appear to be delivered much quicker + * than the bottleneck rate. Since it is physically impossible to do that in a + * sustained fashion, when the estimator notices that the ACK rate is faster + * than the transmit rate, it uses the latter: + * + * send_rate = #pkts_delivered/(last_snd_time - first_snd_time) + * ack_rate = #pkts_delivered/(last_ack_time - first_ack_time) + * bw = min(send_rate, ack_rate) + * + * Notice the estimator essentially estimates the goodput, not always the + * network bottleneck link rate when the sending or receiving is limited by + * other factors like applications or receiver window limits. The estimator + * deliberately avoids using the inter-packet spacing approach because that + * approach requires a large number of samples and sophisticated filtering. + * + * TCP flows can often be application-limited in request/response workloads. + * The estimator marks a bandwidth sample as application-limited if there + * was some moment during the sampled window of packets when there was no data + * ready to send in the write queue. + */ + +/* Snapshot the current delivery information in the skb, to generate + * a rate sample later when the skb is (s)acked in tcp_rate_skb_delivered(). + */ +void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) +{ + struct tcp_sock *tp = tcp_sk(sk); + + /* In general we need to start delivery rate samples from the + * time we received the most recent ACK, to ensure we include + * the full time the network needs to deliver all in-flight + * packets. If there are no packets in flight yet, then we + * know that any ACKs after now indicate that the network was + * able to deliver those packets completely in the sampling + * interval between now and the next ACK. + * + * Note that we use packets_out instead of tcp_packets_in_flight(tp) + * because the latter is a guess based on RTO and loss-marking + * heuristics. We don't want spurious RTOs or loss markings to cause + * a spuriously small time interval, causing a spuriously high + * bandwidth estimate. + */ + if (!tp->packets_out) { + tp->first_tx_mstamp = skb->skb_mstamp; + tp->delivered_mstamp = skb->skb_mstamp; + } + + TCP_SKB_CB(skb)->tx.first_tx_mstamp = tp->first_tx_mstamp; + TCP_SKB_CB(skb)->tx.delivered_mstamp = tp->delivered_mstamp; + TCP_SKB_CB(skb)->tx.delivered = tp->delivered; + TCP_SKB_CB(skb)->tx.is_app_limited = tp->app_limited ? 1 : 0; +} + +/* When an skb is sacked or acked, we fill in the rate sample with the (prior) + * delivery information when the skb was last transmitted. + * + * If an ACK (s)acks multiple skbs (e.g., stretched-acks), this function is + * called multiple times. We favor the information from the most recently + * sent skb, i.e., the skb with the highest prior_delivered count. + */ +void tcp_rate_skb_delivered(struct sock *sk, struct sk_buff *skb, + struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + + if (!scb->tx.delivered_mstamp.v64) + return; + + if (!rs->prior_delivered || + after(scb->tx.delivered, rs->prior_delivered)) { + rs->prior_delivered = scb->tx.delivered; + rs->prior_mstamp = scb->tx.delivered_mstamp; + rs->is_app_limited = scb->tx.is_app_limited; + rs->is_retrans = scb->sacked & TCPCB_RETRANS; + + /* Find the duration of the "send phase" of this window: */ + rs->interval_us = skb_mstamp_us_delta( + &skb->skb_mstamp, + &scb->tx.first_tx_mstamp); + + /* Record send time of most recently ACKed packet: */ + tp->first_tx_mstamp = skb->skb_mstamp; + } + /* Mark off the skb delivered once it's sacked to avoid being + * used again when it's cumulatively acked. For acked packets + * we don't need to reset since it'll be freed soon. + */ + if (scb->sacked & TCPCB_SACKED_ACKED) + scb->tx.delivered_mstamp.v64 = 0; +} + +/* Update the connection delivery information and generate a rate sample. */ +void tcp_rate_gen(struct sock *sk, u32 delivered, u32 lost, + struct skb_mstamp *now, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + u32 snd_us, ack_us; + + /* Clear app limited if bubble is acked and gone. */ + if (tp->app_limited && after(tp->delivered, tp->app_limited)) + tp->app_limited = 0; + + /* TODO: there are multiple places throughout tcp_ack() to get + * current time. Refactor the code using a new "tcp_acktag_state" + * to carry current time, flags, stats like "tcp_sacktag_state". + */ + if (delivered) + tp->delivered_mstamp = *now; + + rs->acked_sacked = delivered; /* freshly ACKed or SACKed */ + rs->losses = lost; /* freshly marked lost */ + /* Return an invalid sample if no timing information is available. */ + if (!rs->prior_mstamp.v64) { + rs->delivered = -1; + rs->interval_us = -1; + return; + } + rs->delivered = tp->delivered - rs->prior_delivered; + + /* Model sending data and receiving ACKs as separate pipeline phases + * for a window. Usually the ACK phase is longer, but with ACK + * compression the send phase can be longer. To be safe we use the + * longer phase. + */ + snd_us = rs->interval_us; /* send phase */ + ack_us = skb_mstamp_us_delta(now, &rs->prior_mstamp); /* ack phase */ + rs->interval_us = max(snd_us, ack_us); + + /* Normally we expect interval_us >= min-rtt. + * Note that rate may still be over-estimated when a spuriously + * retransmistted skb was first (s)acked because "interval_us" + * is under-estimated (up to an RTT). However continuously + * measuring the delivery rate during loss recovery is crucial + * for connections suffer heavy or prolonged losses. + */ + if (unlikely(rs->interval_us < tcp_min_rtt(tp))) { + if (!rs->is_retrans) + pr_debug("tcp rate: %ld %d %u %u %u\n", + rs->interval_us, rs->delivered, + inet_csk(sk)->icsk_ca_state, + tp->rx_opt.sack_ok, tcp_min_rtt(tp)); + rs->interval_us = -1; + return; + } + + /* Record the last non-app-limited or the highest app-limited bw */ + if (!rs->is_app_limited || + ((u64)rs->delivered * tp->rate_interval_us >= + (u64)tp->rate_delivered * rs->interval_us)) { + tp->rate_delivered = rs->delivered; + tp->rate_interval_us = rs->interval_us; + tp->rate_app_limited = rs->is_app_limited; + } +} + +/* If a gap is detected between sends, mark the socket application-limited. */ +void tcp_rate_check_app_limited(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (/* We have less than one packet to send. */ + tp->write_seq - tp->snd_nxt < tp->mss_cache && + /* Nothing in sending host's qdisc queues or NIC tx queue. */ + sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1) && + /* We are not limited by CWND. */ + tcp_packets_in_flight(tp) < tp->snd_cwnd && + /* All lost packets have been retransmitted. */ + tp->lost_out <= tp->retrans_out) + tp->app_limited = + (tp->delivered + tcp_packets_in_flight(tp)) ? : 1; +} diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index f712b411f6ed..3ea1cf804748 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -192,6 +192,8 @@ static int tcp_write_timeout(struct sock *sk) if (tp->syn_data && icsk->icsk_retransmits == 1) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); + } else if (!tp->syn_data && !tp->syn_fastopen) { + sk_rethink_txhash(sk); } retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries; syn_set = true; @@ -213,6 +215,8 @@ static int tcp_write_timeout(struct sock *sk) tcp_mtu_probing(icsk, sk); dst_negative_advice(sk); + } else { + sk_rethink_txhash(sk); } retry_until = net->ipv4.sysctl_tcp_retries2; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 5fdcb8d108d4..7d96dc2d3d08 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -114,6 +114,7 @@ #include #include "udp_impl.h" #include +#include struct udp_table udp_table __read_mostly; EXPORT_SYMBOL(udp_table); @@ -1020,12 +1021,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flow_flags, faddr, saddr, dport, inet->inet_sport); - if (!saddr && ipc.oif) { - err = l3mdev_get_saddr(net, ipc.oif, fl4); - if (err < 0) - goto out; - } - security_sk_classify_flow(sk, flowi4_to_flowi(fl4)); rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) { @@ -2192,6 +2187,20 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } EXPORT_SYMBOL(udp_poll); +int udp_abort(struct sock *sk, int err) +{ + lock_sock(sk); + + sk->sk_err = err; + sk->sk_error_report(sk); + udp_disconnect(sk, 0); + + release_sock(sk); + + return 0; +} +EXPORT_SYMBOL_GPL(udp_abort); + struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, @@ -2221,7 +2230,7 @@ struct proto udp_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, + .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 3d5ccf4b1412..9a89c10a55f0 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -20,7 +20,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *req, - struct nlattr *bc) + struct nlattr *bc, bool net_admin) { if (!inet_diag_bc_sk(bc, sk)) return 0; @@ -28,7 +28,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return inet_sk_diag_fill(sk, NULL, skb, req, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); + cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); } static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, @@ -76,7 +76,8 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, err = inet_sk_diag_fill(sk, NULL, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + netlink_net_capable(in_skb, CAP_NET_ADMIN)); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); @@ -97,6 +98,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); int num, s_num, slot, s_slot; @@ -132,7 +134,7 @@ static void udp_dump(struct udp_table *table, struct sk_buff *skb, r->id.idiag_dport) goto next; - if (sk_diag_dump(sk, skb, cb, r, bc) < 0) { + if (sk_diag_dump(sk, skb, cb, r, bc, net_admin) < 0) { spin_unlock_bh(&hslot->lock); goto done; } @@ -165,12 +167,88 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, r->idiag_wqueue = sk_wmem_alloc_get(sk); } +#ifdef CONFIG_INET_DIAG_DESTROY +static int __udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req, + struct udp_table *tbl) +{ + struct net *net = sock_net(in_skb->sk); + struct sock *sk; + int err; + + rcu_read_lock(); + + if (req->sdiag_family == AF_INET) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); +#if IS_ENABLED(CONFIG_IPV6) + else if (req->sdiag_family == AF_INET6) { + if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && + ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) + sk = __udp4_lib_lookup(net, + req->id.idiag_dst[3], req->id.idiag_dport, + req->id.idiag_src[3], req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); + + else + sk = __udp6_lib_lookup(net, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if, tbl, NULL); + } +#endif + else { + rcu_read_unlock(); + return -EINVAL; + } + + if (sk && !atomic_inc_not_zero(&sk->sk_refcnt)) + sk = NULL; + + rcu_read_unlock(); + + if (!sk) + return -ENOENT; + + if (sock_diag_check_cookie(sk, req->id.idiag_cookie)) { + sock_put(sk); + return -ENOENT; + } + + err = sock_diag_destroy(sk, ECONNABORTED); + + sock_put(sk); + + return err; +} + +static int udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udp_table); +} + +static int udplite_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) +{ + return __udp_diag_destroy(in_skb, req, &udplite_table); +} + +#endif + static const struct inet_diag_handler udp_diag_handler = { .dump = udp_diag_dump, .dump_one = udp_diag_dump_one, .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDP, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udp_diag_destroy, +#endif }; static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, @@ -192,6 +270,9 @@ static const struct inet_diag_handler udplite_diag_handler = { .idiag_get_info = udp_diag_get_info, .idiag_type = IPPROTO_UDPLITE, .idiag_info_size = 0, +#ifdef CONFIG_INET_DIAG_DESTROY + .destroy = udplite_diag_destroy, +#endif }; static int __init udp_diag_init(void) diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 81f253b6ff36..f9333c963607 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo; + bool remcsum, need_csum, offload_csum, ufo, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -88,6 +88,8 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, goto out; } + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + outer_hlen = skb_tnl_header_len(skb); udp_offset = outer_hlen - tnl_hlen; skb = segs; @@ -117,7 +119,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (skb_is_gso(skb)) { + if (gso_partial) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 2eea073e27ef..af817158d830 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -60,7 +60,6 @@ struct proto udplite_prot = { .compat_setsockopt = compat_udp_setsockopt, .compat_getsockopt = compat_udp_getsockopt, #endif - .clear_sk = sk_prot_clear_portaddr_nulls, }; EXPORT_SYMBOL(udplite_prot); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 41f5b504a782..6a7ff6957535 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -112,7 +112,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) int oif = 0; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); fl4->flowi4_mark = skb->mark; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 2f1f5d439788..cbd9343751a2 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -112,6 +112,27 @@ static inline u32 cstamp_delta(unsigned long cstamp) return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; } +static inline s32 rfc3315_s14_backoff_init(s32 irt) +{ + /* multiply 'initial retransmission time' by 0.9 .. 1.1 */ + u64 tmp = (900000 + prandom_u32() % 200001) * (u64)irt; + do_div(tmp, 1000000); + return (s32)tmp; +} + +static inline s32 rfc3315_s14_backoff_update(s32 rt, s32 mrt) +{ + /* multiply 'retransmission timeout' by 1.9 .. 2.1 */ + u64 tmp = (1900000 + prandom_u32() % 200001) * (u64)rt; + do_div(tmp, 1000000); + if ((s32)tmp > mrt) { + /* multiply 'maximum retransmission time' by 0.9 .. 1.1 */ + tmp = (900000 + prandom_u32() % 200001) * (u64)mrt; + do_div(tmp, 1000000); + } + return (s32)tmp; +} + #ifdef CONFIG_SYSCTL static int addrconf_sysctl_register(struct inet6_dev *idev); static void addrconf_sysctl_unregister(struct inet6_dev *idev); @@ -187,6 +208,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -232,6 +254,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .dad_transmits = 1, .rtr_solicits = MAX_RTR_SOLICITATIONS, .rtr_solicit_interval = RTR_SOLICITATION_INTERVAL, + .rtr_solicit_max_interval = RTR_SOLICITATION_MAX_INTERVAL, .rtr_solicit_delay = MAX_RTR_SOLICITATION_DELAY, .use_tempaddr = 0, .temp_valid_lft = TEMP_VALID_LIFETIME, @@ -3687,7 +3710,7 @@ static void addrconf_rs_timer(unsigned long data) if (idev->if_flags & IF_RA_RCVD) goto out; - if (idev->rs_probes++ < idev->cnf.rtr_solicits) { + if (idev->rs_probes++ < idev->cnf.rtr_solicits || idev->cnf.rtr_solicits < 0) { write_unlock(&idev->lock); if (!ipv6_get_lladdr(dev, &lladdr, IFA_F_TENTATIVE)) ndisc_send_rs(dev, &lladdr, @@ -3696,11 +3719,13 @@ static void addrconf_rs_timer(unsigned long data) goto put; write_lock(&idev->lock); + idev->rs_interval = rfc3315_s14_backoff_update( + idev->rs_interval, idev->cnf.rtr_solicit_max_interval); /* The wait after the last probe can be shorter */ addrconf_mod_rs_timer(idev, (idev->rs_probes == idev->cnf.rtr_solicits) ? idev->cnf.rtr_solicit_delay : - idev->cnf.rtr_solicit_interval); + idev->rs_interval); } else { /* * Note: we do not support deprecated "all on-link" @@ -3949,7 +3974,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && - ifp->idev->cnf.rtr_solicits > 0 && + ifp->idev->cnf.rtr_solicits != 0 && (dev->flags&IFF_LOOPBACK) == 0; read_unlock_bh(&ifp->idev->lock); @@ -3971,10 +3996,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) write_lock_bh(&ifp->idev->lock); spin_lock(&ifp->lock); + ifp->idev->rs_interval = rfc3315_s14_backoff_init( + ifp->idev->cnf.rtr_solicit_interval); ifp->idev->rs_probes = 1; ifp->idev->if_flags |= IF_RS_SENT; - addrconf_mod_rs_timer(ifp->idev, - ifp->idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(ifp->idev, ifp->idev->rs_interval); spin_unlock(&ifp->lock); write_unlock_bh(&ifp->idev->lock); } @@ -4891,6 +4917,8 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_RTR_SOLICITS] = cnf->rtr_solicits; array[DEVCONF_RTR_SOLICIT_INTERVAL] = jiffies_to_msecs(cnf->rtr_solicit_interval); + array[DEVCONF_RTR_SOLICIT_MAX_INTERVAL] = + jiffies_to_msecs(cnf->rtr_solicit_max_interval); array[DEVCONF_RTR_SOLICIT_DELAY] = jiffies_to_msecs(cnf->rtr_solicit_delay); array[DEVCONF_FORCE_MLD_VERSION] = cnf->force_mld_version; @@ -4961,18 +4989,18 @@ static inline size_t inet6_if_nlmsg_size(void) } static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, - int items, int bytes) + int bytes) { int i; - int pad = bytes - sizeof(u64) * items; + int pad = bytes - sizeof(u64) * ICMP6_MIB_MAX; BUG_ON(pad < 0); /* Use put_unaligned() because stats may not be aligned for u64. */ - put_unaligned(items, &stats[0]); - for (i = 1; i < items; i++) + put_unaligned(ICMP6_MIB_MAX, &stats[0]); + for (i = 1; i < ICMP6_MIB_MAX; i++) put_unaligned(atomic_long_read(&mib[i]), &stats[i]); - memset(&stats[items], 0, pad); + memset(&stats[ICMP6_MIB_MAX], 0, pad); } static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, @@ -5005,7 +5033,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: - __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, ICMP6_MIB_MAX, bytes); + __snmp6_fill_statsdev(stats, idev->stats.icmpv6dev->mibs, bytes); break; } } @@ -5099,7 +5127,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) return -EINVAL; if (!ipv6_accept_ra(idev)) return -EINVAL; - if (idev->cnf.rtr_solicits <= 0) + if (idev->cnf.rtr_solicits == 0) return -EINVAL; write_lock_bh(&idev->lock); @@ -5128,8 +5156,10 @@ update_lft: if (update_rs) { idev->if_flags |= IF_RS_SENT; + idev->rs_interval = rfc3315_s14_backoff_init( + idev->cnf.rtr_solicit_interval); idev->rs_probes = 1; - addrconf_mod_rs_timer(idev, idev->cnf.rtr_solicit_interval); + addrconf_mod_rs_timer(idev, idev->rs_interval); } /* Well, that's kinda nasty ... */ @@ -5466,20 +5496,6 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } -static -int addrconf_sysctl_hop_limit(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - struct ctl_table lctl; - int min_hl = 1, max_hl = 255; - - lctl = *ctl; - lctl.extra1 = &min_hl; - lctl.extra2 = &max_hl; - - return proc_dointvec_minmax(&lctl, write, buffer, lenp, ppos); -} - static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -5713,6 +5729,9 @@ int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, return ret; } +static const int one = 1; +static const int two_five_five = 255; + static const struct ctl_table addrconf_sysctl[] = { { .procname = "forwarding", @@ -5726,7 +5745,9 @@ static const struct ctl_table addrconf_sysctl[] = { .data = &ipv6_devconf.hop_limit, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = addrconf_sysctl_hop_limit, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&two_five_five, }, { .procname = "mtu", @@ -5777,6 +5798,13 @@ static const struct ctl_table addrconf_sysctl[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "router_solicitation_max_interval", + .data = &ipv6_devconf.rtr_solicit_max_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, { .procname = "router_solicitation_delay", .data = &ipv6_devconf.rtr_solicit_delay, @@ -6044,8 +6072,14 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, for (i = 0; table[i].data; i++) { table[i].data += (char *)p - (char *)&ipv6_devconf; - table[i].extra1 = idev; /* embedded; no ref */ - table[i].extra2 = net; + /* If one of these is already set, then it is not safe to + * overwrite either of them: this makes proc_dointvec_minmax + * usable. + */ + if (!table[i].extra1 && !table[i].extra2) { + table[i].extra1 = idev; /* embedded; no ref */ + table[i].extra2 = net; + } } snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index b454055ba625..46ad699937fd 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -545,6 +545,8 @@ const struct proto_ops inet6_stream_ops = { .mmap = sock_no_mmap, .sendpage = inet_sendpage, .splice_read = tcp_splice_read, + .read_sock = tcp_read_sock, + .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, .compat_getsockopt = compat_sock_common_getsockopt, diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 5857c1fc8b67..eea23b57c6a5 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -38,6 +38,9 @@ struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, .flags = FIB_LOOKUP_NOREF, }; + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(fl6)); + fib_rules_lookup(net->ipv6.fib6_rules_ops, flowi6_to_flowi(fl6), flags, &arg); diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index ec9efbcdad35..aba0998ddbfb 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -172,6 +172,5 @@ static void __exit ila_fini(void) module_init(ila_init); module_exit(ila_fini); -MODULE_ALIAS_RTNL_LWT(ILA); MODULE_AUTHOR("Tom Herbert "); MODULE_LICENSE("GPL"); diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index c8314c6b6154..e50c27a93e17 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -51,7 +51,7 @@ drop: return -EINVAL; } -static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { +static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, }; diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index e6eca5fdf4c9..e604013dd814 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -128,7 +128,7 @@ static struct genl_family ila_nl_family = { .parallel_ops = true, }; -static struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { +static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 771be1fa4176..ef5485204522 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -743,6 +743,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + u16 nlflags = NLM_F_EXCL; int err; ins = &fn->leaf; @@ -759,6 +760,8 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (info->nlh && (info->nlh->nlmsg_flags & NLM_F_EXCL)) return -EEXIST; + + nlflags &= ~NLM_F_EXCL; if (replace) { if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { found++; @@ -856,6 +859,7 @@ next_iter: pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: + nlflags |= NLM_F_CREATE; err = fib6_commit_metrics(&rt->dst, mxc); if (err) return err; @@ -864,7 +868,7 @@ add: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info, 0); + inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; if (!(fn->fn_flags & RTN_RTINFO)) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index edc3daab354e..d7d6d3ae0b3b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,12 +61,12 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_GRE_HASH_SIZE_SHIFT 5 +#define IP6_GRE_HASH_SIZE (1 << IP6_GRE_HASH_SIZE_SHIFT) static int ip6gre_net_id __read_mostly; struct ip6gre_net { - struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct net_device *fb_tunnel_dev; }; @@ -96,12 +96,12 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); will match fallback tunnel. */ -#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(IP6_GRE_HASH_SIZE - 1)) static u32 HASH_ADDR(const struct in6_addr *addr) { u32 hash = ipv6_addr_hash(addr); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_GRE_HASH_SIZE_SHIFT); } #define tunnels_r_l tunnels[3] @@ -1086,7 +1086,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) for (prio = 0; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_GRE_HASH_SIZE; h++) { struct ip6_tnl *t; t = rtnl_dereference(ign->tunnels[prio][h]); @@ -1238,7 +1238,7 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); if (data[IFLA_GRE_FLOWINFO]) - parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + parms->flowinfo = nla_get_be32(data[IFLA_GRE_FLOWINFO]); if (data[IFLA_GRE_FLAGS]) parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 22e90e56b5a9..e7bfd55899a3 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -69,6 +69,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, int offset = 0; bool encap, udpfrag; int nhoff; + bool gso_partial; skb_reset_network_header(skb); nhoff = skb_network_header(skb) - skb_mac_header(skb); @@ -101,9 +102,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (IS_ERR(segs)) goto out; + gso_partial = !!(skb_shinfo(segs)->gso_type & SKB_GSO_PARTIAL); + for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (skb_is_gso(skb)) + if (gso_partial) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1dfc402d9ad1..6001e781164e 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,6 +56,7 @@ #include #include #include +#include static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -104,6 +105,13 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff * } } + if (lwtunnel_xmit_redirect(dst->lwtstate)) { + int res = lwtunnel_xmit(skb); + + if (res < 0 || res == LWTUNNEL_XMIT_DONE) + return res; + } + rcu_read_lock_bh(); nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); @@ -228,6 +236,14 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); + + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out((struct sock *)sk, skb); + if (unlikely(!skb)) + return 0; + /* hooks should never assume socket lock is held. * we promote our socket to non const */ @@ -910,13 +926,6 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, int err; int flags = 0; - if (ipv6_addr_any(&fl6->saddr) && fl6->flowi6_oif && - (!*dst || !(*dst)->error)) { - err = l3mdev_get_saddr6(net, sk, fl6); - if (err) - goto out_err; - } - /* The correct way to handle this would be to do * ip6_route_get_saddr, and then ip6_route_output; however, * the route-specific preferred source forces the @@ -1008,7 +1017,7 @@ static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk, out_err_release: dst_release(*dst); *dst = NULL; -out_err: + if (err == -ENETUNREACH) IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); return err; @@ -1054,8 +1063,6 @@ struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (!fl6->flowi6_oif) - fl6->flowi6_oif = l3mdev_fib_oif(dst->dev); return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 888543debe4e..6a66adba0c22 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -57,6 +57,7 @@ #include #include #include +#include MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); @@ -64,8 +65,8 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_TUNNEL_HASH_SIZE_SHIFT 5 +#define IP6_TUNNEL_HASH_SIZE (1 << IP6_TUNNEL_HASH_SIZE_SHIFT) static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); @@ -75,7 +76,7 @@ static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_TUNNEL_HASH_SIZE_SHIFT); } static int ip6_tnl_dev_init(struct net_device *dev); @@ -87,9 +88,10 @@ struct ip6_tnl_net { /* the IPv6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_TUNNEL_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; + struct ip6_tnl __rcu *collect_md_tun; }; static struct net_device_stats *ip6_get_stats(struct net_device *dev) @@ -166,6 +168,10 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ return t; } + t = rcu_dereference(ip6n->collect_md_tun); + if (t) + return t; + t = rcu_dereference(ip6n->tnls_wc[0]); if (t && (t->dev->flags & IFF_UP)) return t; @@ -209,6 +215,8 @@ ip6_tnl_link(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6_tnl_bucket(ip6n, &t->parms); + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, t); rcu_assign_pointer(t->next , rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -224,6 +232,9 @@ ip6_tnl_unlink(struct ip6_tnl_net *ip6n, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; + if (t->parms.collect_md) + rcu_assign_pointer(ip6n->collect_md_tun, NULL); + for (tp = ip6_tnl_bucket(ip6n, &t->parms); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -829,6 +840,9 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb, skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); + if (tun_dst) + skb_dst_set(skb, (struct dst_entry *)tun_dst); + gro_cells_receive(&tunnel->gro_cells, skb); return 0; @@ -865,6 +879,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, { struct ip6_tnl *t; const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct metadata_dst *tun_dst = NULL; int ret = -1; rcu_read_lock(); @@ -881,7 +896,12 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, goto drop; if (iptunnel_pull_header(skb, 0, tpi->proto, false)) goto drop; - ret = __ip6_tnl_rcv(t, skb, tpi, NULL, dscp_ecn_decapsulate, + if (t->parms.collect_md) { + tun_dst = ipv6_tun_rx_dst(skb, 0, 0, 0); + if (!tun_dst) + return 0; + } + ret = __ip6_tnl_rcv(t, skb, tpi, tun_dst, dscp_ecn_decapsulate, log_ecn_error); } @@ -1012,8 +1032,16 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, int mtu; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; + u8 hop_limit; int err = -1; + if (t->parms.collect_md) { + hop_limit = skb_tunnel_info(skb)->key.ttl; + goto route_lookup; + } else { + hop_limit = t->parms.hop_limit; + } + /* NBMA tunnel */ if (ipv6_addr_any(&t->parms.raddr)) { struct in6_addr *addr6; @@ -1043,6 +1071,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, goto tx_err_link_failure; if (!dst) { +route_lookup: dst = ip6_route_output(net, NULL, fl6); if (dst->error) @@ -1053,6 +1082,10 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, dst = NULL; goto tx_err_link_failure; } + if (t->parms.collect_md && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, + &fl6->daddr, 0, &fl6->saddr)) + goto tx_err_link_failure; ndst = dst; } @@ -1071,7 +1104,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, } if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb_dst(skb)) + if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); if (skb->len > mtu && !skb_is_gso(skb)) { *pmtu = mtu; @@ -1111,8 +1144,13 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, skb = new_skb; } - if (!fl6->flowi6_mark && ndst) - dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + if (t->parms.collect_md) { + if (t->encap.type != TUNNEL_ENCAP_NONE) + goto tx_err_dst_release; + } else { + if (!fl6->flowi6_mark && ndst) + dst_cache_set_ip6(&t->dst_cache, ndst, &fl6->saddr); + } skb_dst_set(skb, dst); if (encap_limit >= 0) { @@ -1137,7 +1175,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, ipv6h = ipv6_hdr(skb); ip6_flow_hdr(ipv6h, INET_ECN_encapsulate(0, dsfield), ip6_make_flowlabel(net, skb, fl6->flowlabel, true, fl6)); - ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->hop_limit = hop_limit; ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; @@ -1170,19 +1208,34 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (tproto != IPPROTO_IPIP && tproto != 0) return -1; - if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPIP; - dsfield = ipv4_get_dsfield(iph); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) - & IPV6_TCLASS_MASK; - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1220,29 +1273,47 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); - if (offset > 0) { - struct ipv6_tlv_tnl_enc_lim *tel; - tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; - if (tel->encap_limit == 0) { - icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2); - return -1; - } - encap_limit = tel->encap_limit - 1; - } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) - encap_limit = t->parms.encap_limit; - - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_IPV6; - dsfield = ipv6_get_dsfield(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= ip6_flowlabel(ipv6h); - if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) - fl6.flowi6_mark = skb->mark; + + if (t->parms.collect_md) { + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET6)) + return -1; + key = &tun_info->key; + memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + fl6.daddr = key->u.ipv6.dst; + fl6.flowlabel = key->label; + } else { + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + + tel = (void *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) { + encap_limit = t->parms.encap_limit; + } + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *)ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= ip6_flowlabel(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + } if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6)) return -1; @@ -1741,6 +1812,10 @@ static int ip6_tnl_dev_init(struct net_device *dev) if (err) return err; ip6_tnl_link_config(t); + if (t->parms.collect_md) { + dev->features |= NETIF_F_NETNS_LOCAL; + netif_keep_dst(dev); + } return 0; } @@ -1811,6 +1886,9 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], if (data[IFLA_IPTUN_PROTO]) parms->proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); + + if (data[IFLA_IPTUN_COLLECT_METADATA]) + parms->collect_md = true; } static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], @@ -1850,6 +1928,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { struct net *net = dev_net(dev); + struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct ip6_tnl *nt, *t; struct ip_tunnel_encap ipencap; @@ -1864,9 +1943,14 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, ip6_tnl_netlink_parms(data, &nt->parms); - t = ip6_tnl_locate(net, &nt->parms, 0); - if (!IS_ERR(t)) - return -EEXIST; + if (nt->parms.collect_md) { + if (rtnl_dereference(ip6n->collect_md_tun)) + return -EEXIST; + } else { + t = ip6_tnl_locate(net, &nt->parms, 0); + if (!IS_ERR(t)) + return -EEXIST; + } return ip6_tnl_create2(dev); } @@ -1890,6 +1974,8 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], return err; } ip6_tnl_netlink_parms(data, &p); + if (p.collect_md) + return -EINVAL; t = ip6_tnl_locate(net, &p, 0); if (!IS_ERR(t)) { @@ -1937,6 +2023,8 @@ static size_t ip6_tnl_get_size(const struct net_device *dev) nla_total_size(2) + /* IFLA_IPTUN_ENCAP_DPORT */ nla_total_size(2) + + /* IFLA_IPTUN_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -1955,16 +2043,15 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_IPTUN_PROTO, parm->proto)) goto nla_put_failure; - if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, - tunnel->encap.type) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, - tunnel->encap.sport) || - nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, - tunnel->encap.dport) || - nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, - tunnel->encap.flags)) + if (nla_put_u16(skb, IFLA_IPTUN_ENCAP_TYPE, tunnel->encap.type) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_SPORT, tunnel->encap.sport) || + nla_put_be16(skb, IFLA_IPTUN_ENCAP_DPORT, tunnel->encap.dport) || + nla_put_u16(skb, IFLA_IPTUN_ENCAP_FLAGS, tunnel->encap.flags)) goto nla_put_failure; + if (parm->collect_md) + if (nla_put_flag(skb, IFLA_IPTUN_COLLECT_METADATA)) + goto nla_put_failure; return 0; nla_put_failure: @@ -1992,6 +2079,7 @@ static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_ENCAP_FLAGS] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_SPORT] = { .type = NLA_U16 }, [IFLA_IPTUN_ENCAP_DPORT] = { .type = NLA_U16 }, + [IFLA_IPTUN_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops ip6_link_ops __read_mostly = { @@ -2033,7 +2121,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) if (dev->rtnl_link_ops == &ip6_link_ops) unregister_netdevice_queue(dev, &list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { /* If dev is in the same netns, it has already diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5bd3afdcc771..8a02ca8a11af 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -50,14 +50,14 @@ #include #include -#define HASH_SIZE_SHIFT 5 -#define HASH_SIZE (1 << HASH_SIZE_SHIFT) +#define IP6_VTI_HASH_SIZE_SHIFT 5 +#define IP6_VTI_HASH_SIZE (1 << IP6_VTI_HASH_SIZE_SHIFT) static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) { u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); - return hash_32(hash, HASH_SIZE_SHIFT); + return hash_32(hash, IP6_VTI_HASH_SIZE_SHIFT); } static int vti6_dev_init(struct net_device *dev); @@ -69,7 +69,7 @@ struct vti6_net { /* the vti6 tunnel fallback device */ struct net_device *fb_tnl_dev; /* lists for storing tunnels in use */ - struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; + struct ip6_tnl __rcu *tnls_r_l[IP6_VTI_HASH_SIZE]; struct ip6_tnl __rcu *tnls_wc[1]; struct ip6_tnl __rcu **tnls[2]; }; @@ -1051,7 +1051,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) struct ip6_tnl *t; LIST_HEAD(list); - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { unregister_netdevice_queue(t->dev, &list); diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d64ee7e83664..75c1fc54f188 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -1739,6 +1739,15 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, continue; } + /* Based on RFC3810 6.1. Should not send source-list change + * records when there is a filter mode change. + */ + if (((gdeleted && pmc->mca_sfmode == MCAST_EXCLUDE) || + (!gdeleted && pmc->mca_crcount)) && + (type == MLD2_ALLOW_NEW_SOURCES || + type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) + goto decrease_sf_crcount; + /* clear marks on query responses */ if (isquery) psf->sf_gsresp = 0; @@ -1766,6 +1775,7 @@ static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, scount++; stotal++; if ((type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) && psf->sf_crcount) { +decrease_sf_crcount: psf->sf_crcount--; if ((sdeleted || gdeleted) && psf->sf_crcount == 0) { if (psf_prev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index fe65cdc28a45..d8e671457d10 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -67,7 +67,6 @@ #include #include #include -#include #include #include @@ -457,11 +456,9 @@ static void ndisc_send_skb(struct sk_buff *skb, if (!dst) { struct flowi6 fl6; - int oif = l3mdev_fib_oif(skb->dev); + int oif = skb->dev->ifindex; icmpv6_flow_init(sk, &fl6, type, saddr, daddr, oif); - if (oif != skb->dev->ifindex) - fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; dst = icmp6_dst_alloc(skb->dev, &fl6); if (IS_ERR(dst)) { kfree_skb(skb); @@ -1538,7 +1535,6 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) int rd_len; u8 ha_buf[MAX_ADDR_LEN], *ha = NULL, ops_data_buf[NDISC_OPS_REDIRECT_DATA_SPACE], *ops_data = NULL; - int oif = l3mdev_fib_oif(dev); bool ret; if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) { @@ -1555,10 +1551,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) } icmpv6_flow_init(sk, &fl6, NDISC_REDIRECT, - &saddr_buf, &ipv6_hdr(skb)->saddr, oif); - - if (oif != skb->dev->ifindex) - fl6.flowi6_flags |= FLOWI_FLAG_L3MDEV_SRC; + &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex); dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 552fac2f390a..55aacea24396 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -190,7 +190,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 1aa5848764a7..963ee3848675 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -115,7 +115,7 @@ static unsigned int ipv6_helper(void *priv, help = nfct_help(ct); if (!help) return NF_ACCEPT; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (!helper) return NF_ACCEPT; diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 660bc10c7a9c..f5a61bc3ec2b 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -165,7 +165,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return -NF_ACCEPT; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ inproto = __nf_ct_l4proto_find(PF_INET6, origtuple.dst.protonum); /* Ordinarily, we'd expect the inverted tupleproto, but it's diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 8dd869642f45..57d86066a13b 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -30,7 +30,7 @@ static struct nf_loginfo default_loginfo = { .u = { .log = { .level = LOGLEVEL_NOTICE, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -52,7 +52,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; else - logflags = NF_LOG_MASK; + logflags = NF_LOG_DEFAULT_MASK; ih = skb_header_pointer(skb, ip6hoff, sizeof(_ip6h), &_ip6h); if (ih == NULL) { @@ -84,7 +84,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, } /* Max length: 48 "OPT (...) " */ - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, "OPT ( "); switch (currenthdr) { @@ -121,7 +121,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, case IPPROTO_ROUTING: case IPPROTO_HOPOPTS: if (fragment) { - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ")"); return; } @@ -129,7 +129,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, break; /* Max Length */ case IPPROTO_AH: - if (logflags & XT_LOG_IPOPT) { + if (logflags & NF_LOG_IPOPT) { struct ip_auth_hdr _ahdr; const struct ip_auth_hdr *ah; @@ -161,7 +161,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, hdrlen = (hp->hdrlen+2)<<2; break; case IPPROTO_ESP: - if (logflags & XT_LOG_IPOPT) { + if (logflags & NF_LOG_IPOPT) { struct ip_esp_hdr _esph; const struct ip_esp_hdr *eh; @@ -194,7 +194,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, nf_log_buf_add(m, "Unknown Ext Hdr %u", currenthdr); return; } - if (logflags & XT_LOG_IPOPT) + if (logflags & NF_LOG_IPOPT) nf_log_buf_add(m, ") "); currenthdr = hp->nexthdr; @@ -277,7 +277,7 @@ static void dump_ipv6_packet(struct nf_log_buf *m, } /* Max length: 15 "UID=4294967295 " */ - if ((logflags & XT_LOG_UID) && recurse) + if ((logflags & NF_LOG_UID) && recurse) nf_log_dump_sk_uid_gid(m, skb->sk); /* Max length: 16 "MARK=0xFFFFFFFF " */ @@ -295,7 +295,7 @@ static void dump_ipv6_mac_header(struct nf_log_buf *m, if (info->type == NF_LOG_TYPE_LOG) logflags = info->u.log.logflags; - if (!(logflags & XT_LOG_MACDECODE)) + if (!(logflags & NF_LOG_MACDECODE)) goto fallback; switch (dev->type) { @@ -379,8 +379,7 @@ static struct nf_logger nf_ip6_logger __read_mostly = { static int __net_init nf_log_ipv6_net_init(struct net *net) { - nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); - return 0; + return nf_log_set(net, NFPROTO_IPV6, &nf_ip6_logger); } static void __net_exit nf_log_ipv6_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 30b22f4dff55..d6e4ba5de916 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -22,9 +22,7 @@ static unsigned int nft_do_chain_ipv6(void *priv, { struct nft_pktinfo pkt; - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); return nft_do_chain(&pkt, priv); } @@ -102,7 +100,10 @@ static int __init nf_tables_ipv6_init(void) { int ret; - nft_register_chain_type(&filter_ipv6); + ret = nft_register_chain_type(&filter_ipv6); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_ipv6); diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 2535223ba956..f2727475895e 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -33,9 +33,7 @@ static unsigned int nf_route_table_hook(void *priv, u32 mark, flowlabel; int err; - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, skb, state) < 0) - return NF_DROP; + nft_set_pktinfo_ipv6(&pkt, skb, state); /* save source/dest address, mark, hoplimit, flowlabel, priority */ memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 462f2a76b5c2..7cca8ac66fe9 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -148,6 +148,13 @@ int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 679253d0af84..cc8e3ae9ca73 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -30,6 +30,11 @@ #include #include +#define MAX4(a, b, c, d) \ + max_t(u32, max_t(u32, a, b), max_t(u32, c, d)) +#define SNMP_MIB_MAX MAX4(UDP_MIB_MAX, TCP_MIB_MAX, \ + IPSTATS_MIB_MAX, ICMP_MIB_MAX) + static int sockstat6_seq_show(struct seq_file *seq, void *v) { struct net *net = seq->private; @@ -191,25 +196,34 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { + unsigned long buff[SNMP_MIB_MAX]; int i; - unsigned long val; - for (i = 0; itemlist[i].name; i++) { - val = pcpumib ? - snmp_fold_field(pcpumib, itemlist[i].entry) : - atomic_long_read(smib + itemlist[i].entry); - seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, val); + if (pcpumib) { + memset(buff, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, itemlist, pcpumib); + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", + itemlist[i].name, buff[i]); + } else { + for (i = 0; itemlist[i].name; i++) + seq_printf(seq, "%-32s\t%lu\n", itemlist[i].name, + atomic_long_read(smib + itemlist[i].entry)); } } static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { + u64 buff64[SNMP_MIB_MAX]; int i; + memset(buff64, 0, sizeof(unsigned long) * SNMP_MIB_MAX); + + snmp_get_cpu_field64_batch(buff64, itemlist, mib, syncpoff); for (i = 0; itemlist[i].name; i++) - seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, - snmp_fold_field64(mib, itemlist[i].entry, syncpoff)); + seq_printf(seq, "%-32s\t%llu\n", itemlist[i].name, buff64[i]); } static int snmp6_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 590dd1f7746f..54404f08efcc 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -653,6 +653,13 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, if (err) goto error_fault; + /* if egress device is enslaved to an L3 master device pass the + * skb to its handler for processing + */ + skb = l3mdev_ip6_out(sk, skb); + if (unlikely(!skb)) + return 0; + IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb, NULL, rt->dst.dev, dst_output); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 269218aacbea..bdbc38e8bf29 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1147,15 +1147,16 @@ static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table * return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags); } -static struct dst_entry *ip6_route_input_lookup(struct net *net, - struct net_device *dev, - struct flowi6 *fl6, int flags) +struct dst_entry *ip6_route_input_lookup(struct net *net, + struct net_device *dev, + struct flowi6 *fl6, int flags) { if (rt6_need_strict(&fl6->daddr) && dev->type != ARPHRD_PIMREG) flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_input); } +EXPORT_SYMBOL_GPL(ip6_route_input_lookup); void ip6_route_input(struct sk_buff *skb) { @@ -1164,7 +1165,7 @@ void ip6_route_input(struct sk_buff *skb) int flags = RT6_LOOKUP_F_HAS_SADDR; struct ip_tunnel_info *tun_info; struct flowi6 fl6 = { - .flowi6_iif = l3mdev_fib_oif(skb->dev), + .flowi6_iif = skb->dev->ifindex, .daddr = iph->daddr, .saddr = iph->saddr, .flowlabel = ip6_flowinfo(iph), @@ -1188,12 +1189,15 @@ static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table struct dst_entry *ip6_route_output_flags(struct net *net, const struct sock *sk, struct flowi6 *fl6, int flags) { - struct dst_entry *dst; bool any_src; - dst = l3mdev_get_rt6_dst(net, fl6); - if (dst) - return dst; + if (rt6_need_strict(&fl6->daddr)) { + struct dst_entry *dst; + + dst = l3mdev_link_scope_lookup(net, fl6); + if (dst) + return dst; + } fl6->flowi6_iif = LOOPBACK_IFINDEX; @@ -1604,7 +1608,9 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) rcu_read_unlock(); out: - return min_t(unsigned int, mtu, IP6_MAX_MTU); + mtu = min_t(unsigned int, mtu, IP6_MAX_MTU); + + return mtu - lwtunnel_headroom(dst->lwtstate, mtu); } static struct dst_entry *icmp6_dst_gc_list; @@ -2565,8 +2571,16 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { u32 tb_id; struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, - DST_NOCOUNT); + struct net_device *dev = net->loopback_dev; + struct rt6_info *rt; + + /* use L3 Master device as loopback for host routes if device + * is enslaved and address is not link local or multicast + */ + if (!rt6_need_strict(addr)) + dev = l3mdev_master_dev_rcu(idev->dev) ? : dev; + + rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); @@ -3347,11 +3361,6 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh) } else { fl6.flowi6_oif = oif; - if (netif_index_is_l3_master(net, oif)) { - fl6.flowi6_flags = FLOWI_FLAG_L3MDEV_SRC | - FLOWI_FLAG_SKIP_NH_OIF; - } - rt = (struct rt6_info *)ip6_route_output(net, NULL, &fl6); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 182b6a9be29d..b1cdf8009d29 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -62,7 +62,7 @@ For comments look at net/ipv4/ip_gre.c --ANK */ -#define HASH_SIZE 16 +#define IP6_SIT_HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) static bool log_ecn_error = true; @@ -78,9 +78,9 @@ static struct rtnl_link_ops sit_link_ops __read_mostly; static int sit_net_id __read_mostly; struct sit_net { - struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; - struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r_l[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_r[IP6_SIT_HASH_SIZE]; + struct ip_tunnel __rcu *tunnels_l[IP6_SIT_HASH_SIZE]; struct ip_tunnel __rcu *tunnels_wc[1]; struct ip_tunnel __rcu **tunnels[4]; @@ -1126,7 +1126,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, } #endif -bool ipip6_valid_ip_proto(u8 ipproto) +static bool ipip6_valid_ip_proto(u8 ipproto) { return ipproto == IPPROTO_IPV6 || ipproto == IPPROTO_IPIP || @@ -1783,7 +1783,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net, for (prio = 1; prio < 4; prio++) { int h; - for (h = 0; h < HASH_SIZE; h++) { + for (h = 0; h < IP6_SIT_HASH_SIZE; h++) { struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 94f4f89d73e7..54cf7197c7ab 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -671,6 +671,7 @@ static bool tcp_v6_inbound_md5_hash(const struct sock *sk, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE); net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), @@ -817,12 +818,8 @@ static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 fl6.flowi6_proto = IPPROTO_TCP; if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = tcp_v6_iif(skb); - else { - if (!oif && netif_index_is_l3_master(net, skb->skb_iif)) - oif = skb->skb_iif; - - fl6.flowi6_oif = oif; - } + else + fl6.flowi6_oif = oif ? : skb->skb_iif; fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; @@ -1415,6 +1412,7 @@ process: sk = req->rsk_listener; tcp_v6_fill_cb(skb, hdr, th); if (tcp_v6_inbound_md5_hash(sk, skb)) { + sk_drops_add(sk, skb); reqsk_put(req); goto discard_it; } @@ -1471,10 +1469,7 @@ process: if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) ret = tcp_v6_do_rcv(sk, skb); - } else if (unlikely(sk_add_backlog(sk, skb, - sk->sk_rcvbuf + sk->sk_sndbuf))) { - bh_unlock_sock(sk); - __NET_INC_STATS(net, LINUX_MIB_TCPBACKLOGDROP); + } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } bh_unlock_sock(sk); @@ -1868,17 +1863,6 @@ void tcp6_proc_exit(struct net *net) } #endif -static void tcp_v6_clear_sk(struct sock *sk, int size) -{ - struct inet_sock *inet = inet_sk(sk); - - /* we do not want to clear pinet6 field, because of RCU lookups */ - sk_prot_clear_nulls(sk, offsetof(struct inet_sock, pinet6)); - - size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); - memset(&inet->pinet6 + 1, 0, size); -} - struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, @@ -1920,7 +1904,6 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif - .clear_sk = tcp_v6_clear_sk, .diag_destroy = tcp_abort, }; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 19ac3a1c308d..9aa7c1c7a9ce 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1424,17 +1424,6 @@ void udp6_proc_exit(struct net *net) } #endif /* CONFIG_PROC_FS */ -void udp_v6_clear_sk(struct sock *sk, int size) -{ - struct inet_sock *inet = inet_sk(sk); - - /* we do not want to clear pinet6 field, because of RCU lookups */ - sk_prot_clear_portaddr_nulls(sk, offsetof(struct inet_sock, pinet6)); - - size -= offsetof(struct inet_sock, pinet6) + sizeof(inet->pinet6); - memset(&inet->pinet6 + 1, 0, size); -} - /* ------------------------------------------------------------------------ */ struct proto udpv6_prot = { @@ -1465,7 +1454,7 @@ struct proto udpv6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = udp_v6_clear_sk, + .diag_destroy = udp_abort, }; static struct inet_protosw udpv6_protosw = { diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 0682c031ccdc..f6eb1ab34f4b 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -29,8 +29,6 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); -void udp_v6_clear_sk(struct sock *sk, int size); - #ifdef CONFIG_PROC_FS int udp6_seq_show(struct seq_file *seq, void *v); #endif diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index fd6ef414899b..47d0d2b87106 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -55,7 +55,6 @@ struct proto udplitev6_prot = { .compat_setsockopt = compat_udpv6_setsockopt, .compat_getsockopt = compat_udpv6_getsockopt, #endif - .clear_sk = udp_v6_clear_sk, }; static struct inet_protosw udplite6_protosw = { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 70a86adad875..e0f71c01d728 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -134,7 +134,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) nexthdr = nh[nhoff]; if (skb_dst(skb)) - oif = l3mdev_fib_oif(skb_dst(skb)->dev); + oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); fl6->flowi6_mark = skb->mark; diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ccc244406fb9..391c3cbd2eed 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -845,9 +845,6 @@ static int irda_accept(struct socket *sock, struct socket *newsock, int flags) if (sock->state != SS_UNCONNECTED) goto out; - if ((sk = sock->sk) == NULL) - goto out; - err = -EOPNOTSUPP; if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && (sk->sk_type != SOCK_DGRAM)) diff --git a/net/kcm/Kconfig b/net/kcm/Kconfig index 5db94d940ecc..87fca36e6c47 100644 --- a/net/kcm/Kconfig +++ b/net/kcm/Kconfig @@ -3,6 +3,7 @@ config AF_KCM tristate "KCM sockets" depends on INET select BPF_SYSCALL + select STREAM_PARSER ---help--- KCM (Kernel Connection Multiplexor) sockets provide a method for multiplexing messages of a message based application diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 16c2e03bd388..bf75c9231cca 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -155,8 +155,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, - psock->stats.rx_msgs, - psock->stats.rx_bytes, + psock->strp.stats.rx_msgs, + psock->strp.stats.rx_bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, @@ -170,14 +170,27 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->tx_stopped) seq_puts(seq, "TxStop "); - if (psock->rx_stopped) + if (psock->strp.rx_stopped) seq_puts(seq, "RxStop "); if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); - if (psock->ready_rx_msg) - seq_puts(seq, "RdyRx "); + if (!psock->strp.rx_paused && !psock->ready_rx_msg) { + if (psock->sk->sk_receive_queue.qlen) { + if (psock->strp.rx_need_bytes) + seq_printf(seq, "RxWait=%u ", + psock->strp.rx_need_bytes); + else + seq_printf(seq, "RxWait "); + } + } else { + if (psock->strp.rx_paused) + seq_puts(seq, "RxPause "); + + if (psock->ready_rx_msg) + seq_puts(seq, "RdyRx "); + } seq_puts(seq, "\n"); } @@ -275,6 +288,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) { struct kcm_psock_stats psock_stats; struct kcm_mux_stats mux_stats; + struct strp_aggr_stats strp_stats; struct kcm_mux *mux; struct kcm_psock *psock; struct net *net = seq->private; @@ -282,20 +296,28 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) memset(&mux_stats, 0, sizeof(mux_stats)); memset(&psock_stats, 0, sizeof(psock_stats)); + memset(&strp_stats, 0, sizeof(strp_stats)); mutex_lock(&knet->mutex); aggregate_mux_stats(&knet->aggregate_mux_stats, &mux_stats); aggregate_psock_stats(&knet->aggregate_psock_stats, &psock_stats); + aggregate_strp_stats(&knet->aggregate_strp_stats, + &strp_stats); list_for_each_entry_rcu(mux, &knet->mux_list, kcm_mux_list) { spin_lock_bh(&mux->lock); aggregate_mux_stats(&mux->stats, &mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &psock_stats); - list_for_each_entry(psock, &mux->psocks, psock_list) + aggregate_strp_stats(&mux->aggregate_strp_stats, + &strp_stats); + list_for_each_entry(psock, &mux->psocks, psock_list) { aggregate_psock_stats(&psock->stats, &psock_stats); + save_strp_stats(&psock->strp, &strp_stats); + } + spin_unlock_bh(&mux->lock); } @@ -328,7 +350,7 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) mux_stats.rx_ready_drops); seq_printf(seq, - "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", + "%-8s %-10s %-16s %-10s %-16s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s %-10s\n", "Psock", "RX-Msgs", "RX-Bytes", @@ -337,6 +359,8 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "Reserved", "Unreserved", "RX-Aborts", + "RX-Intr", + "RX-Unrecov", "RX-MemFail", "RX-NeedMor", "RX-BadLen", @@ -345,20 +369,22 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) "TX-Aborts"); seq_printf(seq, - "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", + "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", - psock_stats.rx_msgs, - psock_stats.rx_bytes, + strp_stats.rx_msgs, + strp_stats.rx_bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, - psock_stats.rx_aborts, - psock_stats.rx_mem_fail, - psock_stats.rx_need_more_hdr, - psock_stats.rx_bad_hdr_len, - psock_stats.rx_msg_too_big, - psock_stats.rx_msg_timeouts, + strp_stats.rx_aborts, + strp_stats.rx_interrupted, + strp_stats.rx_unrecov_intr, + strp_stats.rx_mem_fail, + strp_stats.rx_need_more_hdr, + strp_stats.rx_bad_hdr_len, + strp_stats.rx_msg_too_big, + strp_stats.rx_msg_timeouts, psock_stats.tx_aborts); return 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index 411693288648..b7f869a85ab7 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1,3 +1,13 @@ +/* + * Kernel Connection Multiplexor + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + #include #include #include @@ -17,7 +27,6 @@ #include #include #include -#include #include unsigned int kcm_net_id; @@ -36,38 +45,12 @@ static inline struct kcm_tx_msg *kcm_tx_msg(struct sk_buff *skb) return (struct kcm_tx_msg *)skb->cb; } -static inline struct kcm_rx_msg *kcm_rx_msg(struct sk_buff *skb) -{ - return (struct kcm_rx_msg *)((void *)skb->cb + - offsetof(struct qdisc_skb_cb, data)); -} - static void report_csk_error(struct sock *csk, int err) { csk->sk_err = EPIPE; csk->sk_error_report(csk); } -/* Callback lock held */ -static void kcm_abort_rx_psock(struct kcm_psock *psock, int err, - struct sk_buff *skb) -{ - struct sock *csk = psock->sk; - - /* Unrecoverable error in receive */ - - del_timer(&psock->rx_msg_timer); - - if (psock->rx_stopped) - return; - - psock->rx_stopped = 1; - KCM_STATS_INCR(psock->stats.rx_aborts); - - /* Report an error on the lower socket */ - report_csk_error(csk, err); -} - static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, bool wakeup_kcm) { @@ -110,12 +93,13 @@ static void kcm_abort_tx_psock(struct kcm_psock *psock, int err, static void kcm_update_rx_mux_stats(struct kcm_mux *mux, struct kcm_psock *psock) { - KCM_STATS_ADD(mux->stats.rx_bytes, - psock->stats.rx_bytes - psock->saved_rx_bytes); + STRP_STATS_ADD(mux->stats.rx_bytes, + psock->strp.stats.rx_bytes - + psock->saved_rx_bytes); mux->stats.rx_msgs += - psock->stats.rx_msgs - psock->saved_rx_msgs; - psock->saved_rx_msgs = psock->stats.rx_msgs; - psock->saved_rx_bytes = psock->stats.rx_bytes; + psock->strp.stats.rx_msgs - psock->saved_rx_msgs; + psock->saved_rx_msgs = psock->strp.stats.rx_msgs; + psock->saved_rx_bytes = psock->strp.stats.rx_bytes; } static void kcm_update_tx_mux_stats(struct kcm_mux *mux, @@ -168,11 +152,11 @@ static void kcm_rcv_ready(struct kcm_sock *kcm) */ list_del(&psock->psock_ready_list); psock->ready_rx_msg = NULL; - /* Commit clearing of ready_rx_msg for queuing work */ smp_mb(); - queue_work(kcm_wq, &psock->rx_work); + strp_unpause(&psock->strp); + strp_check_rcv(&psock->strp); } /* Buffer limit is okay now, add to ready list */ @@ -286,6 +270,7 @@ static struct kcm_sock *reserve_rx_kcm(struct kcm_psock *psock, if (list_empty(&mux->kcm_rx_waiters)) { psock->ready_rx_msg = head; + strp_pause(&psock->strp); list_add_tail(&psock->psock_ready_list, &mux->psocks_ready); spin_unlock_bh(&mux->rx_lock); @@ -354,346 +339,60 @@ static void unreserve_rx_kcm(struct kcm_psock *psock, spin_unlock_bh(&mux->rx_lock); } -static void kcm_start_rx_timer(struct kcm_psock *psock) -{ - if (psock->sk->sk_rcvtimeo) - mod_timer(&psock->rx_msg_timer, psock->sk->sk_rcvtimeo); -} - -/* Macro to invoke filter function. */ -#define KCM_RUN_FILTER(prog, ctx) \ - (*prog->bpf_func)(ctx, prog->insnsi) - -/* Lower socket lock held */ -static int kcm_tcp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) -{ - struct kcm_psock *psock = (struct kcm_psock *)desc->arg.data; - struct kcm_rx_msg *rxm; - struct kcm_sock *kcm; - struct sk_buff *head, *skb; - size_t eaten = 0, cand_len; - ssize_t extra; - int err; - bool cloned_orig = false; - - if (psock->ready_rx_msg) - return 0; - - head = psock->rx_skb_head; - if (head) { - /* Message already in progress */ - - rxm = kcm_rx_msg(head); - if (unlikely(rxm->early_eaten)) { - /* Already some number of bytes on the receive sock - * data saved in rx_skb_head, just indicate they - * are consumed. - */ - eaten = orig_len <= rxm->early_eaten ? - orig_len : rxm->early_eaten; - rxm->early_eaten -= eaten; - - return eaten; - } - - if (unlikely(orig_offset)) { - /* Getting data with a non-zero offset when a message is - * in progress is not expected. If it does happen, we - * need to clone and pull since we can't deal with - * offsets in the skbs for a message expect in the head. - */ - orig_skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!orig_skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - if (!pskb_pull(orig_skb, orig_offset)) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - kfree_skb(orig_skb); - desc->error = -ENOMEM; - return 0; - } - cloned_orig = true; - orig_offset = 0; - } - - if (!psock->rx_skb_nextp) { - /* We are going to append to the frags_list of head. - * Need to unshare the frag_list. - */ - err = skb_unclone(head, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - return 0; - } - - if (unlikely(skb_shinfo(head)->frag_list)) { - /* We can't append to an sk_buff that already - * has a frag_list. We create a new head, point - * the frag_list of that to the old head, and - * then are able to use the old head->next for - * appending to the message. - */ - if (WARN_ON(head->next)) { - desc->error = -EINVAL; - return 0; - } - - skb = alloc_skb(0, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - return 0; - } - skb->len = head->len; - skb->data_len = head->len; - skb->truesize = head->truesize; - *kcm_rx_msg(skb) = *kcm_rx_msg(head); - psock->rx_skb_nextp = &head->next; - skb_shinfo(skb)->frag_list = head; - psock->rx_skb_head = skb; - head = skb; - } else { - psock->rx_skb_nextp = - &skb_shinfo(head)->frag_list; - } - } - } - - while (eaten < orig_len) { - /* Always clone since we will consume something */ - skb = skb_clone(orig_skb, GFP_ATOMIC); - if (!skb) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = -ENOMEM; - break; - } - - cand_len = orig_len - eaten; - - head = psock->rx_skb_head; - if (!head) { - head = skb; - psock->rx_skb_head = head; - /* Will set rx_skb_nextp on next packet if needed */ - psock->rx_skb_nextp = NULL; - rxm = kcm_rx_msg(head); - memset(rxm, 0, sizeof(*rxm)); - rxm->offset = orig_offset + eaten; - } else { - /* Unclone since we may be appending to an skb that we - * already share a frag_list with. - */ - err = skb_unclone(skb, GFP_ATOMIC); - if (err) { - KCM_STATS_INCR(psock->stats.rx_mem_fail); - desc->error = err; - break; - } - - rxm = kcm_rx_msg(head); - *psock->rx_skb_nextp = skb; - psock->rx_skb_nextp = &skb->next; - head->data_len += skb->len; - head->len += skb->len; - head->truesize += skb->truesize; - } - - if (!rxm->full_len) { - ssize_t len; - - len = KCM_RUN_FILTER(psock->bpf_prog, head); - - if (!len) { - /* Need more header to determine length */ - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - rxm->accum_len += cand_len; - eaten += cand_len; - KCM_STATS_INCR(psock->stats.rx_need_more_hdr); - WARN_ON(eaten != orig_len); - break; - } else if (len > psock->sk->sk_rcvbuf) { - /* Message length exceeds maximum allowed */ - KCM_STATS_INCR(psock->stats.rx_msg_too_big); - desc->error = -EMSGSIZE; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EMSGSIZE, head); - break; - } else if (len <= (ssize_t)head->len - - skb->len - rxm->offset) { - /* Length must be into new skb (and also - * greater than zero) - */ - KCM_STATS_INCR(psock->stats.rx_bad_hdr_len); - desc->error = -EPROTO; - psock->rx_skb_head = NULL; - kcm_abort_rx_psock(psock, EPROTO, head); - break; - } - - rxm->full_len = len; - } - - extra = (ssize_t)(rxm->accum_len + cand_len) - rxm->full_len; - - if (extra < 0) { - /* Message not complete yet. */ - if (rxm->full_len - rxm->accum_len > - tcp_inq(psock->sk)) { - /* Don't have the whole messages in the socket - * buffer. Set psock->rx_need_bytes to wait for - * the rest of the message. Also, set "early - * eaten" since we've already buffered the skb - * but don't consume yet per tcp_read_sock. - */ - - if (!rxm->accum_len) { - /* Start RX timer for new message */ - kcm_start_rx_timer(psock); - } - - psock->rx_need_bytes = rxm->full_len - - rxm->accum_len; - rxm->accum_len += cand_len; - rxm->early_eaten = cand_len; - KCM_STATS_ADD(psock->stats.rx_bytes, cand_len); - desc->count = 0; /* Stop reading socket */ - break; - } - rxm->accum_len += cand_len; - eaten += cand_len; - WARN_ON(eaten != orig_len); - break; - } - - /* Positive extra indicates ore bytes than needed for the - * message - */ - - WARN_ON(extra > cand_len); - - eaten += (cand_len - extra); - - /* Hurray, we have a new message! */ - del_timer(&psock->rx_msg_timer); - psock->rx_skb_head = NULL; - KCM_STATS_INCR(psock->stats.rx_msgs); - -try_queue: - kcm = reserve_rx_kcm(psock, head); - if (!kcm) { - /* Unable to reserve a KCM, message is held in psock. */ - break; - } - - if (kcm_queue_rcv_skb(&kcm->sk, head)) { - /* Should mean socket buffer full */ - unreserve_rx_kcm(psock, false); - goto try_queue; - } - } - - if (cloned_orig) - kfree_skb(orig_skb); - - KCM_STATS_ADD(psock->stats.rx_bytes, eaten); - - return eaten; -} - -/* Called with lock held on lower socket */ -static int psock_tcp_read_sock(struct kcm_psock *psock) -{ - read_descriptor_t desc; - - desc.arg.data = psock; - desc.error = 0; - desc.count = 1; /* give more than one skb per call */ - - /* sk should be locked here, so okay to do tcp_read_sock */ - tcp_read_sock(psock->sk, &desc, kcm_tcp_recv); - - unreserve_rx_kcm(psock, true); - - return desc.error; -} - /* Lower sock lock held */ -static void psock_tcp_data_ready(struct sock *sk) +static void psock_data_ready(struct sock *sk) { struct kcm_psock *psock; read_lock_bh(&sk->sk_callback_lock); psock = (struct kcm_psock *)sk->sk_user_data; - if (unlikely(!psock || psock->rx_stopped)) - goto out; + if (likely(psock)) + strp_data_ready(&psock->strp); - if (psock->ready_rx_msg) - goto out; - - if (psock->rx_need_bytes) { - if (tcp_inq(sk) >= psock->rx_need_bytes) - psock->rx_need_bytes = 0; - else - goto out; - } - - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); - -out: read_unlock_bh(&sk->sk_callback_lock); } -static void do_psock_rx_work(struct kcm_psock *psock) +/* Called with lower sock held */ +static void kcm_rcv_strparser(struct strparser *strp, struct sk_buff *skb) { - read_descriptor_t rd_desc; - struct sock *csk = psock->sk; + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct kcm_sock *kcm; - /* We need the read lock to synchronize with psock_tcp_data_ready. We - * need the socket lock for calling tcp_read_sock. - */ - lock_sock(csk); - read_lock_bh(&csk->sk_callback_lock); +try_queue: + kcm = reserve_rx_kcm(psock, skb); + if (!kcm) { + /* Unable to reserve a KCM, message is held in psock and strp + * is paused. + */ + return; + } - if (unlikely(csk->sk_user_data != psock)) - goto out; - - if (unlikely(psock->rx_stopped)) - goto out; - - if (psock->ready_rx_msg) - goto out; - - rd_desc.arg.data = psock; - - if (psock_tcp_read_sock(psock) == -ENOMEM) - queue_delayed_work(kcm_wq, &psock->rx_delayed_work, 0); - -out: - read_unlock_bh(&csk->sk_callback_lock); - release_sock(csk); + if (kcm_queue_rcv_skb(&kcm->sk, skb)) { + /* Should mean socket buffer full */ + unreserve_rx_kcm(psock, false); + goto try_queue; + } } -static void psock_rx_work(struct work_struct *w) +static int kcm_parse_func_strparser(struct strparser *strp, struct sk_buff *skb) { - do_psock_rx_work(container_of(w, struct kcm_psock, rx_work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + struct bpf_prog *prog = psock->bpf_prog; + + return (*prog->bpf_func)(skb, prog->insnsi); } -static void psock_rx_delayed_work(struct work_struct *w) +static int kcm_read_sock_done(struct strparser *strp, int err) { - do_psock_rx_work(container_of(w, struct kcm_psock, - rx_delayed_work.work)); + struct kcm_psock *psock = container_of(strp, struct kcm_psock, strp); + + unreserve_rx_kcm(psock, true); + + return err; } -static void psock_tcp_state_change(struct sock *sk) +static void psock_state_change(struct sock *sk) { /* TCP only does a POLLIN for a half close. Do a POLLHUP here * since application will normally not poll with POLLIN @@ -703,7 +402,7 @@ static void psock_tcp_state_change(struct sock *sk) report_csk_error(sk, EPIPE); } -static void psock_tcp_write_space(struct sock *sk) +static void psock_write_space(struct sock *sk) { struct kcm_psock *psock; struct kcm_mux *mux; @@ -714,14 +413,13 @@ static void psock_tcp_write_space(struct sock *sk) psock = (struct kcm_psock *)sk->sk_user_data; if (unlikely(!psock)) goto out; - mux = psock->mux; spin_lock_bh(&mux->lock); /* Check if the socket is reserved so someone is waiting for sending. */ kcm = psock->tx_kcm; - if (kcm) + if (kcm && !unlikely(kcm->tx_stopped)) queue_work(kcm_wq, &kcm->tx_work); spin_unlock_bh(&mux->lock); @@ -1412,7 +1110,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, struct kcm_sock *kcm = kcm_sk(sk); int err = 0; long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int copied = 0; struct sk_buff *skb; @@ -1426,7 +1124,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1482,7 +1180,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); long timeo; - struct kcm_rx_msg *rxm; + struct strp_rx_msg *rxm; int err = 0; ssize_t copied; struct sk_buff *skb; @@ -1499,7 +1197,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Okay, have a message on the receive queue */ - rxm = kcm_rx_msg(skb); + rxm = strp_rx_msg(skb); if (len > rxm->full_len) len = rxm->full_len; @@ -1675,15 +1373,6 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux) spin_unlock_bh(&mux->rx_lock); } -static void kcm_rx_msg_timeout(unsigned long arg) -{ - struct kcm_psock *psock = (struct kcm_psock *)arg; - - /* Message assembly timed out */ - KCM_STATS_INCR(psock->stats.rx_msg_timeouts); - kcm_abort_rx_psock(psock, ETIMEDOUT, NULL); -} - static int kcm_attach(struct socket *sock, struct socket *csock, struct bpf_prog *prog) { @@ -1693,19 +1382,13 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct kcm_psock *psock = NULL, *tpsock; struct list_head *head; int index = 0; - - if (csock->ops->family != PF_INET && - csock->ops->family != PF_INET6) - return -EINVAL; + struct strp_callbacks cb; + int err; csk = csock->sk; if (!csk) return -EINVAL; - /* Only support TCP for now */ - if (csk->sk_protocol != IPPROTO_TCP) - return -EINVAL; - psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); if (!psock) return -ENOMEM; @@ -1714,11 +1397,16 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - setup_timer(&psock->rx_msg_timer, kcm_rx_msg_timeout, - (unsigned long)psock); + cb.rcv_msg = kcm_rcv_strparser; + cb.abort_parser = NULL; + cb.parse_msg = kcm_parse_func_strparser; + cb.read_sock_done = kcm_read_sock_done; - INIT_WORK(&psock->rx_work, psock_rx_work); - INIT_DELAYED_WORK(&psock->rx_delayed_work, psock_rx_delayed_work); + err = strp_init(&psock->strp, csk, &cb); + if (err) { + kmem_cache_free(kcm_psockp, psock); + return err; + } sock_hold(csk); @@ -1727,9 +1415,9 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->save_write_space = csk->sk_write_space; psock->save_state_change = csk->sk_state_change; csk->sk_user_data = psock; - csk->sk_data_ready = psock_tcp_data_ready; - csk->sk_write_space = psock_tcp_write_space; - csk->sk_state_change = psock_tcp_state_change; + csk->sk_data_ready = psock_data_ready; + csk->sk_write_space = psock_write_space; + csk->sk_state_change = psock_state_change; write_unlock_bh(&csk->sk_callback_lock); /* Finished initialization, now add the psock to the MUX. */ @@ -1751,7 +1439,7 @@ static int kcm_attach(struct socket *sock, struct socket *csock, spin_unlock_bh(&mux->lock); /* Schedule RX work in case there are already bytes queued */ - queue_work(kcm_wq, &psock->rx_work); + strp_check_rcv(&psock->strp); return 0; } @@ -1791,6 +1479,8 @@ static void kcm_unattach(struct kcm_psock *psock) struct sock *csk = psock->sk; struct kcm_mux *mux = psock->mux; + lock_sock(csk); + /* Stop getting callbacks from TCP socket. After this there should * be no way to reserve a kcm for this psock. */ @@ -1799,7 +1489,7 @@ static void kcm_unattach(struct kcm_psock *psock) csk->sk_data_ready = psock->save_data_ready; csk->sk_write_space = psock->save_write_space; csk->sk_state_change = psock->save_state_change; - psock->rx_stopped = 1; + strp_stop(&psock->strp); if (WARN_ON(psock->rx_kcm)) { write_unlock_bh(&csk->sk_callback_lock); @@ -1822,18 +1512,17 @@ static void kcm_unattach(struct kcm_psock *psock) write_unlock_bh(&csk->sk_callback_lock); - del_timer_sync(&psock->rx_msg_timer); - cancel_work_sync(&psock->rx_work); - cancel_delayed_work_sync(&psock->rx_delayed_work); + /* Call strp_done without sock lock */ + release_sock(csk); + strp_done(&psock->strp); + lock_sock(csk); bpf_prog_put(psock->bpf_prog); - kfree_skb(psock->rx_skb_head); - psock->rx_skb_head = NULL; - spin_lock_bh(&mux->lock); aggregate_psock_stats(&psock->stats, &mux->aggregate_psock_stats); + save_strp_stats(&psock->strp, &mux->aggregate_strp_stats); KCM_STATS_INCR(mux->stats.psock_unattach); @@ -1876,6 +1565,8 @@ no_reserved: fput(csk->sk_socket->file); kmem_cache_free(kcm_psockp, psock); } + + release_sock(csk); } static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) @@ -1916,6 +1607,7 @@ static int kcm_unattach_ioctl(struct socket *sock, struct kcm_unattach *info) spin_unlock_bh(&mux->lock); + /* Lower socket lock should already be held */ kcm_unattach(psock); err = 0; @@ -2073,6 +1765,8 @@ static void release_mux(struct kcm_mux *mux) aggregate_mux_stats(&mux->stats, &knet->aggregate_mux_stats); aggregate_psock_stats(&mux->aggregate_psock_stats, &knet->aggregate_psock_stats); + aggregate_strp_stats(&mux->aggregate_strp_stats, + &knet->aggregate_strp_stats); list_del_rcu(&mux->kcm_mux_list); knet->count--; mutex_unlock(&knet->mutex); @@ -2152,6 +1846,13 @@ static int kcm_release(struct socket *sock) * it will just return. */ __skb_queue_purge(&sk->sk_write_queue); + + /* Set tx_stopped. This is checked when psock is bound to a kcm and we + * get a writespace callback. This prevents further work being queued + * from the callback (unbinding the psock occurs after canceling work. + */ + kcm->tx_stopped = 1; + release_sock(sk); spin_lock_bh(&mux->lock); diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index 5871537af387..2599af6378e4 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -139,7 +139,7 @@ struct l2tp_session { void (*session_close)(struct l2tp_session *session); void (*ref)(struct l2tp_session *session); void (*deref)(struct l2tp_session *session); -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) void (*show)(struct seq_file *m, void *priv); #endif uint8_t priv[0]; /* private data */ diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 57fc5a46ce06..965f7e344cef 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -121,7 +121,7 @@ static struct rtnl_link_stats64 *l2tp_eth_get_stats64(struct net_device *dev, } -static struct net_device_ops l2tp_eth_netdev_ops = { +static const struct net_device_ops l2tp_eth_netdev_ops = { .ndo_init = l2tp_eth_dev_init, .ndo_uninit = l2tp_eth_dev_uninit, .ndo_start_xmit = l2tp_eth_dev_xmit, @@ -195,7 +195,7 @@ static void l2tp_eth_delete(struct l2tp_session *session) } } -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) static void l2tp_eth_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -268,7 +268,7 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; session->session_close = l2tp_eth_delete; -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) session->show = l2tp_eth_show; #endif diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 1d02e8d20e56..bf3117771822 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -867,7 +867,7 @@ out: return skb->len; } -static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { +static const struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = { [L2TP_ATTR_NONE] = { .type = NLA_UNSPEC, }, [L2TP_ATTR_PW_TYPE] = { .type = NLA_U16, }, [L2TP_ATTR_ENCAP_TYPE] = { .type = NLA_U16, }, diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index 232cb92033e8..41d47bfda15c 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -177,7 +177,7 @@ static int pppol2tp_recv_payload_hook(struct sk_buff *skb) if (!pskb_may_pull(skb, 2)) return 1; - if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03)) + if ((skb->data[0] == PPP_ALLSTATIONS) && (skb->data[1] == PPP_UI)) skb_pull(skb, 2); return 0; @@ -282,7 +282,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session) static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) { - static const unsigned char ppph[2] = { 0xff, 0x03 }; struct sock *sk = sock->sk; struct sk_buff *skb; int error; @@ -312,7 +311,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, error = -ENOMEM; skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) + uhlen + session->hdr_len + - sizeof(ppph) + total_len, + 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ 0, GFP_KERNEL); if (!skb) goto error_put_sess_tun; @@ -325,8 +324,8 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, skb_reserve(skb, uhlen); /* Add PPP header */ - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; + skb->data[0] = PPP_ALLSTATIONS; + skb->data[1] = PPP_UI; skb_put(skb, 2); /* Copy user data into skb */ @@ -369,7 +368,6 @@ error: */ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { - static const u8 ppph[2] = { 0xff, 0x03 }; struct sock *sk = (struct sock *) chan->private; struct sock *sk_tun; struct l2tp_session *session; @@ -398,14 +396,14 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) sizeof(struct iphdr) + /* IP header */ uhlen + /* UDP header (if L2TP_ENCAPTYPE_UDP) */ session->hdr_len + /* L2TP header */ - sizeof(ppph); /* PPP header */ + 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ if (skb_cow_head(skb, headroom)) goto abort_put_sess_tun; /* Setup PPP header */ - __skb_push(skb, sizeof(ppph)); - skb->data[0] = ppph[0]; - skb->data[1] = ppph[1]; + __skb_push(skb, 2); + skb->data[0] = PPP_ALLSTATIONS; + skb->data[1] = PPP_UI; local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); @@ -440,7 +438,7 @@ static void pppol2tp_session_close(struct l2tp_session *session) BUG_ON(session->magic != L2TP_SESSION_MAGIC); if (sock) { - inet_shutdown(sock, 2); + inet_shutdown(sock, SEND_SHUTDOWN); /* Don't let the session go away before our socket does */ l2tp_session_inc_refcount(session); } @@ -554,7 +552,7 @@ out: return error; } -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) static void pppol2tp_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; @@ -725,7 +723,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, session->recv_skb = pppol2tp_recv; session->session_close = pppol2tp_session_close; -#if defined(CONFIG_L2TP_DEBUGFS) || defined(CONFIG_L2TP_DEBUGFS_MODULE) +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) session->show = pppol2tp_show; #endif diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index c4a1c3e84e12..8da86ceca33d 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -100,15 +100,14 @@ u32 l3mdev_fib_table_by_index(struct net *net, int ifindex) EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); /** - * l3mdev_get_rt6_dst - IPv6 route lookup based on flow. Returns - * cached route for L3 master device if relevant - * to flow + * l3mdev_link_scope_lookup - IPv6 route lookup based on flow for link + * local and multicast addresses * @net: network namespace for device index lookup * @fl6: IPv6 flow struct for lookup */ -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, - struct flowi6 *fl6) +struct dst_entry *l3mdev_link_scope_lookup(struct net *net, + struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; @@ -121,70 +120,15 @@ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, dev = netdev_master_upper_dev_get_rcu(dev); if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_rt6_dst) - dst = dev->l3mdev_ops->l3mdev_get_rt6_dst(dev, fl6); + dev->l3mdev_ops->l3mdev_link_scope_lookup) + dst = dev->l3mdev_ops->l3mdev_link_scope_lookup(dev, fl6); rcu_read_unlock(); } return dst; } -EXPORT_SYMBOL_GPL(l3mdev_get_rt6_dst); - -/** - * l3mdev_get_saddr - get source address for a flow based on an interface - * enslaved to an L3 master device - * @net: network namespace for device index lookup - * @ifindex: Interface index - * @fl4: IPv4 flow struct - */ - -int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4) -{ - struct net_device *dev; - int rc = 0; - - if (ifindex) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr) - rc = dev->l3mdev_ops->l3mdev_get_saddr(dev, fl4); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr); - -int l3mdev_get_saddr6(struct net *net, const struct sock *sk, - struct flowi6 *fl6) -{ - struct net_device *dev; - int rc = 0; - - if (fl6->flowi6_oif) { - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, fl6->flowi6_oif); - if (dev && netif_is_l3_slave(dev)) - dev = netdev_master_upper_dev_get_rcu(dev); - - if (dev && netif_is_l3_master(dev) && - dev->l3mdev_ops->l3mdev_get_saddr6) - rc = dev->l3mdev_ops->l3mdev_get_saddr6(dev, sk, fl6); - - rcu_read_unlock(); - } - - return rc; -} -EXPORT_SYMBOL_GPL(l3mdev_get_saddr6); +EXPORT_SYMBOL_GPL(l3mdev_link_scope_lookup); /** * l3mdev_fib_rule_match - Determine if flowi references an @@ -222,3 +166,38 @@ out: return rc; } + +void l3mdev_update_flow(struct net *net, struct flowi *fl) +{ + struct net_device *dev; + int ifindex; + + rcu_read_lock(); + + if (fl->flowi_oif) { + dev = dev_get_by_index_rcu(net, fl->flowi_oif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_oif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + goto out; + } + } + } + + if (fl->flowi_iif) { + dev = dev_get_by_index_rcu(net, fl->flowi_iif); + if (dev) { + ifindex = l3mdev_master_ifindex_rcu(dev); + if (ifindex) { + fl->flowi_iif = ifindex; + fl->flowi_flags |= FLOWI_FLAG_SKIP_NH_OIF; + } + } + } + +out: + rcu_read_unlock(); +} +EXPORT_SYMBOL_GPL(l3mdev_update_flow); diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 8ae3ed97d95c..db916cf51ffe 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -38,7 +38,7 @@ static u16 llc_ui_sap_link_no_max[256]; static struct sockaddr_llc llc_ui_addrnull; static const struct proto_ops llc_ui_ops; -static long llc_ui_wait_for_conn(struct sock *sk, long timeout); +static bool llc_ui_wait_for_conn(struct sock *sk, long timeout); static int llc_ui_wait_for_disc(struct sock *sk, long timeout); static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout); @@ -551,7 +551,7 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout) return rc; } -static long llc_ui_wait_for_conn(struct sock *sk, long timeout) +static bool llc_ui_wait_for_conn(struct sock *sk, long timeout) { DEFINE_WAIT(wait); diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index afa94687d5e1..f6749dced021 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -304,10 +304,13 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, buf_size = IEEE80211_MAX_AMPDU_BUF; /* make sure the size doesn't exceed the maximum supported by the hw */ - if (buf_size > local->hw.max_rx_aggregation_subframes) - buf_size = local->hw.max_rx_aggregation_subframes; + if (buf_size > sta->sta.max_rx_aggregation_subframes) + buf_size = sta->sta.max_rx_aggregation_subframes; params.buf_size = buf_size; + ht_dbg(sta->sdata, "AddBA Req buf_size=%d for %pM\n", + buf_size, sta->sta.addr); + /* examine state machine */ mutex_lock(&sta->ampdu_mlme.mtx); @@ -412,8 +415,10 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, } end: - if (status == WLAN_STATUS_SUCCESS) + if (status == WLAN_STATUS_SUCCESS) { __set_bit(tid, sta->ampdu_mlme.agg_session_valid); + __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); + } mutex_unlock(&sta->ampdu_mlme.mtx); end_no_lock: diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 543b1d4fc33d..fd6541f3ade3 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -3,6 +3,7 @@ * * Copyright 2006-2010 Johannes Berg * Copyright 2013-2015 Intel Mobile Communications GmbH + * Copyright (C) 2015-2016 Intel Deutschland GmbH * * This file is GPLv2 as found in COPYING. */ @@ -39,7 +40,7 @@ static struct wireless_dev *ieee80211_add_iface(struct wiphy *wiphy, if (type == NL80211_IFTYPE_MONITOR && flags) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; } return wdev; @@ -73,8 +74,29 @@ static int ieee80211_change_iface(struct wiphy *wiphy, sdata->u.mgd.use_4addr = params->use_4addr; } - if (sdata->vif.type == NL80211_IFTYPE_MONITOR && flags) { + if (sdata->vif.type == NL80211_IFTYPE_MONITOR) { struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *monitor_sdata; + u32 mu_mntr_cap_flag = NL80211_EXT_FEATURE_MU_MIMO_AIR_SNIFFER; + + monitor_sdata = rtnl_dereference(local->monitor_sdata); + if (monitor_sdata && + wiphy_ext_feature_isset(wiphy, mu_mntr_cap_flag)) { + memcpy(monitor_sdata->vif.bss_conf.mu_group.membership, + params->vht_mumimo_groups, WLAN_MEMBERSHIP_LEN); + memcpy(monitor_sdata->vif.bss_conf.mu_group.position, + params->vht_mumimo_groups + WLAN_MEMBERSHIP_LEN, + WLAN_USER_POSITION_LEN); + monitor_sdata->vif.mu_mimo_owner = true; + ieee80211_bss_info_change_notify(monitor_sdata, + BSS_CHANGED_MU_GROUPS); + + ether_addr_copy(monitor_sdata->u.mntr.mu_follow_addr, + params->macaddr); + } + + if (!flags) + return 0; if (ieee80211_sdata_running(sdata)) { u32 mask = MONITOR_FLAG_COOK_FRAMES | @@ -89,11 +111,11 @@ static int ieee80211_change_iface(struct wiphy *wiphy, * cooked_mntrs, monitor and all fif_* counters * reconfigure hardware */ - if ((*flags & mask) != (sdata->u.mntr_flags & mask)) + if ((*flags & mask) != (sdata->u.mntr.flags & mask)) return -EBUSY; ieee80211_adjust_monitor_flags(sdata, -1); - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; ieee80211_adjust_monitor_flags(sdata, 1); ieee80211_configure_filter(local); @@ -103,7 +125,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, * and ieee80211_do_open take care of "everything" * mentioned in the comment above. */ - sdata->u.mntr_flags = *flags; + sdata->u.mntr.flags = *flags; } } @@ -131,6 +153,149 @@ static void ieee80211_stop_p2p_device(struct wiphy *wiphy, ieee80211_sdata_stop(IEEE80211_WDEV_TO_SUB_IF(wdev)); } +static int ieee80211_start_nan(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + mutex_lock(&sdata->local->chanctx_mtx); + ret = ieee80211_check_combinations(sdata, NULL, 0, 0); + mutex_unlock(&sdata->local->chanctx_mtx); + if (ret < 0) + return ret; + + ret = ieee80211_do_open(wdev, true); + if (ret) + return ret; + + ret = drv_start_nan(sdata->local, sdata, conf); + if (ret) + ieee80211_sdata_stop(sdata); + + sdata->u.nan.conf = *conf; + + return ret; +} + +static void ieee80211_stop_nan(struct wiphy *wiphy, + struct wireless_dev *wdev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + drv_stop_nan(sdata->local, sdata); + ieee80211_sdata_stop(sdata); +} + +static int ieee80211_nan_change_conf(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_conf new_conf; + int ret = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + new_conf = sdata->u.nan.conf; + + if (changes & CFG80211_NAN_CONF_CHANGED_PREF) + new_conf.master_pref = conf->master_pref; + + if (changes & CFG80211_NAN_CONF_CHANGED_DUAL) + new_conf.dual = conf->dual; + + ret = drv_nan_change_conf(sdata->local, sdata, &new_conf, changes); + if (!ret) + sdata->u.nan.conf = new_conf; + + return ret; +} + +static int ieee80211_add_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + int ret; + + if (sdata->vif.type != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + spin_lock_bh(&sdata->u.nan.func_lock); + + ret = idr_alloc(&sdata->u.nan.function_inst_ids, + nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, + GFP_ATOMIC); + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (ret < 0) + return ret; + + nan_func->instance_id = ret; + + WARN_ON(nan_func->instance_id == 0); + + ret = drv_add_nan_func(sdata->local, sdata, nan_func); + if (ret) { + spin_lock_bh(&sdata->u.nan.func_lock); + idr_remove(&sdata->u.nan.function_inst_ids, + nan_func->instance_id); + spin_unlock_bh(&sdata->u.nan.func_lock); + } + + return ret; +} + +static struct cfg80211_nan_func * +ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, + u64 cookie) +{ + struct cfg80211_nan_func *func; + int id; + + lockdep_assert_held(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { + if (func->cookie == cookie) + return func; + } + + return NULL; +} + +static void ieee80211_del_nan_func(struct wiphy *wiphy, + struct wireless_dev *wdev, u64 cookie) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + struct cfg80211_nan_func *func; + u8 instance_id = 0; + + if (sdata->vif.type != NL80211_IFTYPE_NAN || + !ieee80211_sdata_running(sdata)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = ieee80211_find_nan_func_by_cookie(sdata, cookie); + if (func) + instance_id = func->instance_id; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + if (instance_id) + drv_del_nan_func(sdata->local, sdata, instance_id); +} + static int ieee80211_set_noack_map(struct wiphy *wiphy, struct net_device *dev, u16 noack_map) @@ -236,6 +401,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -2015,6 +2181,7 @@ static int ieee80211_scan(struct wiphy *wiphy, !(req->flags & NL80211_SCAN_FLAG_AP))) return -EOPNOTSUPP; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -2940,10 +3107,6 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev, } chanctx = container_of(conf, struct ieee80211_chanctx, conf); - if (!chanctx) { - err = -EBUSY; - goto out; - } ch_switch.timestamp = 0; ch_switch.device_timestamp = 0; @@ -3360,6 +3523,63 @@ static int ieee80211_del_tx_ts(struct wiphy *wiphy, struct net_device *dev, return -ENOENT; } +void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + u64 cookie; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + + cookie = func->cookie; + idr_remove(&sdata->u.nan.function_inst_ids, inst_id); + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_free_nan_func(func); + + cfg80211_nan_func_terminated(ieee80211_vif_to_wdev(vif), inst_id, + reason, cookie, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_terminated); + +void ieee80211_nan_func_match(struct ieee80211_vif *vif, + struct cfg80211_nan_match_params *match, + gfp_t gfp) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct cfg80211_nan_func *func; + + if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) + return; + + spin_lock_bh(&sdata->u.nan.func_lock); + + func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); + if (WARN_ON(!func)) { + spin_unlock_bh(&sdata->u.nan.func_lock); + return; + } + match->cookie = func->cookie; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); +} +EXPORT_SYMBOL(ieee80211_nan_func_match); + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -3445,4 +3665,9 @@ const struct cfg80211_ops mac80211_config_ops = { .set_ap_chanwidth = ieee80211_set_ap_chanwidth, .add_tx_ts = ieee80211_add_tx_ts, .del_tx_ts = ieee80211_del_tx_ts, + .start_nan = ieee80211_start_nan, + .stop_nan = ieee80211_stop_nan, + .nan_change_conf = ieee80211_nan_change_conf, + .add_nan_func = ieee80211_add_nan_func, + .del_nan_func = ieee80211_del_nan_func, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 74142d07ad31..e75cbf6ecc26 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -274,6 +274,7 @@ ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, ieee80211_get_max_required_bw(sdata)); break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_WDS: @@ -646,6 +647,9 @@ static int ieee80211_assign_vif_chanctx(struct ieee80211_sub_if_data *sdata, struct ieee80211_chanctx *curr_ctx = NULL; int ret = 0; + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN)) + return -ENOTSUPP; + conf = rcu_dereference_protected(sdata->vif.chanctx_conf, lockdep_is_held(&local->chanctx_mtx)); @@ -718,6 +722,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: continue; case NL80211_IFTYPE_STATION: if (!sdata->u.mgd.associated) @@ -980,6 +985,7 @@ ieee80211_vif_chanctx_reservation_complete(struct ieee80211_sub_if_data *sdata) case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: case NUM_NL80211_IFTYPES: WARN_ON(1); break; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 2906c1004e1a..f56e2f487d09 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -71,138 +71,45 @@ DEBUGFS_READONLY_FILE(wep_iv, "%#08x", DEBUGFS_READONLY_FILE(rate_ctrl_alg, "%s", local->rate_ctrl ? local->rate_ctrl->ops->name : "hw/driver"); -struct aqm_info { - struct ieee80211_local *local; - size_t size; - size_t len; - unsigned char buf[0]; -}; - -#define AQM_HDR_LEN 200 -#define AQM_HW_ENTRY_LEN 40 -#define AQM_TXQ_ENTRY_LEN 110 - -static int aqm_open(struct inode *inode, struct file *file) -{ - struct ieee80211_local *local = inode->i_private; - struct ieee80211_sub_if_data *sdata; - struct sta_info *sta; - struct txq_info *txqi; - struct fq *fq = &local->fq; - struct aqm_info *info = NULL; - int len = 0; - int i; - - if (!local->ops->wake_tx_queue) - return -EOPNOTSUPP; - - len += AQM_HDR_LEN; - len += 6 * AQM_HW_ENTRY_LEN; - - rcu_read_lock(); - list_for_each_entry_rcu(sdata, &local->interfaces, list) - len += AQM_TXQ_ENTRY_LEN; - list_for_each_entry_rcu(sta, &local->sta_list, list) - len += AQM_TXQ_ENTRY_LEN * ARRAY_SIZE(sta->sta.txq); - rcu_read_unlock(); - - info = vmalloc(len); - if (!info) - return -ENOMEM; - - spin_lock_bh(&local->fq.lock); - rcu_read_lock(); - - file->private_data = info; - info->local = local; - info->size = len; - len = 0; - - len += scnprintf(info->buf + len, info->size - len, - "* hw\n" - "access name value\n" - "R fq_flows_cnt %u\n" - "R fq_backlog %u\n" - "R fq_overlimit %u\n" - "R fq_collisions %u\n" - "RW fq_limit %u\n" - "RW fq_quantum %u\n", - fq->flows_cnt, - fq->backlog, - fq->overlimit, - fq->collisions, - fq->limit, - fq->quantum); - - len += scnprintf(info->buf + len, - info->size - len, - "* vif\n" - "ifname addr ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n"); - - list_for_each_entry_rcu(sdata, &local->interfaces, list) { - txqi = to_txq_info(sdata->vif.txq); - len += scnprintf(info->buf + len, info->size - len, - "%s %pM %u %u %u %u %u %u %u %u\n", - sdata->name, - sdata->vif.addr, - txqi->txq.ac, - txqi->tin.backlog_bytes, - txqi->tin.backlog_packets, - txqi->tin.flows, - txqi->tin.overlimit, - txqi->tin.collisions, - txqi->tin.tx_bytes, - txqi->tin.tx_packets); - } - - len += scnprintf(info->buf + len, - info->size - len, - "* sta\n" - "ifname addr tid ac backlog-bytes backlog-packets flows overlimit collisions tx-bytes tx-packets\n"); - - list_for_each_entry_rcu(sta, &local->sta_list, list) { - sdata = sta->sdata; - for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - txqi = to_txq_info(sta->sta.txq[i]); - len += scnprintf(info->buf + len, info->size - len, - "%s %pM %d %d %u %u %u %u %u %u %u\n", - sdata->name, - sta->sta.addr, - txqi->txq.tid, - txqi->txq.ac, - txqi->tin.backlog_bytes, - txqi->tin.backlog_packets, - txqi->tin.flows, - txqi->tin.overlimit, - txqi->tin.collisions, - txqi->tin.tx_bytes, - txqi->tin.tx_packets); - } - } - - info->len = len; - - rcu_read_unlock(); - spin_unlock_bh(&local->fq.lock); - - return 0; -} - -static int aqm_release(struct inode *inode, struct file *file) -{ - vfree(file->private_data); - return 0; -} - static ssize_t aqm_read(struct file *file, char __user *user_buf, size_t count, loff_t *ppos) { - struct aqm_info *info = file->private_data; + struct ieee80211_local *local = file->private_data; + struct fq *fq = &local->fq; + char buf[200]; + int len = 0; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + len = scnprintf(buf, sizeof(buf), + "access name value\n" + "R fq_flows_cnt %u\n" + "R fq_backlog %u\n" + "R fq_overlimit %u\n" + "R fq_overmemory %u\n" + "R fq_collisions %u\n" + "R fq_memory_usage %u\n" + "RW fq_memory_limit %u\n" + "RW fq_limit %u\n" + "RW fq_quantum %u\n", + fq->flows_cnt, + fq->backlog, + fq->overmemory, + fq->overlimit, + fq->collisions, + fq->memory_usage, + fq->memory_limit, + fq->limit, + fq->quantum); + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); return simple_read_from_buffer(user_buf, count, ppos, - info->buf, info->len); + buf, len); } static ssize_t aqm_write(struct file *file, @@ -210,8 +117,7 @@ static ssize_t aqm_write(struct file *file, size_t count, loff_t *ppos) { - struct aqm_info *info = file->private_data; - struct ieee80211_local *local = info->local; + struct ieee80211_local *local = file->private_data; char buf[100]; size_t len; @@ -228,6 +134,8 @@ static ssize_t aqm_write(struct file *file, if (sscanf(buf, "fq_limit %u", &local->fq.limit) == 1) return count; + else if (sscanf(buf, "fq_memory_limit %u", &local->fq.memory_limit) == 1) + return count; else if (sscanf(buf, "fq_quantum %u", &local->fq.quantum) == 1) return count; @@ -237,8 +145,7 @@ static ssize_t aqm_write(struct file *file, static const struct file_operations aqm_ops = { .write = aqm_write, .read = aqm_read, - .open = aqm_open, - .release = aqm_release, + .open = simple_open, .llseek = default_llseek, }; @@ -302,6 +209,7 @@ static const char *hw_flag_names[] = { FLAG(USES_RSS), FLAG(TX_AMSDU), FLAG(TX_FRAG_LIST), + FLAG(REPORTS_LOW_ACK), #undef FLAG }; @@ -428,7 +336,9 @@ void debugfs_hw_add(struct ieee80211_local *local) DEBUGFS_ADD(hwflags); DEBUGFS_ADD(user_power); DEBUGFS_ADD(power); - DEBUGFS_ADD_MODE(aqm, 0600); + + if (local->ops->wake_tx_queue) + DEBUGFS_ADD_MODE(aqm, 0600); statsd = debugfs_create_dir("statistics", phyd); diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index a5ba739cd2a7..bcec1240f41d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -30,7 +30,7 @@ static ssize_t ieee80211_if_read( size_t count, loff_t *ppos, ssize_t (*format)(const struct ieee80211_sub_if_data *, char *, int)) { - char buf[70]; + char buf[200]; ssize_t ret = -EINVAL; read_lock(&dev_base_lock); @@ -486,6 +486,38 @@ static ssize_t ieee80211_if_fmt_num_buffered_multicast( } IEEE80211_IF_FILE_R(num_buffered_multicast); +static ssize_t ieee80211_if_fmt_aqm( + const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) +{ + struct ieee80211_local *local = sdata->local; + struct txq_info *txqi = to_txq_info(sdata->vif.txq); + int len; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + len = scnprintf(buf, + buflen, + "ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n" + "%u %u %u %u %u %u %u %u %u %u\n", + txqi->txq.ac, + txqi->tin.backlog_bytes, + txqi->tin.backlog_packets, + txqi->tin.flows, + txqi->cstats.drop_count, + txqi->cstats.ecn_mark, + txqi->tin.overlimit, + txqi->tin.collisions, + txqi->tin.tx_bytes, + txqi->tin.tx_packets); + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); + + return len; +} +IEEE80211_IF_FILE_R(aqm); + /* IBSS attributes */ static ssize_t ieee80211_if_fmt_tsf( const struct ieee80211_sub_if_data *sdata, char *buf, int buflen) @@ -524,9 +556,15 @@ static ssize_t ieee80211_if_parse_tsf( ret = kstrtoull(buf, 10, &tsf); if (ret < 0) return ret; - if (tsf_is_delta) - tsf = drv_get_tsf(local, sdata) + tsf_is_delta * tsf; - if (local->ops->set_tsf) { + if (tsf_is_delta && local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsf_is_delta * tsf); + wiphy_info(local->hw.wiphy, + "debugfs offset TSF by %018lld\n", + tsf_is_delta * tsf); + } else if (local->ops->set_tsf) { + if (tsf_is_delta) + tsf = drv_get_tsf(local, sdata) + + tsf_is_delta * tsf; drv_set_tsf(local, sdata, tsf); wiphy_info(local->hw.wiphy, "debugfs set TSF to %#018llx\n", tsf); @@ -618,6 +656,9 @@ static void add_common_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_2ghz); DEBUGFS_ADD(rc_rateidx_vht_mcs_mask_5ghz); DEBUGFS_ADD(hw_queues); + + if (sdata->local->ops->wake_tx_queue) + DEBUGFS_ADD(aqm); } static void add_sta_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c index fd334133ff45..a2fcdb47a0e6 100644 --- a/net/mac80211/debugfs_sta.c +++ b/net/mac80211/debugfs_sta.c @@ -133,6 +133,55 @@ static ssize_t sta_last_seq_ctrl_read(struct file *file, char __user *userbuf, } STA_OPS(last_seq_ctrl); +#define AQM_TXQ_ENTRY_LEN 130 + +static ssize_t sta_aqm_read(struct file *file, char __user *userbuf, + size_t count, loff_t *ppos) +{ + struct sta_info *sta = file->private_data; + struct ieee80211_local *local = sta->local; + size_t bufsz = AQM_TXQ_ENTRY_LEN*(IEEE80211_NUM_TIDS+1); + char *buf = kzalloc(bufsz, GFP_KERNEL), *p = buf; + struct txq_info *txqi; + ssize_t rv; + int i; + + if (!buf) + return -ENOMEM; + + spin_lock_bh(&local->fq.lock); + rcu_read_lock(); + + p += scnprintf(p, + bufsz+buf-p, + "tid ac backlog-bytes backlog-packets new-flows drops marks overlimit collisions tx-bytes tx-packets\n"); + + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + txqi = to_txq_info(sta->sta.txq[i]); + p += scnprintf(p, bufsz+buf-p, + "%d %d %u %u %u %u %u %u %u %u %u\n", + txqi->txq.tid, + txqi->txq.ac, + txqi->tin.backlog_bytes, + txqi->tin.backlog_packets, + txqi->tin.flows, + txqi->cstats.drop_count, + txqi->cstats.ecn_mark, + txqi->tin.overlimit, + txqi->tin.collisions, + txqi->tin.tx_bytes, + txqi->tin.tx_packets); + } + + rcu_read_unlock(); + spin_unlock_bh(&local->fq.lock); + + rv = simple_read_from_buffer(userbuf, count, ppos, buf, p - buf); + kfree(buf); + return rv; +} +STA_OPS(aqm); + static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { @@ -478,6 +527,9 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) DEBUGFS_ADD_COUNTER(rx_fragments, rx_stats.fragments); DEBUGFS_ADD_COUNTER(tx_filtered, status_stats.filtered); + if (local->ops->wake_tx_queue) + DEBUGFS_ADD(aqm); + if (sizeof(sta->driver_buffered_tids) == sizeof(u32)) debugfs_create_x32("driver_buffered_tids", 0400, sta->debugfs_dir, @@ -492,10 +544,6 @@ void ieee80211_sta_debugfs_add(struct sta_info *sta) void ieee80211_sta_debugfs_remove(struct sta_info *sta) { - struct ieee80211_local *local = sta->local; - struct ieee80211_sub_if_data *sdata = sta->sdata; - - drv_sta_remove_debugfs(local, sdata, &sta->sta, sta->debugfs_dir); debugfs_remove_recursive(sta->debugfs_dir); sta->debugfs_dir = NULL; } diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index c258f1041d33..bb886e7db47f 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -62,7 +62,7 @@ int drv_add_interface(struct ieee80211_local *local, if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_AP_VLAN || (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, WANT_MONITOR_VIF) && - !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)))) + !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)))) return -EINVAL; trace_drv_add_interface(local, sdata); @@ -215,6 +215,21 @@ void drv_set_tsf(struct ieee80211_local *local, trace_drv_return_void(local); } +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset) +{ + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_offset_tsf(local, sdata, offset); + if (local->ops->offset_tsf) + local->ops->offset_tsf(&local->hw, &sdata->vif, offset); + trace_drv_return_void(local); +} + void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index ba5fc1f01e53..09f77e4a8a79 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -162,7 +162,9 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local, return; if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || - sdata->vif.type == NL80211_IFTYPE_MONITOR)) + sdata->vif.type == NL80211_IFTYPE_NAN || + (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !sdata->vif.mu_mimo_owner))) return; if (!check_sdata_in_driver(sdata)) @@ -498,21 +500,6 @@ static inline void drv_sta_add_debugfs(struct ieee80211_local *local, local->ops->sta_add_debugfs(&local->hw, &sdata->vif, sta, dir); } - -static inline void drv_sta_remove_debugfs(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - struct dentry *dir) -{ - might_sleep(); - - sdata = get_bss_sdata(sdata); - check_sdata_in_driver(sdata); - - if (local->ops->sta_remove_debugfs) - local->ops->sta_remove_debugfs(&local->hw, &sdata->vif, - sta, dir); -} #endif static inline void drv_sta_pre_rcu_remove(struct ieee80211_local *local, @@ -582,6 +569,9 @@ u64 drv_get_tsf(struct ieee80211_local *local, void drv_set_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u64 tsf); +void drv_offset_tsf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset); void drv_reset_tsf(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata); @@ -1088,13 +1078,13 @@ static inline void drv_leave_ibss(struct ieee80211_local *local, } static inline u32 drv_get_expected_throughput(struct ieee80211_local *local, - struct ieee80211_sta *sta) + struct sta_info *sta) { u32 ret = 0; - trace_drv_get_expected_throughput(sta); - if (local->ops->get_expected_throughput) - ret = local->ops->get_expected_throughput(&local->hw, sta); + trace_drv_get_expected_throughput(&sta->sta); + if (local->ops->get_expected_throughput && sta->uploaded) + ret = local->ops->get_expected_throughput(&local->hw, &sta->sta); trace_drv_return_u32(local, ret); return ret; @@ -1179,4 +1169,83 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local, local->ops->wake_tx_queue(&local->hw, &txq->txq); } +static inline int drv_start_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_start_nan(local, sdata, conf); + ret = local->ops->start_nan(&local->hw, &sdata->vif, conf); + trace_drv_return_int(local, ret); + return ret; +} + +static inline void drv_stop_nan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_stop_nan(local, sdata); + local->ops->stop_nan(&local->hw, &sdata->vif); + trace_drv_return_void(local); +} + +static inline int drv_nan_change_conf(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->nan_change_conf) + return -EOPNOTSUPP; + + trace_drv_nan_change_conf(local, sdata, conf, changes); + ret = local->ops->nan_change_conf(&local->hw, &sdata->vif, conf, + changes); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline int drv_add_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *nan_func) +{ + int ret; + + might_sleep(); + check_sdata_in_driver(sdata); + + if (!local->ops->add_nan_func) + return -EOPNOTSUPP; + + trace_drv_add_nan_func(local, sdata, nan_func); + ret = local->ops->add_nan_func(&local->hw, &sdata->vif, nan_func); + trace_drv_return_int(local, ret); + + return ret; +} + +static inline void drv_del_nan_func(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id) +{ + might_sleep(); + check_sdata_in_driver(sdata); + + trace_drv_del_nan_func(local, sdata, instance_id); + if (local->ops->del_nan_func) + local->ops->del_nan_func(&local->hw, &sdata->vif, instance_id); + trace_drv_return_void(local); +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f56d342c31b8..34c2add2c455 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -3,7 +3,7 @@ * Copyright 2005, Devicescape Software, Inc. * Copyright 2006-2007 Jiri Benc * Copyright 2007-2010 Johannes Berg - * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2013-2015 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -86,6 +86,8 @@ struct ieee80211_local; #define IEEE80211_DEAUTH_FRAME_LEN (24 /* hdr */ + 2 /* reason */) +#define IEEE80211_MAX_NAN_INSTANCE_ID 255 + struct ieee80211_fragment_entry { struct sk_buff_head skb_list; unsigned long first_frag_time; @@ -813,17 +815,39 @@ enum txq_info_flags { * @def_flow: used as a fallback flow when a packet destined to @tin hashes to * a fq_flow which is already owned by a different tin * @def_cvars: codel vars for @def_flow + * @frags: used to keep fragments created after dequeue */ struct txq_info { struct fq_tin tin; struct fq_flow def_flow; struct codel_vars def_cvars; + struct codel_stats cstats; + struct sk_buff_head frags; unsigned long flags; /* keep last! */ struct ieee80211_txq txq; }; +struct ieee80211_if_mntr { + u32 flags; + u8 mu_follow_addr[ETH_ALEN] __aligned(2); +}; + +/** + * struct ieee80211_if_nan - NAN state + * + * @conf: current NAN configuration + * @func_ids: a bitmap of available instance_id's + */ +struct ieee80211_if_nan { + struct cfg80211_nan_conf conf; + + /* protects function_inst_ids */ + spinlock_t func_lock; + struct idr function_inst_ids; +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -922,7 +946,8 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ibss ibss; struct ieee80211_if_mesh mesh; struct ieee80211_if_ocb ocb; - u32 mntr_flags; + struct ieee80211_if_mntr mntr; + struct ieee80211_if_nan nan; } u; #ifdef CONFIG_MAC80211_DEBUGFS @@ -1112,7 +1137,6 @@ struct ieee80211_local { struct fq fq; struct codel_vars *cvars; struct codel_params cparams; - struct codel_stats cstats; const struct ieee80211_ops *ops; @@ -1208,7 +1232,7 @@ struct ieee80211_local { spinlock_t tim_lock; unsigned long num_sta; struct list_head sta_list; - struct rhashtable sta_hash; + struct rhltable sta_hash; struct timer_list sta_cleanup; int sta_generation; @@ -1476,6 +1500,13 @@ static inline struct txq_info *to_txq_info(struct ieee80211_txq *txq) return container_of(txq, struct txq_info, txq); } +static inline bool txq_has_queue(struct ieee80211_txq *txq) +{ + struct txq_info *txqi = to_txq_info(txq); + + return !(skb_queue_empty(&txqi->frags) && !txqi->tin.backlog_packets); +} + static inline int ieee80211_bssid_match(const u8 *raddr, const u8 *addr) { return ether_addr_equal(raddr, addr) || diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index b123a9e325b3..638ec0759078 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -43,6 +43,8 @@ * by either the RTNL, the iflist_mtx or RCU. */ +static void ieee80211_iface_work(struct work_struct *work); + bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) { struct ieee80211_chanctx_conf *chanctx_conf; @@ -188,7 +190,7 @@ static int ieee80211_verify_mac(struct ieee80211_sub_if_data *sdata, u8 *addr, continue; if (iter->vif.type == NL80211_IFTYPE_MONITOR && - !(iter->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + !(iter->u.mntr.flags & MONITOR_FLAG_ACTIVE)) continue; m = iter->vif.addr; @@ -217,7 +219,7 @@ static int ieee80211_change_mac(struct net_device *dev, void *addr) return -EBUSY; if (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + !(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) check_dup = false; ret = ieee80211_verify_mac(sdata, sa->sa_data, check_dup); @@ -325,6 +327,9 @@ static int ieee80211_check_queues(struct ieee80211_sub_if_data *sdata, int n_queues = sdata->local->hw.queues; int i; + if (iftype == NL80211_IFTYPE_NAN) + return 0; + if (iftype != NL80211_IFTYPE_P2P_DEVICE) { for (i = 0; i < IEEE80211_NUM_ACS; i++) { if (WARN_ON_ONCE(sdata->vif.hw_queue[i] == @@ -357,7 +362,7 @@ void ieee80211_adjust_monitor_flags(struct ieee80211_sub_if_data *sdata, const int offset) { struct ieee80211_local *local = sdata->local; - u32 flags = sdata->u.mntr_flags; + u32 flags = sdata->u.mntr.flags; #define ADJUST(_f, _s) do { \ if (flags & MONITOR_FLAG_##_f) \ @@ -448,6 +453,9 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + skb_queue_head_init(&sdata->skb_queue); + INIT_WORK(&sdata->work, ieee80211_iface_work); + return 0; } @@ -540,6 +548,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN: /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -589,12 +598,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) } break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs++; break; } - if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { res = drv_add_interface(local, sdata); if (res) goto err_stop; @@ -641,7 +650,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) local->fif_probe_req++; } - if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) changed |= ieee80211_reset_erp_info(sdata); ieee80211_bss_info_change_notify(sdata, changed); @@ -655,6 +665,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; default: /* not reached */ @@ -787,6 +798,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, struct ps_data *ps; struct cfg80211_chan_def chandef; bool cancel_scan; + struct cfg80211_nan_func *func; clear_bit(SDATA_STATE_RUNNING, &sdata->state); @@ -926,7 +938,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, /* no need to tell driver */ break; case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) { + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) { local->cooked_mntrs--; break; } @@ -939,6 +951,18 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_adjust_monitor_flags(sdata, -1); break; + case NL80211_IFTYPE_NAN: + /* clean all the functions */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) { + idr_remove(&sdata->u.nan.function_inst_ids, i); + cfg80211_free_nan_func(func); + } + idr_destroy(&sdata->u.nan.function_inst_ids); + + spin_unlock_bh(&sdata->u.nan.func_lock); + break; case NL80211_IFTYPE_P2P_DEVICE: /* relies on synchronize_rcu() below */ RCU_INIT_POINTER(local->p2p_sdata, NULL); @@ -1012,7 +1036,7 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) break; /* fall through */ @@ -1444,12 +1468,17 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_MONITOR: sdata->dev->type = ARPHRD_IEEE80211_RADIOTAP; sdata->dev->netdev_ops = &ieee80211_monitorif_ops; - sdata->u.mntr_flags = MONITOR_FLAG_CONTROL | + sdata->u.mntr.flags = MONITOR_FLAG_CONTROL | MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_WDS: sdata->vif.bss_conf.bssid = NULL; break; + case NL80211_IFTYPE_NAN: + idr_init(&sdata->u.nan.function_inst_ids); + spin_lock_init(&sdata->u.nan.func_lock); + sdata->vif.bss_conf.bssid = sdata->vif.addr; + break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; @@ -1717,7 +1746,7 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, ASSERT_RTNL(); - if (type == NL80211_IFTYPE_P2P_DEVICE) { + if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { struct wireless_dev *wdev; sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, diff --git a/net/mac80211/main.c b/net/mac80211/main.c index d00ea9b13f49..1075ac24c8c5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -660,6 +660,9 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, ieee80211_roc_setup(local); + local->hw.radiotap_timestamp.units_pos = -1; + local->hw.radiotap_timestamp.accuracy = -1; + return &local->hw; err_free: wiphy_free(wiphy); @@ -818,6 +821,11 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) !local->ops->tdls_recv_channel_switch)) return -EOPNOTSUPP; + if (WARN_ON(local->hw.wiphy->interface_modes & + BIT(NL80211_IFTYPE_NAN) && + (!local->ops->start_nan || !local->ops->stop_nan))) + return -EINVAL; + #ifdef CONFIG_PM if (hw->wiphy->wowlan && (!local->ops->suspend || !local->ops->resume)) return -EINVAL; @@ -1055,6 +1063,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->dynamic_ps_forced_timeout = -1; + if (!local->hw.max_nan_de_entries) + local->hw.max_nan_de_entries = IEEE80211_MAX_NAN_INSTANCE_ID; + result = ieee80211_wep_init(local); if (result < 0) wiphy_debug(local->hw.wiphy, "Failed to initialize wep: %d\n", diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index faccef977670..b747c9645e43 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -326,22 +326,33 @@ static u32 airtime_link_metric_get(struct ieee80211_local *local, u32 tx_time, estimated_retx; u64 result; - if (sta->mesh->fail_avg >= 100) - return MAX_METRIC; + /* Try to get rate based on HW/SW RC algorithm. + * Rate is returned in units of Kbps, correct this + * to comply with airtime calculation units + * Round up in case we get rate < 100Kbps + */ + rate = DIV_ROUND_UP(sta_get_expected_throughput(sta), 100); - sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); - rate = cfg80211_calculate_bitrate(&rinfo); - if (WARN_ON(!rate)) - return MAX_METRIC; + if (rate) { + err = 0; + } else { + if (sta->mesh->fail_avg >= 100) + return MAX_METRIC; - err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + sta_set_rate_info_tx(sta, &sta->tx_stats.last_rate, &rinfo); + rate = cfg80211_calculate_bitrate(&rinfo); + if (WARN_ON(!rate)) + return MAX_METRIC; + + err = (sta->mesh->fail_avg << ARITH_SHIFT) / 100; + } /* bitrate is in units of 100 Kbps, while we need rate in units of * 1Mbps. This will be corrected on tx_time computation. */ tx_time = (device_constant + 10 * test_frame_len / rate); estimated_retx = ((1 << (2 * ARITH_SHIFT)) / (s_unit - err)); - result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT) ; + result = (tx_time * estimated_retx) >> (2 * ARITH_SHIFT); return (u32)result; } diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 64bc22ad9496..faca22cd02b5 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -28,7 +28,7 @@ * could be, for instance, in case a neighbor is restarted and its TSF counter * reset. */ -#define TOFFSET_MAXIMUM_ADJUSTMENT 30000 /* 30 ms */ +#define TOFFSET_MAXIMUM_ADJUSTMENT 800 /* 0.8 ms */ struct sync_method { u8 method; @@ -70,9 +70,13 @@ void mesh_sync_adjust_tbtt(struct ieee80211_sub_if_data *sdata) } spin_unlock_bh(&ifmsh->sync_offset_lock); - tsf = drv_get_tsf(local, sdata); - if (tsf != -1ULL) - drv_set_tsf(local, sdata, tsf + tsfdelta); + if (local->ops->offset_tsf) { + drv_offset_tsf(local, sdata, tsfdelta); + } else { + tsf = drv_get_tsf(local, sdata); + if (tsf != -1ULL) + drv_set_tsf(local, sdata, tsf + tsfdelta); + } } static void mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 8d426f637f58..7486f2dab4ba 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1672,11 +1672,15 @@ __ieee80211_sta_handle_tspec_ac_params(struct ieee80211_sub_if_data *sdata) non_acm_ac++) if (!(sdata->wmm_acm & BIT(7 - 2 * non_acm_ac))) break; - /* The loop will result in using BK even if it requires - * admission control, such configuration makes no sense - * and we have to transmit somehow - the AC selection - * does the same thing. + /* Usually the loop will result in using BK even if it + * requires admission control, but such a configuration + * makes no sense and we have to transmit somehow - the + * AC selection does the same thing. + * If we started out trying to downgrade from BK, then + * the extra condition here might be needed. */ + if (non_acm_ac >= IEEE80211_NUM_ACS) + non_acm_ac = IEEE80211_AC_BK; if (drv_conf_tx(local, sdata, ac, &sdata->tx_conf[non_acm_ac])) sdata_err(sdata, diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index 55a9c5b94ce1..c3f610bba3fe 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -128,7 +128,8 @@ void ieee80211_offchannel_stop_vifs(struct ieee80211_local *local) if (!ieee80211_sdata_running(sdata)) continue; - if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE) + if (sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN) continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR) @@ -838,6 +839,7 @@ int ieee80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev, case NL80211_IFTYPE_P2P_DEVICE: need_offchan = true; break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index 00a43a70e1fc..28a3a0957c9e 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -178,8 +178,7 @@ int __ieee80211_suspend(struct ieee80211_hw *hw, struct cfg80211_wowlan *wowlan) WARN_ON(!list_empty(&local->chanctx_list)); /* stop hardware - this must stop RX */ - if (local->open_count) - ieee80211_stop_device(local); + ieee80211_stop_device(local); suspend: local->suspended = true; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 9dce3b157908..6175db385ba7 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -180,6 +180,11 @@ ieee80211_rx_radiotap_hdrlen(struct ieee80211_local *local, len += 12; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + len = ALIGN(len, 8); + len += 12; + } + if (status->chains) { /* antenna and antenna signal fields */ len += 2 * hweight8(status->chains); @@ -447,6 +452,31 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, pos += 2; } + if (local->hw.radiotap_timestamp.units_pos >= 0) { + u16 accuracy = 0; + u8 flags = IEEE80211_RADIOTAP_TIMESTAMP_FLAG_32BIT; + + rthdr->it_present |= + cpu_to_le32(1 << IEEE80211_RADIOTAP_TIMESTAMP); + + /* ensure 8 byte alignment */ + while ((pos - (u8 *)rthdr) & 7) + pos++; + + put_unaligned_le64(status->device_timestamp, pos); + pos += sizeof(u64); + + if (local->hw.radiotap_timestamp.accuracy >= 0) { + accuracy = local->hw.radiotap_timestamp.accuracy; + flags |= IEEE80211_RADIOTAP_TIMESTAMP_FLAG_ACCURACY; + } + put_unaligned_le16(accuracy, pos); + pos += sizeof(u16); + + *pos++ = local->hw.radiotap_timestamp.units_pos; + *pos++ = flags; + } + for_each_set_bit(chain, &chains, IEEE80211_MAX_CHAINS) { *pos++ = status->chain_signal[chain]; *pos++ = chain; @@ -485,6 +515,9 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, struct net_device *prev_dev = NULL; int present_fcs_len = 0; unsigned int rtap_vendor_space = 0; + struct ieee80211_mgmt *mgmt; + struct ieee80211_sub_if_data *monitor_sdata = + rcu_dereference(local->monitor_sdata); if (unlikely(status->flag & RX_FLAG_RADIOTAP_VENDOR_DATA)) { struct ieee80211_vendor_radiotap *rtap = (void *)origskb->data; @@ -567,7 +600,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, if (sdata->vif.type != NL80211_IFTYPE_MONITOR) continue; - if (sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) + if (sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) continue; if (!ieee80211_sdata_running(sdata)) @@ -585,6 +618,23 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, ieee80211_rx_stats(sdata->dev, skb->len); } + mgmt = (void *)skb->data; + if (monitor_sdata && + skb->len >= IEEE80211_MIN_ACTION_SIZE + 1 + VHT_MUMIMO_GROUPS_DATA_LEN && + ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category == WLAN_CATEGORY_VHT && + mgmt->u.action.u.vht_group_notif.action_code == WLAN_VHT_ACTION_GROUPID_MGMT && + is_valid_ether_addr(monitor_sdata->u.mntr.mu_follow_addr) && + ether_addr_equal(mgmt->da, monitor_sdata->u.mntr.mu_follow_addr)) { + struct sk_buff *mu_skb = skb_copy(skb, GFP_ATOMIC); + + if (mu_skb) { + mu_skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&monitor_sdata->skb_queue, mu_skb); + ieee80211_queue_work(&local->hw, &monitor_sdata->work); + } + } + if (prev_dev) { skb->dev = prev_dev; netif_receive_skb(skb); @@ -1072,8 +1122,15 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); - if (!tid_agg_rx) + if (!tid_agg_rx) { + if (ack_policy == IEEE80211_QOS_CTL_ACK_POLICY_BLOCKACK && + !test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) && + !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) + ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); goto dont_reorder; + } /* qos null data frames are excluded */ if (unlikely(hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_NULLFUNC))) @@ -1266,9 +1323,7 @@ static void sta_ps_start(struct sta_info *sta) return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - - if (txqi->tin.backlog_packets) + if (txq_has_queue(sta->sta.txq[tid])) set_bit(tid, &sta->txq_buffered_tids); else clear_bit(tid, &sta->txq_buffered_tids); @@ -2535,6 +2590,12 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) tid = le16_to_cpu(bar_data.control) >> 12; + if (!test_bit(tid, rx->sta->ampdu_mlme.agg_session_valid) && + !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) + ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, + WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_REQUIRE_SETUP); + tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) return RX_DROP_MONITOR; @@ -3147,7 +3208,7 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, continue; if (sdata->vif.type != NL80211_IFTYPE_MONITOR || - !(sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES)) + !(sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES)) continue; if (prev_dev) { @@ -3523,6 +3584,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) ieee80211_is_probe_req(hdr->frame_control) || ieee80211_is_probe_resp(hdr->frame_control) || ieee80211_is_beacon(hdr->frame_control); + case NL80211_IFTYPE_NAN: + /* Currently no frames on NAN interface are allowed */ + return false; default: break; } @@ -3940,7 +4004,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, __le16 fc; struct ieee80211_rx_data rx; struct ieee80211_sub_if_data *prev; - struct rhash_head *tmp; + struct rhlist_head *tmp; int err = 0; fc = ((struct ieee80211_hdr *)skb->data)->frame_control; @@ -3983,13 +4047,10 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, goto out; } else if (ieee80211_is_data(fc)) { struct sta_info *sta, *prev_sta; - const struct bucket_table *tbl; prev_sta = NULL; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr2, sta, tmp) { + for_each_sta_info(local, hdr->addr2, sta, tmp) { if (!prev_sta) { prev_sta = sta; continue; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 070b40f15850..23d8ac829279 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -420,7 +420,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, { struct ieee80211_local *local = hw_to_local(hw); - trace_api_scan_completed(local, info); + trace_api_scan_completed(local, info->aborted); set_bit(SCAN_COMPLETED, &local->scanning); if (info->aborted) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index aa58df80ede0..78e9ecbc96e6 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -67,12 +67,10 @@ static const struct rhashtable_params sta_rht_params = { .nelem_hint = 3, /* start small */ - .insecure_elasticity = true, /* Disable chain-length checks. */ .automatic_shrinking = true, .head_offset = offsetof(struct sta_info, hash_node), .key_offset = offsetof(struct sta_info, addr), .key_len = ETH_ALEN, - .hashfn = sta_addr_hash, .max_size = CONFIG_MAC80211_STA_HASH_MAX_SIZE, }; @@ -80,8 +78,8 @@ static const struct rhashtable_params sta_rht_params = { static int sta_info_hash_del(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_remove_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_remove(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void __cleanup_single_sta(struct sta_info *sta) @@ -157,19 +155,22 @@ static void cleanup_single_sta(struct sta_info *sta) sta_info_free(local, sta); } +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr) +{ + return rhltable_lookup(&local->sta_hash, addr, sta_rht_params); +} + /* protected by RCU */ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata) { rcu_read_unlock(); /* this is safe as the caller must already hold @@ -190,14 +191,11 @@ struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) { struct ieee80211_local *local = sdata->local; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; rcu_read_lock(); - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (sta->sdata == sdata || (sta->sdata->bss && sta->sdata->bss == sdata->bss)) { rcu_read_unlock(); @@ -263,8 +261,8 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta) static int sta_info_hash_add(struct ieee80211_local *local, struct sta_info *sta) { - return rhashtable_insert_fast(&local->sta_hash, &sta->hash_node, - sta_rht_params); + return rhltable_insert(&local->sta_hash, &sta->hash_node, + sta_rht_params); } static void sta_deliver_ps_frames(struct work_struct *wk) @@ -340,6 +338,9 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, memcpy(sta->addr, addr, ETH_ALEN); memcpy(sta->sta.addr, addr, ETH_ALEN); + sta->sta.max_rx_aggregation_subframes = + local->hw.max_rx_aggregation_subframes; + sta->local = local; sta->sdata = sdata; sta->rx_stats.last_rx = jiffies; @@ -450,9 +451,9 @@ static int sta_info_insert_check(struct sta_info *sta) is_multicast_ether_addr(sta->sta.addr))) return -EINVAL; - /* Strictly speaking this isn't necessary as we hold the mutex, but - * the rhashtable code can't really deal with that distinction. We - * do require the mutex for correctness though. + /* The RCU read lock is required by rhashtable due to + * asynchronous resize/rehash. We also require the mutex + * for correctness. */ rcu_read_lock(); lockdep_assert_held(&sdata->local->sta_mtx); @@ -687,7 +688,7 @@ static void __sta_info_recalc_tim(struct sta_info *sta, bool ignore_pending) } /* No need to do anything if the driver does all */ - if (ieee80211_hw_check(&local->hw, AP_LINK_PS)) + if (!local->ops->set_tim) return; if (sta->dead) @@ -1040,16 +1041,11 @@ static void sta_info_cleanup(unsigned long data) round_jiffies(jiffies + STA_INFO_CLEANUP_INTERVAL)); } -u32 sta_addr_hash(const void *key, u32 length, u32 seed) -{ - return jhash(key, ETH_ALEN, seed); -} - int sta_info_init(struct ieee80211_local *local) { int err; - err = rhashtable_init(&local->sta_hash, &sta_rht_params); + err = rhltable_init(&local->sta_hash, &sta_rht_params); if (err) return err; @@ -1065,7 +1061,7 @@ int sta_info_init(struct ieee80211_local *local) void sta_info_stop(struct ieee80211_local *local) { del_timer_sync(&local->sta_cleanup); - rhashtable_destroy(&local->sta_hash); + rhltable_destroy(&local->sta_hash); } @@ -1135,17 +1131,14 @@ struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, const u8 *localaddr) { struct ieee80211_local *local = hw_to_local(hw); + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; - const struct bucket_table *tbl; - - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); /* * Just return a random station if localaddr is NULL * ... first in list. */ - for_each_sta_info(local, tbl, addr, sta, tmp) { + for_each_sta_info(local, addr, sta, tmp) { if (localaddr && !ether_addr_equal(sta->sdata->vif.addr, localaddr)) continue; @@ -1209,12 +1202,10 @@ void ieee80211_sta_ps_deliver_wakeup(struct sta_info *sta) if (sta->sta.txq[0]) { for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[i]); - - if (!txqi->tin.backlog_packets) + if (!txq_has_queue(sta->sta.txq[i])) continue; - drv_wake_tx_queue(local, txqi); + drv_wake_tx_queue(local, to_txq_info(sta->sta.txq[i])); } } @@ -1645,10 +1636,8 @@ ieee80211_sta_ps_deliver_response(struct sta_info *sta, return; for (tid = 0; tid < ARRAY_SIZE(sta->sta.txq); tid++) { - struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]); - if (!(driver_release_tids & BIT(tid)) || - txqi->tin.backlog_packets) + txq_has_queue(sta->sta.txq[tid])) continue; sta_info_recalc_tim(sta); @@ -2279,11 +2268,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if (test_sta_flag(sta, WLAN_STA_TDLS_PEER)) sinfo->sta_flags.set |= BIT(NL80211_STA_FLAG_TDLS_PEER); - /* check if the driver has a SW RC implementation */ - if (ref && ref->ops->get_expected_throughput) - thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); - else - thr = drv_get_expected_throughput(local, &sta->sta); + thr = sta_get_expected_throughput(sta); if (thr != 0) { sinfo->filled |= BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); @@ -2291,6 +2276,25 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } +u32 sta_get_expected_throughput(struct sta_info *sta) +{ + struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_local *local = sdata->local; + struct rate_control_ref *ref = NULL; + u32 thr = 0; + + if (test_sta_flag(sta, WLAN_STA_RATE_CONTROL)) + ref = local->rate_ctrl; + + /* check if the driver has a SW RC implementation */ + if (ref && ref->ops->get_expected_throughput) + thr = ref->ops->get_expected_throughput(sta->rate_ctrl_priv); + else + thr = drv_get_expected_throughput(local, sta); + + return thr; +} + unsigned long ieee80211_sta_last_active(struct sta_info *sta) { struct ieee80211_sta_rx_stats *stats = sta_get_last_rx_stats(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 78b0ef32dddd..ed5fcb984a01 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -230,6 +230,8 @@ struct tid_ampdu_rx { * @tid_rx_stop_requested: bitmap indicating which BA sessions per TID the * driver requested to close until the work for it runs * @agg_session_valid: bitmap indicating which TID has a rx BA session open on + * @unexpected_agg: bitmap indicating which TID already sent a delBA due to + * unexpected aggregation related frames outside a session * @work: work struct for starting/stopping aggregation * @tid_tx: aggregation info for Tx per TID * @tid_start_tx: sessions where start was requested @@ -244,6 +246,7 @@ struct sta_ampdu_mlme { unsigned long tid_rx_timer_expired[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long tid_rx_stop_requested[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; unsigned long agg_session_valid[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; + unsigned long unexpected_agg[BITS_TO_LONGS(IEEE80211_NUM_TIDS)]; /* tx */ struct work_struct work; struct tid_ampdu_tx __rcu *tid_tx[IEEE80211_NUM_TIDS]; @@ -452,7 +455,7 @@ struct sta_info { /* General information, mostly static */ struct list_head list, free_list; struct rcu_head rcu_head; - struct rhash_head hash_node; + struct rhlist_head hash_node; u8 addr[ETH_ALEN]; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; @@ -635,6 +638,9 @@ rcu_dereference_protected_tid_tx(struct sta_info *sta, int tid) */ #define STA_INFO_CLEANUP_INTERVAL (10 * HZ) +struct rhlist_head *sta_info_hash_lookup(struct ieee80211_local *local, + const u8 *addr); + /* * Get a STA info, must be under RCU read lock. */ @@ -644,17 +650,9 @@ struct sta_info *sta_info_get(struct ieee80211_sub_if_data *sdata, struct sta_info *sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr); -u32 sta_addr_hash(const void *key, u32 length, u32 seed); - -#define _sta_bucket_idx(_tbl, _a) \ - rht_bucket_index(_tbl, sta_addr_hash(_a, ETH_ALEN, (_tbl)->hash_rnd)) - -#define for_each_sta_info(local, tbl, _addr, _sta, _tmp) \ - rht_for_each_entry_rcu(_sta, _tmp, tbl, \ - _sta_bucket_idx(tbl, _addr), \ - hash_node) \ - /* compare address and run code only if it matches */ \ - if (ether_addr_equal(_sta->addr, (_addr))) +#define for_each_sta_info(local, _addr, _sta, _tmp) \ + rhl_for_each_entry_rcu(_sta, _tmp, \ + sta_info_hash_lookup(local, _addr), hash_node) /* * Get STA info by index, BROKEN! @@ -712,6 +710,8 @@ void sta_set_rate_info_tx(struct sta_info *sta, struct rate_info *rinfo); void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo); +u32 sta_get_expected_throughput(struct sta_info *sta); + void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, unsigned long exp_time); u8 sta_info_tx_streams(struct sta_info *sta); diff --git a/net/mac80211/status.c b/net/mac80211/status.c index a2a68269675d..ddf71c648cab 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -557,6 +557,12 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, static void ieee80211_lost_packet(struct sta_info *sta, struct ieee80211_tx_info *info) { + /* If driver relies on its own algorithm for station kickout, skip + * mac80211 packet loss mechanism. + */ + if (ieee80211_hw_check(&sta->local->hw, REPORTS_LOW_ACK)) + return; + /* This packet was aggregated but doesn't carry status info */ if ((info->flags & IEEE80211_TX_CTL_AMPDU) && !(info->flags & IEEE80211_TX_STAT_AMPDU)) @@ -709,7 +715,7 @@ void ieee80211_tx_monitor(struct ieee80211_local *local, struct sk_buff *skb, if (!ieee80211_sdata_running(sdata)) continue; - if ((sdata->u.mntr_flags & MONITOR_FLAG_COOK_FRAMES) && + if ((sdata->u.mntr.flags & MONITOR_FLAG_COOK_FRAMES) && !send_to_cooked) continue; @@ -740,8 +746,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); __le16 fc; struct ieee80211_supported_band *sband; + struct rhlist_head *tmp; struct sta_info *sta; - struct rhash_head *tmp; int retry_count; int rates_idx; bool send_to_cooked; @@ -749,7 +755,6 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) struct ieee80211_bar *bar; int shift = 0; int tid = IEEE80211_NUM_TIDS; - const struct bucket_table *tbl; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); @@ -758,9 +763,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) sband = local->hw.wiphy->bands[info->band]; fc = hdr->frame_control; - tbl = rht_dereference_rcu(local->sta_hash.tbl, &local->sta_hash); - - for_each_sta_info(local, tbl, hdr->addr1, sta, tmp) { + for_each_sta_info(local, hdr->addr1, sta, tmp) { /* skip wrong virtual interface */ if (!ether_addr_equal(hdr->addr2, sta->sdata->vif.addr)) continue; diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 77e4c53baefb..92a47afaa989 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -984,6 +984,32 @@ TRACE_EVENT(drv_set_tsf, ) ); +TRACE_EVENT(drv_offset_tsf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + s64 offset), + + TP_ARGS(local, sdata, offset), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(s64, tsf_offset) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->tsf_offset = offset; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " tsf offset:%lld", + LOCAL_PR_ARG, VIF_PR_ARG, + (unsigned long long)__entry->tsf_offset + ) +); + DEFINE_EVENT(local_sdata_evt, drv_reset_tsf, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), @@ -1700,6 +1726,139 @@ TRACE_EVENT(drv_get_expected_throughput, ) ); +TRACE_EVENT(drv_start_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf), + + TP_ARGS(local, sdata, conf), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual + ) +); + +TRACE_EVENT(drv_stop_nan, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata), + + TP_ARGS(local, sdata), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT, + LOCAL_PR_ARG, VIF_PR_ARG + ) +); + +TRACE_EVENT(drv_nan_change_conf, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_conf *conf, + u32 changes), + + TP_ARGS(local, sdata, conf, changes), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, master_pref) + __field(u8, dual) + __field(u32, changes) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", master preference: %u, dual: %d, changes: 0x%x", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes + ) +); + +TRACE_EVENT(drv_add_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + const struct cfg80211_nan_func *func), + + TP_ARGS(local, sdata, func), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, type) + __field(u8, inst_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->type = func->type; + __entry->inst_id = func->instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", type: %u, inst_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->type, __entry->inst_id + ) +); + +TRACE_EVENT(drv_del_nan_func, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u8 instance_id), + + TP_ARGS(local, sdata, instance_id), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u8, instance_id) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->instance_id = instance_id; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT + ", instance_id: %u", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->instance_id + ) +); + /* * Tracing for API calls that drivers call. */ diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 18b285e06bc8..1c56abc49627 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -796,36 +796,6 @@ static __le16 ieee80211_tx_next_seq(struct sta_info *sta, int tid) return ret; } -static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, - struct ieee80211_vif *vif, - struct ieee80211_sta *pubsta, - struct sk_buff *skb) -{ - struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - struct ieee80211_txq *txq = NULL; - - if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || - (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) - return NULL; - - if (!ieee80211_is_data(hdr->frame_control)) - return NULL; - - if (pubsta) { - u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; - - txq = pubsta->txq[tid]; - } else if (vif) { - txq = vif->txq; - } - - if (!txq) - return NULL; - - return to_txq_info(txq); -} - static ieee80211_tx_result debug_noinline ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) { @@ -883,9 +853,7 @@ ieee80211_tx_h_sequence(struct ieee80211_tx_data *tx) tid = *qc & IEEE80211_QOS_CTL_TID_MASK; tx->sta->tx_stats.msdu[tid]++; - if (!ieee80211_get_txq(tx->local, info->control.vif, &tx->sta->sta, - tx->skb)) - hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); + hdr->seq_ctrl = ieee80211_tx_next_seq(tx->sta, tid); return TX_CONTINUE; } @@ -1274,6 +1242,36 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, return TX_CONTINUE; } +static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, + struct ieee80211_vif *vif, + struct ieee80211_sta *pubsta, + struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *) skb->data; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_txq *txq = NULL; + + if ((info->flags & IEEE80211_TX_CTL_SEND_AFTER_DTIM) || + (info->control.flags & IEEE80211_TX_CTRL_PS_RESPONSE)) + return NULL; + + if (!ieee80211_is_data(hdr->frame_control)) + return NULL; + + if (pubsta) { + u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; + + txq = pubsta->txq[tid]; + } else if (vif) { + txq = vif->txq; + } + + if (!txq) + return NULL; + + return to_txq_info(txq); +} + static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) { IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); @@ -1344,7 +1342,7 @@ static struct sk_buff *fq_tin_dequeue_func(struct fq *fq, local = container_of(fq, struct ieee80211_local, fq); txqi = container_of(tin, struct txq_info, tin); cparams = &local->cparams; - cstats = &local->cstats; + cstats = &txqi->cstats; if (flow == &txqi->def_flow) cvars = &txqi->def_cvars; @@ -1404,6 +1402,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, fq_tin_init(&txqi->tin); fq_flow_init(&txqi->def_flow); codel_vars_init(&txqi->def_cvars); + codel_stats_init(&txqi->cstats); + __skb_queue_head_init(&txqi->frags); txqi->txq.vif = &sdata->vif; @@ -1426,6 +1426,7 @@ void ieee80211_txq_purge(struct ieee80211_local *local, struct fq_tin *tin = &txqi->tin; fq_tin_reset(fq, tin, fq_skb_free_func); + ieee80211_purge_tx_queue(&local->hw, &txqi->frags); } int ieee80211_txq_setup_flows(struct ieee80211_local *local) @@ -1433,6 +1434,8 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) struct fq *fq = &local->fq; int ret; int i; + bool supp_vht = false; + enum nl80211_band band; if (!local->ops->wake_tx_queue) return 0; @@ -1441,8 +1444,24 @@ int ieee80211_txq_setup_flows(struct ieee80211_local *local) if (ret) return ret; + /* + * If the hardware doesn't support VHT, it is safe to limit the maximum + * queue size. 4 Mbytes is 64 max-size aggregates in 802.11n. + */ + for (band = 0; band < NUM_NL80211_BANDS; band++) { + struct ieee80211_supported_band *sband; + + sband = local->hw.wiphy->bands[band]; + if (!sband) + continue; + + supp_vht = supp_vht || sband->vht_cap.vht_supported; + } + + if (!supp_vht) + fq->memory_limit = 4 << 20; /* 4 Mbytes */ + codel_params_init(&local->cparams); - codel_stats_init(&local->cstats); local->cparams.interval = MS2TIME(100); local->cparams.target = MS2TIME(20); local->cparams.ecn = true; @@ -1477,54 +1496,46 @@ void ieee80211_txq_teardown_flows(struct ieee80211_local *local) spin_unlock_bh(&fq->lock); } -struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, - struct ieee80211_txq *txq) +static bool ieee80211_queue_skb(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, + struct sk_buff *skb) { - struct ieee80211_local *local = hw_to_local(hw); - struct txq_info *txqi = container_of(txq, struct txq_info, txq); - struct ieee80211_hdr *hdr; - struct sk_buff *skb = NULL; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); struct fq *fq = &local->fq; - struct fq_tin *tin = &txqi->tin; + struct ieee80211_vif *vif; + struct txq_info *txqi; + struct ieee80211_sta *pubsta; + + if (!local->ops->wake_tx_queue || + sdata->vif.type == NL80211_IFTYPE_MONITOR) + return false; + + if (sta && sta->uploaded) + pubsta = &sta->sta; + else + pubsta = NULL; + + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(sdata->bss, + struct ieee80211_sub_if_data, u.ap); + + vif = &sdata->vif; + txqi = ieee80211_get_txq(local, vif, pubsta, skb); + + if (!txqi) + return false; + + info->control.vif = vif; spin_lock_bh(&fq->lock); - - if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) - goto out; - - skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); - if (!skb) - goto out; - - ieee80211_set_skb_vif(skb, txqi); - - hdr = (struct ieee80211_hdr *)skb->data; - if (txq->sta && ieee80211_is_data_qos(hdr->frame_control)) { - struct sta_info *sta = container_of(txq->sta, struct sta_info, - sta); - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, txq->tid); - if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags)) - info->flags |= IEEE80211_TX_CTL_AMPDU; - else - info->flags &= ~IEEE80211_TX_CTL_AMPDU; - } - -out: + ieee80211_txq_enqueue(local, txqi, skb); spin_unlock_bh(&fq->lock); - if (skb && skb_has_frag_list(skb) && - !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { - if (skb_linearize(skb)) { - ieee80211_free_txskb(&local->hw, skb); - return NULL; - } - } + drv_wake_tx_queue(local, txqi); - return skb; + return true; } -EXPORT_SYMBOL(ieee80211_tx_dequeue); static bool ieee80211_tx_frags(struct ieee80211_local *local, struct ieee80211_vif *vif, @@ -1533,9 +1544,7 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, bool txpending) { struct ieee80211_tx_control control = {}; - struct fq *fq = &local->fq; struct sk_buff *skb, *tmp; - struct txq_info *txqi; unsigned long flags; skb_queue_walk_safe(skbs, skb, tmp) { @@ -1550,21 +1559,6 @@ static bool ieee80211_tx_frags(struct ieee80211_local *local, } #endif - txqi = ieee80211_get_txq(local, vif, sta, skb); - if (txqi) { - info->control.vif = vif; - - __skb_unlink(skb, skbs); - - spin_lock_bh(&fq->lock); - ieee80211_txq_enqueue(local, txqi, skb); - spin_unlock_bh(&fq->lock); - - drv_wake_tx_queue(local, txqi); - - continue; - } - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); if (local->queue_stop_reasons[q] || (!txpending && !skb_queue_empty(&local->pending[q]))) { @@ -1648,7 +1642,7 @@ static bool __ieee80211_tx(struct ieee80211_local *local, switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE) { + if (sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { vif = &sdata->vif; break; } @@ -1685,10 +1679,13 @@ static bool __ieee80211_tx(struct ieee80211_local *local, /* * Invoke TX handlers, return 0 on success and non-zero if the * frame was dropped or queued. + * + * The handlers are split into an early and late part. The latter is everything + * that can be sensitive to reordering, and will be deferred to after packets + * are dequeued from the intermediate queues (when they are enabled). */ -static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +static int invoke_tx_handlers_early(struct ieee80211_tx_data *tx) { - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1706,6 +1703,31 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) if (!ieee80211_hw_check(&tx->local->hw, HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); + txh_done: + if (unlikely(res == TX_DROP)) { + I802_DEBUG_INC(tx->local->tx_handlers_drop); + if (tx->skb) + ieee80211_free_txskb(&tx->local->hw, tx->skb); + else + ieee80211_purge_tx_queue(&tx->local->hw, &tx->skbs); + return -1; + } else if (unlikely(res == TX_QUEUED)) { + I802_DEBUG_INC(tx->local->tx_handlers_queued); + return -1; + } + + return 0; +} + +/* + * Late handlers can be called while the sta lock is held. Handlers that can + * cause packets to be generated will cause deadlock! + */ +static int invoke_tx_handlers_late(struct ieee80211_tx_data *tx) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx->skb); + ieee80211_tx_result res = TX_CONTINUE; + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) { __skb_queue_tail(&tx->skbs, tx->skb); tx->skb = NULL; @@ -1738,6 +1760,15 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) return 0; } +static int invoke_tx_handlers(struct ieee80211_tx_data *tx) +{ + int r = invoke_tx_handlers_early(tx); + + if (r) + return r; + return invoke_tx_handlers_late(tx); +} + bool ieee80211_tx_prepare_skb(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct sk_buff *skb, int band, struct ieee80211_sta **sta) @@ -1812,7 +1843,13 @@ static bool ieee80211_tx(struct ieee80211_sub_if_data *sdata, info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; - if (!invoke_tx_handlers(&tx)) + if (invoke_tx_handlers_early(&tx)) + return false; + + if (ieee80211_queue_skb(local, sdata, tx.sta, tx.skb)) + return true; + + if (!invoke_tx_handlers_late(&tx)) result = __ieee80211_tx(local, &tx.skbs, led_len, tx.sta, txpending); @@ -2268,15 +2305,9 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_STATION: if (sdata->wdev.wiphy->flags & WIPHY_FLAG_SUPPORTS_TDLS) { sta = sta_info_get(sdata, skb->data); - if (sta) { - bool tdls_peer, tdls_auth; - - tdls_peer = test_sta_flag(sta, - WLAN_STA_TDLS_PEER); - tdls_auth = test_sta_flag(sta, - WLAN_STA_TDLS_PEER_AUTH); - - if (tdls_peer && tdls_auth) { + if (sta && test_sta_flag(sta, WLAN_STA_TDLS_PEER)) { + if (test_sta_flag(sta, + WLAN_STA_TDLS_PEER_AUTH)) { *sta_out = sta; return 0; } @@ -2288,8 +2319,7 @@ static int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, * after a TDLS sta is removed due to being * unreachable. */ - if (tdls_peer && !tdls_auth && - !ieee80211_is_tdls_setup(skb)) + if (!ieee80211_is_tdls_setup(skb)) return -EINVAL; } @@ -2339,7 +2369,6 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, struct mesh_path __maybe_unused *mppath = NULL, *mpath = NULL; const u8 *encaps_data; int encaps_len, skip_header_bytes; - int nh_pos, h_pos; bool wme_sta = false, authorized = false; bool tdls_peer; bool multicast; @@ -2645,13 +2674,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, encaps_len = 0; } - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - head_need = hdrlen + encaps_len + meshhdrlen - skb_headroom(skb); /* @@ -2677,18 +2700,12 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } } - if (encaps_data) { + if (encaps_data) memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } #ifdef CONFIG_MAC80211_MESH - if (meshhdrlen > 0) { + if (meshhdrlen > 0) memcpy(skb_push(skb, meshhdrlen), &mesh_hdr, meshhdrlen); - nh_pos += meshhdrlen; - h_pos += meshhdrlen; - } #endif if (ieee80211_is_data_qos(fc)) { @@ -2704,15 +2721,7 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, } else memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - nh_pos += hdrlen; - h_pos += hdrlen; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ skb_reset_mac_header(skb); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); info = IEEE80211_SKB_CB(skb); memset(info, 0, sizeof(*info)); @@ -3184,8 +3193,71 @@ out: return ret; } +/* + * Can be called while the sta lock is held. Anything that can cause packets to + * be generated will cause deadlock! + */ +static void ieee80211_xmit_fast_finish(struct ieee80211_sub_if_data *sdata, + struct sta_info *sta, u8 pn_offs, + struct ieee80211_key *key, + struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_hdr *hdr = (void *)skb->data; + u8 tid = IEEE80211_NUM_TIDS; + + if (key) + info->control.hw_key = &key->conf; + + ieee80211_tx_stats(skb->dev, skb->len); + + if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { + tid = skb->priority & IEEE80211_QOS_CTL_TAG1D_MASK; + *ieee80211_get_qos_ctl(hdr) = tid; + hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); + } else { + info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; + hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); + sdata->sequence_number += 0x10; + } + + if (skb_shinfo(skb)->gso_size) + sta->tx_stats.msdu[tid] += + DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); + else + sta->tx_stats.msdu[tid]++; + + info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + + /* statistics normally done by ieee80211_tx_h_stats (but that + * has to consider fragmentation, so is more complex) + */ + sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; + sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + + if (pn_offs) { + u64 pn; + u8 *crypto_hdr = skb->data + pn_offs; + + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_inc_return(&key->conf.tx_pn); + crypto_hdr[0] = pn; + crypto_hdr[1] = pn >> 8; + crypto_hdr[4] = pn >> 16; + crypto_hdr[5] = pn >> 24; + crypto_hdr[6] = pn >> 32; + crypto_hdr[7] = pn >> 40; + break; + } + } +} + static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, - struct net_device *dev, struct sta_info *sta, + struct sta_info *sta, struct ieee80211_fast_tx *fast_tx, struct sk_buff *skb) { @@ -3236,8 +3308,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return true; } - ieee80211_tx_stats(dev, skb->len + extra_head); - if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) && ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb)) return true; @@ -3266,24 +3336,7 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT | IEEE80211_TX_CTL_DONTFRAG | (tid_tx ? IEEE80211_TX_CTL_AMPDU : 0); - - if (hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) { - *ieee80211_get_qos_ctl(hdr) = tid; - if (!ieee80211_get_txq(local, &sdata->vif, &sta->sta, skb)) - hdr->seq_ctrl = ieee80211_tx_next_seq(sta, tid); - } else { - info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; - hdr->seq_ctrl = cpu_to_le16(sdata->sequence_number); - sdata->sequence_number += 0x10; - } - - if (skb_shinfo(skb)->gso_size) - sta->tx_stats.msdu[tid] += - DIV_ROUND_UP(skb->len, skb_shinfo(skb)->gso_size); - else - sta->tx_stats.msdu[tid]++; - - info->hw_queue = sdata->vif.hw_queue[skb_get_queue_mapping(skb)]; + info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT; __skb_queue_head_init(&tx.skbs); @@ -3293,9 +3346,6 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, tx.sta = sta; tx.key = fast_tx->key; - if (fast_tx->key) - info->control.hw_key = &fast_tx->key->conf; - if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { tx.skb = skb; r = ieee80211_tx_h_rate_ctrl(&tx); @@ -3309,31 +3359,11 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, } } - /* statistics normally done by ieee80211_tx_h_stats (but that - * has to consider fragmentation, so is more complex) - */ - sta->tx_stats.bytes[skb_get_queue_mapping(skb)] += skb->len; - sta->tx_stats.packets[skb_get_queue_mapping(skb)]++; + if (ieee80211_queue_skb(local, sdata, sta, skb)) + return true; - if (fast_tx->pn_offs) { - u64 pn; - u8 *crypto_hdr = skb->data + fast_tx->pn_offs; - - switch (fast_tx->key->conf.cipher) { - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_CCMP_256: - case WLAN_CIPHER_SUITE_GCMP: - case WLAN_CIPHER_SUITE_GCMP_256: - pn = atomic64_inc_return(&fast_tx->key->conf.tx_pn); - crypto_hdr[0] = pn; - crypto_hdr[1] = pn >> 8; - crypto_hdr[4] = pn >> 16; - crypto_hdr[5] = pn >> 24; - crypto_hdr[6] = pn >> 32; - crypto_hdr[7] = pn >> 40; - break; - } - } + ieee80211_xmit_fast_finish(sdata, sta, fast_tx->pn_offs, + fast_tx->key, skb); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -3344,6 +3374,94 @@ static bool ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata, return true; } +struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, + struct ieee80211_txq *txq) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct txq_info *txqi = container_of(txq, struct txq_info, txq); + struct ieee80211_hdr *hdr; + struct sk_buff *skb = NULL; + struct fq *fq = &local->fq; + struct fq_tin *tin = &txqi->tin; + struct ieee80211_tx_info *info; + struct ieee80211_tx_data tx; + ieee80211_tx_result r; + + spin_lock_bh(&fq->lock); + + if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags)) + goto out; + + /* Make sure fragments stay together. */ + skb = __skb_dequeue(&txqi->frags); + if (skb) + goto out; + +begin: + skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func); + if (!skb) + goto out; + + ieee80211_set_skb_vif(skb, txqi); + + hdr = (struct ieee80211_hdr *)skb->data; + info = IEEE80211_SKB_CB(skb); + + memset(&tx, 0, sizeof(tx)); + __skb_queue_head_init(&tx.skbs); + tx.local = local; + tx.skb = skb; + tx.sdata = vif_to_sdata(info->control.vif); + + if (txq->sta) + tx.sta = container_of(txq->sta, struct sta_info, sta); + + /* + * The key can be removed while the packet was queued, so need to call + * this here to get the current key. + */ + r = ieee80211_tx_h_select_key(&tx); + if (r != TX_CONTINUE) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + + if (info->control.flags & IEEE80211_TX_CTRL_FAST_XMIT) { + struct sta_info *sta = container_of(txq->sta, struct sta_info, + sta); + u8 pn_offs = 0; + + if (tx.key && + (tx.key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV)) + pn_offs = ieee80211_hdrlen(hdr->frame_control); + + ieee80211_xmit_fast_finish(sta->sdata, sta, pn_offs, + tx.key, skb); + } else { + if (invoke_tx_handlers_late(&tx)) + goto begin; + + skb = __skb_dequeue(&tx.skbs); + + if (!skb_queue_empty(&tx.skbs)) + skb_queue_splice_tail(&tx.skbs, &txqi->frags); + } + + if (skb && skb_has_frag_list(skb) && + !ieee80211_hw_check(&local->hw, TX_FRAG_LIST)) { + if (skb_linearize(skb)) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } + } + +out: + spin_unlock_bh(&fq->lock); + + return skb; +} +EXPORT_SYMBOL(ieee80211_tx_dequeue); + void __ieee80211_subif_start_xmit(struct sk_buff *skb, struct net_device *dev, u32 info_flags) @@ -3368,7 +3486,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb, fast_tx = rcu_dereference(sta->fast_tx); if (fast_tx && - ieee80211_xmit_fast(sdata, dev, sta, fast_tx, skb)) + ieee80211_xmit_fast(sdata, sta, fast_tx, skb)) goto out; } @@ -4395,9 +4513,6 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata, int ac = ieee802_1d_to_ac[tid & 7]; skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb_set_queue_mapping(skb, ac); skb->priority = tid; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 42bf0b6685e8..545c79a42a77 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -598,7 +598,7 @@ static void __iterate_interfaces(struct ieee80211_local *local, list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { case NL80211_IFTYPE_MONITOR: - if (!(sdata->u.mntr_flags & MONITOR_FLAG_ACTIVE)) + if (!(sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE)) continue; break; case NL80211_IFTYPE_AP_VLAN: @@ -1209,7 +1209,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata, } if (sdata->vif.type != NL80211_IFTYPE_MONITOR && - sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE) { + sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE && + sdata->vif.type != NL80211_IFTYPE_NAN) { sdata->vif.bss_conf.qos = enable_qos; if (bss_notify) ieee80211_bss_info_change_notify(sdata, @@ -1748,6 +1749,46 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) mutex_unlock(&local->sta_mtx); } +static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +{ + struct cfg80211_nan_func *func, **funcs; + int res, id, i = 0; + + res = drv_start_nan(sdata->local, sdata, + &sdata->u.nan.conf); + if (WARN_ON(res)) + return res; + + funcs = kzalloc((sdata->local->hw.max_nan_de_entries + 1) * + sizeof(*funcs), GFP_KERNEL); + if (!funcs) + return -ENOMEM; + + /* Add all the functions: + * This is a little bit ugly. We need to call a potentially sleeping + * callback for each NAN function, so we can't hold the spinlock. + */ + spin_lock_bh(&sdata->u.nan.func_lock); + + idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) + funcs[i++] = func; + + spin_unlock_bh(&sdata->u.nan.func_lock); + + for (i = 0; funcs[i]; i++) { + res = drv_add_nan_func(sdata->local, sdata, funcs[i]); + if (WARN_ON(res)) + ieee80211_nan_func_terminated(&sdata->vif, + funcs[i]->instance_id, + NL80211_NAN_FUNC_TERM_REASON_ERROR, + GFP_KERNEL); + } + + kfree(funcs); + + return 0; +} + int ieee80211_reconfig(struct ieee80211_local *local) { struct ieee80211_hw *hw = &local->hw; @@ -1971,6 +2012,13 @@ int ieee80211_reconfig(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); } break; + case NL80211_IFTYPE_NAN: + res = ieee80211_reconfig_nan(sdata); + if (res < 0) { + ieee80211_handle_reconfig_failure(local); + return res; + } + break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: @@ -2555,7 +2603,6 @@ int ieee80211_add_srates_ie(struct ieee80211_sub_if_data *sdata, if (need_basic && basic_rates & BIT(i)) basic = 0x80; - rate = sband->bitrates[i].bitrate; rate = DIV_ROUND_UP(sband->bitrates[i].bitrate, 5 * (1 << shift)); *pos++ = basic | (u8) rate; @@ -3394,11 +3441,18 @@ void ieee80211_txq_get_depth(struct ieee80211_txq *txq, unsigned long *byte_cnt) { struct txq_info *txqi = to_txq_info(txq); + u32 frag_cnt = 0, frag_bytes = 0; + struct sk_buff *skb; + + skb_queue_walk(&txqi->frags, skb) { + frag_cnt++; + frag_bytes += skb->len; + } if (frame_cnt) - *frame_cnt = txqi->tin.backlog_packets; + *frame_cnt = txqi->tin.backlog_packets + frag_cnt; if (byte_cnt) - *byte_cnt = txqi->tin.backlog_bytes; + *byte_cnt = txqi->tin.backlog_bytes + frag_bytes; } EXPORT_SYMBOL(ieee80211_txq_get_depth); diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c index 7079cd32a7ad..06019dba4b10 100644 --- a/net/mac802154/iface.c +++ b/net/mac802154/iface.c @@ -663,6 +663,7 @@ ieee802154_if_add(struct ieee802154_local *local, const char *name, /* TODO check this */ SET_NETDEV_DEV(ndev, &local->phy->dev); + dev_net_set(ndev, wpan_phy_net(local->hw.phy)); sdata = netdev_priv(ndev); ndev->ieee802154_ptr = &sdata->wpan_dev; memcpy(sdata->name, ndev->name, IFNAMSIZ); diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c index 446e1300383e..4dcf6e18563a 100644 --- a/net/mac802154/rx.c +++ b/net/mac802154/rx.c @@ -101,11 +101,16 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata, sdata->dev->stats.rx_bytes += skb->len; switch (mac_cb(skb)->type) { + case IEEE802154_FC_TYPE_BEACON: + case IEEE802154_FC_TYPE_ACK: + case IEEE802154_FC_TYPE_MAC_CMD: + goto fail; + case IEEE802154_FC_TYPE_DATA: return ieee802154_deliver_skb(skb); default: - pr_warn("ieee802154: bad frame received (type = %d)\n", - mac_cb(skb)->type); + pr_warn_ratelimited("ieee802154: bad frame received " + "(type = %d)\n", mac_cb(skb)->type); goto fail; } diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 5c161e7759b5..0e4334cbde17 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -961,9 +961,6 @@ static void mpls_ifdown(struct net_device *dev, int event) RCU_INIT_POINTER(nh->nh_dev, NULL); } endfor_nexthops(rt); } - - - return; } static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) @@ -997,8 +994,6 @@ static void mpls_ifup(struct net_device *dev, unsigned int nh_flags) ACCESS_ONCE(rt->rt_nhn_alive) = alive; } - - return; } static int mpls_dev_notify(struct notifier_block *this, unsigned long event, diff --git a/net/mpls/internal.h b/net/mpls/internal.h index 732a5c17e986..bdfef6c3271a 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,9 +1,6 @@ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H - -struct mpls_shim_hdr { - __be32 label_stack_entry; -}; +#include struct mpls_entry_decoded { u32 label; @@ -93,11 +90,6 @@ struct mpls_route { /* next hop label forwarding entry */ #define endfor_nexthops(rt) } -static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb) -{ - return (struct mpls_shim_hdr *)skb_network_header(skb); -} - static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos) { struct mpls_shim_hdr result; diff --git a/net/mpls/mpls_gso.c b/net/mpls/mpls_gso.c index 2055e57ed1c3..b4da6d8e8632 100644 --- a/net/mpls/mpls_gso.c +++ b/net/mpls/mpls_gso.c @@ -23,32 +23,50 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb, netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); + u16 mac_offset = skb->mac_header; netdev_features_t mpls_features; + u16 mac_len = skb->mac_len; __be16 mpls_protocol; + unsigned int mpls_hlen; + + skb_reset_network_header(skb); + mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb); + if (unlikely(!pskb_may_pull(skb, mpls_hlen))) + goto out; /* Setup inner SKB. */ mpls_protocol = skb->protocol; skb->protocol = skb->inner_protocol; - /* Push back the mac header that skb_mac_gso_segment() has pulled. - * It will be re-pulled by the call to skb_mac_gso_segment() below - */ - __skb_push(skb, skb->mac_len); + __skb_pull(skb, mpls_hlen); + + skb->mac_len = 0; + skb_reset_mac_header(skb); /* Segment inner packet. */ mpls_features = skb->dev->mpls_features & features; segs = skb_mac_gso_segment(skb, mpls_features); + if (IS_ERR_OR_NULL(segs)) { + skb_gso_error_unwind(skb, mpls_protocol, mpls_hlen, mac_offset, + mac_len); + goto out; + } + skb = segs; + mpls_hlen += mac_len; + do { + skb->mac_len = mac_len; + skb->protocol = mpls_protocol; - /* Restore outer protocol. */ - skb->protocol = mpls_protocol; + skb_reset_inner_network_header(skb); - /* Re-pull the mac header that the call to skb_mac_gso_segment() - * above pulled. It will be re-pushed after returning - * skb_mac_gso_segment(), an indirect caller of this function. - */ - __skb_pull(skb, skb->data - skb_mac_header(skb)); + __skb_push(skb, mpls_hlen); + skb_reset_mac_header(skb); + skb_set_network_header(skb, mac_len); + } while ((skb = skb->next)); + +out: return segs; } diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 644a8da6d4bd..cf52cf30ac4b 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -37,7 +37,7 @@ static unsigned int mpls_encap_size(struct mpls_iptunnel_encap *en) return en->labels * sizeof(struct mpls_shim_hdr); } -static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) +static int mpls_xmit(struct sk_buff *skb) { struct mpls_iptunnel_encap *tun_encap_info; struct mpls_shim_hdr *hdr; @@ -90,7 +90,11 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb_cow(skb, hh_len + new_header_size)) goto drop; + skb_set_inner_protocol(skb, skb->protocol); + skb_reset_inner_network_header(skb); + skb_push(skb, new_header_size); + skb_reset_network_header(skb); skb->dev = out_dev; @@ -115,7 +119,7 @@ static int mpls_output(struct net *net, struct sock *sk, struct sk_buff *skb) net_dbg_ratelimited("%s: packet transmission failed: %d\n", __func__, err); - return 0; + return LWTUNNEL_XMIT_DONE; drop: kfree_skb(skb); @@ -153,7 +157,8 @@ static int mpls_build_state(struct net_device *dev, struct nlattr *nla, if (ret) goto errout; newts->type = LWTUNNEL_ENCAP_MPLS; - newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + newts->flags |= LWTUNNEL_STATE_XMIT_REDIRECT; + newts->headroom = mpls_encap_size(tun_encap_info); *ts = newts; @@ -209,7 +214,7 @@ static int mpls_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) static const struct lwtunnel_encap_ops mpls_iptun_ops = { .build_state = mpls_build_state, - .output = mpls_output, + .xmit = mpls_xmit, .fill_encap = mpls_fill_encap_info, .get_encap_size = mpls_encap_nlsize, .cmp_encap = mpls_encap_cmp, diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 33738c060547..13290a70fa71 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -170,6 +170,7 @@ struct ncsi_package; #define NCSI_PACKAGE_SHIFT 5 #define NCSI_PACKAGE_INDEX(c) (((c) >> NCSI_PACKAGE_SHIFT) & 0x7) +#define NCSI_RESERVED_CHANNEL 0x1f #define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1)) #define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c)) @@ -186,9 +187,15 @@ struct ncsi_channel { struct ncsi_channel_mode modes[NCSI_MODE_MAX]; struct ncsi_channel_filter *filters[NCSI_FILTER_MAX]; struct ncsi_channel_stats stats; - struct timer_list timer; /* Link monitor timer */ - bool enabled; /* Timer is enabled */ - unsigned int timeout; /* Times of timeout */ + struct { + struct timer_list timer; + bool enabled; + unsigned int state; +#define NCSI_CHANNEL_MONITOR_START 0 +#define NCSI_CHANNEL_MONITOR_RETRY 1 +#define NCSI_CHANNEL_MONITOR_WAIT 2 +#define NCSI_CHANNEL_MONITOR_WAIT_MAX 5 + } monitor; struct list_head node; struct list_head link; }; @@ -206,7 +213,8 @@ struct ncsi_package { struct ncsi_request { unsigned char id; /* Request ID - 0 to 255 */ bool used; /* Request that has been assigned */ - bool driven; /* Drive state machine */ + unsigned int flags; /* NCSI request property */ +#define NCSI_REQ_FLAG_EVENT_DRIVEN 1 struct ncsi_dev_priv *ndp; /* Associated NCSI device */ struct sk_buff *cmd; /* Associated NCSI command packet */ struct sk_buff *rsp; /* Associated NCSI response packet */ @@ -258,6 +266,7 @@ struct ncsi_dev_priv { struct list_head packages; /* List of packages */ struct ncsi_request requests[256]; /* Request table */ unsigned int request_id; /* Last used request ID */ +#define NCSI_REQ_START_IDX 1 unsigned int pending_req_num; /* Number of pending requests */ struct ncsi_package *active_package; /* Currently handled package */ struct ncsi_channel *active_channel; /* Currently handled channel */ @@ -274,7 +283,7 @@ struct ncsi_cmd_arg { unsigned char package; /* Destination package ID */ unsigned char channel; /* Detination channel ID or 0x1f */ unsigned short payload; /* Command packet payload length */ - bool driven; /* Drive the state machine? */ + unsigned int req_flags; /* NCSI request properties */ union { unsigned char bytes[16]; /* Command packet specific data */ unsigned short words[8]; @@ -313,7 +322,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, unsigned char id, struct ncsi_package **np, struct ncsi_channel **nc); -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven); +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags); void ncsi_free_request(struct ncsi_request *nr); struct ncsi_dev *ncsi_find_dev(struct net_device *dev); int ncsi_process_next_channel(struct ncsi_dev_priv *ndp); diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index d463468442ae..b41a6617d498 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -53,7 +53,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, struct ncsi_aen_lsc_pkt *lsc; struct ncsi_channel *nc; struct ncsi_channel_mode *ncm; - unsigned long old_data; + bool chained; + int state; + unsigned long old_data, data; unsigned long flags; /* Find the NCSI channel */ @@ -62,20 +64,27 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, return -ENODEV; /* Update the link status */ - ncm = &nc->modes[NCSI_MODE_LINK]; lsc = (struct ncsi_aen_lsc_pkt *)h; + + spin_lock_irqsave(&nc->lock, flags); + ncm = &nc->modes[NCSI_MODE_LINK]; old_data = ncm->data[2]; - ncm->data[2] = ntohl(lsc->status); + data = ntohl(lsc->status); + ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); - if (!((old_data ^ ncm->data[2]) & 0x1) || - !list_empty(&nc->link)) + + chained = !list_empty(&nc->link); + state = nc->state; + spin_unlock_irqrestore(&nc->lock, flags); + + if (!((old_data ^ data) & 0x1) || chained) return 0; - if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) && - !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1))) + if (!(state == NCSI_CHANNEL_INACTIVE && (data & 0x1)) && + !(state == NCSI_CHANNEL_ACTIVE && !(data & 0x1))) return 0; if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) ndp->flags |= NCSI_DEV_RESHUFFLE; ncsi_stop_channel_monitor(nc); @@ -97,13 +106,21 @@ static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp, if (!nc) return -ENODEV; + spin_lock_irqsave(&nc->lock, flags); if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); return 0; + } + spin_unlock_irqrestore(&nc->lock, flags); ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 21057a8ceeac..db7083bfd476 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -272,7 +272,7 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) struct sk_buff *skb; struct ncsi_request *nr; - nr = ncsi_alloc_request(ndp, nca->driven); + nr = ncsi_alloc_request(ndp, nca->req_flags); if (!nr) return NULL; diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index ef017b871857..5e509e547c2d 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -132,6 +132,7 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) struct ncsi_dev *nd = &ndp->ndev; struct ncsi_package *np; struct ncsi_channel *nc; + unsigned long flags; nd->state = ncsi_dev_state_functional; if (force_down) { @@ -142,14 +143,21 @@ static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) nd->link_up = 0; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_ACTIVE) + nc->state != NCSI_CHANNEL_ACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); nd->link_up = 1; goto report; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -163,43 +171,55 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; struct ncsi_cmd_arg nca; - bool enabled; - unsigned int timeout; + bool enabled, chained; + unsigned int monitor_state; unsigned long flags; - int ret; + int state, ret; spin_lock_irqsave(&nc->lock, flags); - timeout = nc->timeout; - enabled = nc->enabled; + state = nc->state; + chained = !list_empty(&nc->link); + enabled = nc->monitor.enabled; + monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || !list_empty(&nc->link)) + if (!enabled || chained) return; - if (nc->state != NCSI_CHANNEL_INACTIVE && - nc->state != NCSI_CHANNEL_ACTIVE) + if (state != NCSI_CHANNEL_INACTIVE && + state != NCSI_CHANNEL_ACTIVE) return; - if (!(timeout % 2)) { + switch (monitor_state) { + case NCSI_CHANNEL_MONITOR_START: + case NCSI_CHANNEL_MONITOR_RETRY: nca.ndp = ndp; nca.package = np->id; nca.channel = nc->id; nca.type = NCSI_PKT_CMD_GLS; - nca.driven = false; + nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); if (ret) { netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", ret); return; } - } - if (timeout + 1 >= 3) { + break; + case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: + break; + default: if (!(ndp->flags & NCSI_DEV_HWA) && - nc->state == NCSI_CHANNEL_ACTIVE) + state == NCSI_CHANNEL_ACTIVE) { ncsi_report_link(ndp, true); + ndp->flags |= NCSI_DEV_RESHUFFLE; + } + + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); @@ -207,10 +227,9 @@ static void ncsi_channel_monitor(unsigned long data) } spin_lock_irqsave(&nc->lock, flags); - nc->timeout = timeout + 1; - nc->enabled = true; + nc->monitor.state++; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_start_channel_monitor(struct ncsi_channel *nc) @@ -218,12 +237,12 @@ void ncsi_start_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - WARN_ON_ONCE(nc->enabled); - nc->timeout = 0; - nc->enabled = true; + WARN_ON_ONCE(nc->monitor.enabled); + nc->monitor.enabled = true; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); - mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); + mod_timer(&nc->monitor.timer, jiffies + HZ); } void ncsi_stop_channel_monitor(struct ncsi_channel *nc) @@ -231,14 +250,14 @@ void ncsi_stop_channel_monitor(struct ncsi_channel *nc) unsigned long flags; spin_lock_irqsave(&nc->lock, flags); - if (!nc->enabled) { + if (!nc->monitor.enabled) { spin_unlock_irqrestore(&nc->lock, flags); return; } - nc->enabled = false; + nc->monitor.enabled = false; spin_unlock_irqrestore(&nc->lock, flags); - del_timer_sync(&nc->timer); + del_timer_sync(&nc->monitor.timer); } struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, @@ -267,8 +286,9 @@ struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) nc->id = id; nc->package = np; nc->state = NCSI_CHANNEL_INACTIVE; - nc->enabled = false; - setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc); + nc->monitor.enabled = false; + setup_timer(&nc->monitor.timer, + ncsi_channel_monitor, (unsigned long)nc); spin_lock_init(&nc->lock); INIT_LIST_HEAD(&nc->link); for (index = 0; index < NCSI_CAP_MAX; index++) @@ -405,7 +425,8 @@ void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, * be same. Otherwise, the bogus response might be replied. So * the available IDs are allocated in round-robin fashion. */ -struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, + unsigned int req_flags) { struct ncsi_request *nr = NULL; int i, limit = ARRAY_SIZE(ndp->requests); @@ -413,30 +434,31 @@ struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) /* Check if there is one available request until the ceiling */ spin_lock_irqsave(&ndp->lock, flags); - for (i = ndp->request_id; !nr && i < limit; i++) { + for (i = ndp->request_id; i < limit; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + nr->flags = req_flags; + ndp->request_id = i + 1; + goto found; } /* Fail back to check from the starting cursor */ - for (i = 0; !nr && i < ndp->request_id; i++) { + for (i = NCSI_REQ_START_IDX; i < ndp->request_id; i++) { if (ndp->requests[i].used) continue; nr = &ndp->requests[i]; nr->used = true; - nr->driven = driven; - if (++ndp->request_id >= limit) - ndp->request_id = 0; + nr->flags = req_flags; + ndp->request_id = i + 1; + goto found; } - spin_unlock_irqrestore(&ndp->lock, flags); +found: + spin_unlock_irqrestore(&ndp->lock, flags); return nr; } @@ -458,7 +480,7 @@ void ncsi_free_request(struct ncsi_request *nr) nr->cmd = NULL; nr->rsp = NULL; nr->used = false; - driven = nr->driven; + driven = !!(nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN); spin_unlock_irqrestore(&ndp->lock, flags); if (driven && cmd && --ndp->pending_req_num == 0) @@ -508,10 +530,11 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) struct ncsi_package *np = ndp->active_package; struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; + unsigned long flags; int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_suspend: nd->state = ncsi_dev_state_suspend_select; @@ -527,7 +550,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; if (nd->state == ncsi_dev_state_suspend_select) { nca.type = NCSI_PKT_CMD_SP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; if (ndp->flags & NCSI_DEV_HWA) nca.bytes[0] = 0; else @@ -544,7 +567,7 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_suspend_deselect; } else if (nd->state == ncsi_dev_state_suspend_deselect) { nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; nd->state = ncsi_dev_state_suspend_done; } @@ -556,7 +579,9 @@ static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) break; case ncsi_dev_state_suspend_done: - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_process_next_channel(ndp); break; @@ -574,10 +599,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) struct ncsi_channel *nc = ndp->active_channel; struct ncsi_cmd_arg nca; unsigned char index; + unsigned long flags; int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_config: case ncsi_dev_state_config_sp: @@ -590,7 +616,7 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) else nca.bytes[0] = 1; nca.package = np->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -675,10 +701,12 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) goto error; break; case ncsi_dev_state_config_done: + spin_lock_irqsave(&nc->lock, flags); if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) - xchg(&nc->state, NCSI_CHANNEL_ACTIVE); + nc->state = NCSI_CHANNEL_ACTIVE; else - xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); ncsi_start_channel_monitor(nc); ncsi_process_next_channel(ndp); @@ -707,18 +735,25 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) found = NULL; NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + if (!list_empty(&nc->link) || - nc->state != NCSI_CHANNEL_INACTIVE) + nc->state != NCSI_CHANNEL_INACTIVE) { + spin_unlock_irqrestore(&nc->lock, flags); continue; + } if (!found) found = nc; ncm = &nc->modes[NCSI_MODE_LINK]; if (ncm->data[2] & 0x1) { + spin_unlock_irqrestore(&nc->lock, flags); found = nc; goto out; } + + spin_unlock_irqrestore(&nc->lock, flags); } } @@ -797,7 +832,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) int ret; nca.ndp = ndp; - nca.driven = true; + nca.req_flags = NCSI_REQ_FLAG_EVENT_DRIVEN; switch (nd->state) { case ncsi_dev_state_probe: nd->state = ncsi_dev_state_probe_deselect; @@ -807,7 +842,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect all possible packages */ nca.type = NCSI_PKT_CMD_DP; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -823,7 +858,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Select all possible packages */ nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; for (index = 0; index < 8; index++) { nca.package = index; ret = ncsi_xmit_cmd(&nca); @@ -876,7 +911,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nca.type = NCSI_PKT_CMD_SP; nca.bytes[0] = 1; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -884,12 +919,12 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) nd->state = ncsi_dev_state_probe_cis; break; case ncsi_dev_state_probe_cis: - ndp->pending_req_num = 32; + ndp->pending_req_num = NCSI_RESERVED_CHANNEL; /* Clear initial state */ nca.type = NCSI_PKT_CMD_CIS; nca.package = ndp->active_package->id; - for (index = 0; index < 0x20; index++) { + for (index = 0; index < NCSI_RESERVED_CHANNEL; index++) { nca.channel = index; ret = ncsi_xmit_cmd(&nca); if (ret) @@ -933,7 +968,7 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) /* Deselect the active package */ nca.type = NCSI_PKT_CMD_DP; nca.package = ndp->active_package->id; - nca.channel = 0x1f; + nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); if (ret) goto error; @@ -987,11 +1022,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) goto out; } - old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE); list_del_init(&nc->link); - spin_unlock_irqrestore(&ndp->lock, flags); + spin_lock_irqsave(&nc->lock, flags); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INVISIBLE; + spin_unlock_irqrestore(&nc->lock, flags); + ndp->active_channel = nc; ndp->active_package = nc->package; @@ -1006,7 +1044,7 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) break; default: netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n", - nc->state, nc->package->id, nc->id); + old_state, nc->package->id, nc->id); ncsi_report_link(ndp, false); return -EINVAL; } @@ -1070,7 +1108,7 @@ static int ncsi_inet6addr_event(struct notifier_block *this, return NOTIFY_OK; nca.ndp = ndp; - nca.driven = false; + nca.req_flags = 0; nca.package = np->id; nca.channel = nc->id; nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap; @@ -1118,7 +1156,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, /* Initialize private NCSI device */ spin_lock_init(&ndp->lock); INIT_LIST_HEAD(&ndp->packages); - ndp->request_id = 0; + ndp->request_id = NCSI_REQ_START_IDX; for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { ndp->requests[i].id = i; ndp->requests[i].ndp = ndp; @@ -1149,9 +1187,7 @@ EXPORT_SYMBOL_GPL(ncsi_register_dev); int ncsi_start_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); - struct ncsi_package *np; - struct ncsi_channel *nc; - int old_state, ret; + int ret; if (nd->state != ncsi_dev_state_registered && nd->state != ncsi_dev_state_functional) @@ -1163,15 +1199,6 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - /* Reset channel's state and start over */ - NCSI_FOR_EACH_PACKAGE(ndp, np) { - NCSI_FOR_EACH_CHANNEL(np, nc) { - old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE); - WARN_ON_ONCE(!list_empty(&nc->link) || - old_state == NCSI_CHANNEL_INVISIBLE); - } - } - if (ndp->flags & NCSI_DEV_HWA) ret = ncsi_enable_hwa(ndp); else @@ -1181,6 +1208,35 @@ int ncsi_start_dev(struct ncsi_dev *nd) } EXPORT_SYMBOL_GPL(ncsi_start_dev); +void ncsi_stop_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_package *np; + struct ncsi_channel *nc; + bool chained; + int old_state; + unsigned long flags; + + /* Stop the channel monitor and reset channel's state */ + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + ncsi_stop_channel_monitor(nc); + + spin_lock_irqsave(&nc->lock, flags); + chained = !list_empty(&nc->link); + old_state = nc->state; + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + + WARN_ON_ONCE(chained || + old_state == NCSI_CHANNEL_INVISIBLE); + } + } + + ncsi_report_link(ndp, true); +} +EXPORT_SYMBOL_GPL(ncsi_stop_dev); + void ncsi_unregister_dev(struct ncsi_dev *nd) { struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index af84389a6bf1..087db775b3dc 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -317,12 +317,12 @@ static int ncsi_rsp_handler_gls(struct ncsi_request *nr) ncm->data[3] = ntohl(rsp->other); ncm->data[4] = ntohl(rsp->oem_status); - if (nr->driven) + if (nr->flags & NCSI_REQ_FLAG_EVENT_DRIVEN) return 0; /* Reset the channel monitor if it has been enabled */ spin_lock_irqsave(&nc->lock, flags); - nc->timeout = 0; + nc->monitor.state = NCSI_CHANNEL_MONITOR_START; spin_unlock_irqrestore(&nc->lock, flags); return 0; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9266ceebd112..e8d56d9a4df2 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -474,6 +474,12 @@ config NFT_META This option adds the "meta" expression that you can use to match and to set packet metainformation such as the packet mark. +config NFT_NUMGEN + tristate "Netfilter nf_tables number generator module" + help + This option adds the number generator expression used to perform + incremental counting and random numbers bound to a upper limit. + config NFT_CT depends on NF_CONNTRACK tristate "Netfilter nf_tables conntrack module" @@ -481,13 +487,13 @@ config NFT_CT This option adds the "meta" expression that you can use to match connection tracking information such as the flow state. -config NFT_RBTREE +config NFT_SET_RBTREE tristate "Netfilter nf_tables rbtree set module" help This option adds the "rbtree" set type (Red Black tree) that is used to build interval-based sets. -config NFT_HASH +config NFT_SET_HASH tristate "Netfilter nf_tables hash set module" help This option adds the "hash" set type that is used to build one-way @@ -542,6 +548,12 @@ config NFT_QUEUE This is required if you intend to use the userspace queueing infrastructure (also known as NFQUEUE) from nftables. +config NFT_QUOTA + tristate "Netfilter nf_tables quota module" + help + This option adds the "quota" expression that you can use to match + enforce bytes quotas. + config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" @@ -563,6 +575,12 @@ config NFT_COMPAT x_tables match/target extensions over the nf_tables framework. +config NFT_HASH + tristate "Netfilter nf_tables hash module" + help + This option adds the "hash" expression that you can use to perform + a hash operation on registers. + if NF_TABLES_NETDEV config NF_DUP_NETDEV diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 69134541d65b..c23c3c84416f 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -71,8 +71,9 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o # nf_tables nf_tables-objs += nf_tables_core.o nf_tables_api.o nf_tables_trace.o -nf_tables-objs += nft_immediate.o nft_cmp.o nft_lookup.o nft_dynset.o +nf_tables-objs += nft_immediate.o nft_cmp.o nft_range.o nf_tables-objs += nft_bitwise.o nft_byteorder.o nft_payload.o +nf_tables-objs += nft_lookup.o nft_dynset.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o @@ -80,18 +81,21 @@ obj-$(CONFIG_NF_TABLES_NETDEV) += nf_tables_netdev.o obj-$(CONFIG_NFT_COMPAT) += nft_compat.o obj-$(CONFIG_NFT_EXTHDR) += nft_exthdr.o obj-$(CONFIG_NFT_META) += nft_meta.o +obj-$(CONFIG_NFT_NUMGEN) += nft_numgen.o obj-$(CONFIG_NFT_CT) += nft_ct.o obj-$(CONFIG_NFT_LIMIT) += nft_limit.o obj-$(CONFIG_NFT_NAT) += nft_nat.o obj-$(CONFIG_NFT_QUEUE) += nft_queue.o +obj-$(CONFIG_NFT_QUOTA) += nft_quota.o obj-$(CONFIG_NFT_REJECT) += nft_reject.o obj-$(CONFIG_NFT_REJECT_INET) += nft_reject_inet.o -obj-$(CONFIG_NFT_RBTREE) += nft_rbtree.o -obj-$(CONFIG_NFT_HASH) += nft_hash.o +obj-$(CONFIG_NFT_SET_RBTREE) += nft_set_rbtree.o +obj-$(CONFIG_NFT_SET_HASH) += nft_set_hash.o obj-$(CONFIG_NFT_COUNTER) += nft_counter.o obj-$(CONFIG_NFT_LOG) += nft_log.o obj-$(CONFIG_NFT_MASQ) += nft_masq.o obj-$(CONFIG_NFT_REDIR) += nft_redir.o +obj-$(CONFIG_NFT_HASH) += nft_hash.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index f39276d1c2d7..fa6715db4581 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -61,33 +62,55 @@ EXPORT_SYMBOL(nf_hooks_needed); #endif static DEFINE_MUTEX(nf_hook_mutex); +#define nf_entry_dereference(e) \ + rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct list_head *nf_find_hook_list(struct net *net, - const struct nf_hook_ops *reg) +static struct nf_hook_entry *nf_hook_entry_head(struct net *net, + const struct nf_hook_ops *reg) { - struct list_head *hook_list = NULL; + struct nf_hook_entry *hook_head = NULL; if (reg->pf != NFPROTO_NETDEV) - hook_list = &net->nf.hooks[reg->pf][reg->hooknum]; + hook_head = nf_entry_dereference(net->nf.hooks[reg->pf] + [reg->hooknum]); else if (reg->hooknum == NF_NETDEV_INGRESS) { #ifdef CONFIG_NETFILTER_INGRESS if (reg->dev && dev_net(reg->dev) == net) - hook_list = ®->dev->nf_hooks_ingress; + hook_head = + nf_entry_dereference( + reg->dev->nf_hooks_ingress); #endif } - return hook_list; + return hook_head; } -struct nf_hook_entry { - const struct nf_hook_ops *orig_ops; - struct nf_hook_ops ops; -}; +/* must hold nf_hook_mutex */ +static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg, + struct nf_hook_entry *entry) +{ + switch (reg->pf) { + case NFPROTO_NETDEV: + /* We already checked in nf_register_net_hook() that this is + * used from ingress. + */ + rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry); + break; + default: + rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum], + entry); + break; + } +} int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; + struct nf_hook_entry *hooks_entry; struct nf_hook_entry *entry; - struct nf_hook_ops *elem; + + if (reg->pf == NFPROTO_NETDEV && + (reg->hooknum != NF_NETDEV_INGRESS || + !reg->dev || dev_net(reg->dev) != net)) + return -EINVAL; entry = kmalloc(sizeof(*entry), GFP_KERNEL); if (!entry) @@ -95,19 +118,30 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) entry->orig_ops = reg; entry->ops = *reg; - - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) { - kfree(entry); - return -ENOENT; - } + entry->next = NULL; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - if (reg->priority < elem->priority) - break; + hooks_entry = nf_hook_entry_head(net, reg); + + if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) { + /* This is the case where we need to insert at the head */ + entry->next = hooks_entry; + hooks_entry = NULL; } - list_add_rcu(&entry->ops.list, elem->list.prev); + + while (hooks_entry && + reg->priority >= hooks_entry->orig_ops->priority && + nf_entry_dereference(hooks_entry->next)) { + hooks_entry = nf_entry_dereference(hooks_entry->next); + } + + if (hooks_entry) { + entry->next = nf_entry_dereference(hooks_entry->next); + rcu_assign_pointer(hooks_entry->next, entry); + } else { + nf_set_hooks_head(net, reg, entry); + } + mutex_unlock(&nf_hook_mutex); #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) @@ -122,24 +156,33 @@ EXPORT_SYMBOL(nf_register_net_hook); void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct list_head *hook_list; - struct nf_hook_entry *entry; - struct nf_hook_ops *elem; - - hook_list = nf_find_hook_list(net, reg); - if (!hook_list) - return; + struct nf_hook_entry *hooks_entry; mutex_lock(&nf_hook_mutex); - list_for_each_entry(elem, hook_list, list) { - entry = container_of(elem, struct nf_hook_entry, ops); - if (entry->orig_ops == reg) { - list_del_rcu(&entry->ops.list); - break; - } + hooks_entry = nf_hook_entry_head(net, reg); + if (hooks_entry->orig_ops == reg) { + nf_set_hooks_head(net, reg, + nf_entry_dereference(hooks_entry->next)); + goto unlock; } + while (hooks_entry && nf_entry_dereference(hooks_entry->next)) { + struct nf_hook_entry *next = + nf_entry_dereference(hooks_entry->next); + struct nf_hook_entry *nnext; + + if (next->orig_ops != reg) { + hooks_entry = next; + continue; + } + nnext = nf_entry_dereference(next->next); + rcu_assign_pointer(hooks_entry->next, nnext); + hooks_entry = next; + break; + } + +unlock: mutex_unlock(&nf_hook_mutex); - if (&elem->list == hook_list) { + if (!hooks_entry) { WARN(1, "nf_unregister_net_hook: hook not found!\n"); return; } @@ -151,10 +194,10 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif synchronize_net(); - nf_queue_nf_hook_drop(net, &entry->ops); + nf_queue_nf_hook_drop(net, hooks_entry); /* other cpu might still process nfqueue verdict that used reg */ synchronize_net(); - kfree(entry); + kfree(hooks_entry); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -188,19 +231,17 @@ EXPORT_SYMBOL(nf_unregister_net_hooks); static LIST_HEAD(nf_hook_list); -int nf_register_hook(struct nf_hook_ops *reg) +static int _nf_register_hook(struct nf_hook_ops *reg) { struct net *net, *last; int ret; - rtnl_lock(); for_each_net(net) { ret = nf_register_net_hook(net, reg); if (ret && ret != -ENOENT) goto rollback; } list_add_tail(®->list, &nf_hook_list); - rtnl_unlock(); return 0; rollback: @@ -210,19 +251,34 @@ rollback: break; nf_unregister_net_hook(net, reg); } + return ret; +} + +int nf_register_hook(struct nf_hook_ops *reg) +{ + int ret; + + rtnl_lock(); + ret = _nf_register_hook(reg); rtnl_unlock(); + return ret; } EXPORT_SYMBOL(nf_register_hook); -void nf_unregister_hook(struct nf_hook_ops *reg) +static void _nf_unregister_hook(struct nf_hook_ops *reg) { struct net *net; - rtnl_lock(); list_del(®->list); for_each_net(net) nf_unregister_net_hook(net, reg); +} + +void nf_unregister_hook(struct nf_hook_ops *reg) +{ + rtnl_lock(); + _nf_unregister_hook(reg); rtnl_unlock(); } EXPORT_SYMBOL(nf_unregister_hook); @@ -246,6 +302,26 @@ err: } EXPORT_SYMBOL(nf_register_hooks); +/* Caller MUST take rtnl_lock() */ +int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + unsigned int i; + int err = 0; + + for (i = 0; i < n; i++) { + err = _nf_register_hook(®[i]); + if (err) + goto err; + } + return err; + +err: + if (i > 0) + _nf_unregister_hooks(reg, i); + return err; +} +EXPORT_SYMBOL(_nf_register_hooks); + void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) { while (n-- > 0) @@ -253,10 +329,17 @@ void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) } EXPORT_SYMBOL(nf_unregister_hooks); -unsigned int nf_iterate(struct list_head *head, - struct sk_buff *skb, +/* Caller MUST take rtnl_lock */ +void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n) +{ + while (n-- > 0) + _nf_unregister_hook(®[n]); +} +EXPORT_SYMBOL(_nf_unregister_hooks); + +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_ops **elemp) + struct nf_hook_entry **entryp) { unsigned int verdict; @@ -264,20 +347,23 @@ unsigned int nf_iterate(struct list_head *head, * The caller must not block between calls to this * function because of risk of continuing from deleted element. */ - list_for_each_entry_continue_rcu((*elemp), head, list) { - if (state->thresh > (*elemp)->priority) + while (*entryp) { + if (state->thresh > (*entryp)->ops.priority) { + *entryp = rcu_dereference((*entryp)->next); continue; + } /* Optimization: we don't need to hold module reference here, since function can't sleep. --RR */ repeat: - verdict = (*elemp)->hook((*elemp)->priv, skb, state); + verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state); if (verdict != NF_ACCEPT) { #ifdef CONFIG_NETFILTER_DEBUG if (unlikely((verdict & NF_VERDICT_MASK) > NF_MAX_VERDICT)) { NFDEBUG("Evil return from %p(%u).\n", - (*elemp)->hook, state->hook); + (*entryp)->ops.hook, state->hook); + *entryp = rcu_dereference((*entryp)->next); continue; } #endif @@ -285,25 +371,23 @@ repeat: return verdict; goto repeat; } + *entryp = rcu_dereference((*entryp)->next); } return NF_ACCEPT; } /* Returns 1 if okfn() needs to be executed by the caller, - * -EPERM for NF_DROP, 0 otherwise. */ + * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state) { - struct nf_hook_ops *elem; + struct nf_hook_entry *entry; unsigned int verdict; int ret = 0; - /* We may already have this, but read-locks nest anyway */ - rcu_read_lock(); - - elem = list_entry_rcu(state->hook_list, struct nf_hook_ops, list); + entry = rcu_dereference(state->hook_entries); next_hook: - verdict = nf_iterate(state->hook_list, skb, state, &elem); + verdict = nf_iterate(skb, state, &entry); if (verdict == NF_ACCEPT || verdict == NF_STOP) { ret = 1; } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) { @@ -312,8 +396,10 @@ next_hook: if (ret == 0) ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { - int err = nf_queue(skb, elem, state, - verdict >> NF_VERDICT_QBITS); + int err; + + RCU_INIT_POINTER(state->hook_entries, entry); + err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) @@ -321,7 +407,6 @@ next_hook: kfree_skb(skb); } } - rcu_read_unlock(); return ret; } EXPORT_SYMBOL(nf_hook_slow); @@ -441,7 +526,7 @@ static int __net_init netfilter_net_init(struct net *net) for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) { for (h = 0; h < NF_MAX_HOOKS; h++) - INIT_LIST_HEAD(&net->nf.hooks[i][h]); + RCU_INIT_POINTER(net->nf.hooks[i][h], NULL); } #ifdef CONFIG_PROC_FS diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index f04fd8df210b..fc230d99aa3b 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -281,13 +281,10 @@ void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) h = nf_conntrack_find_get(cp->ipvs->net, &nf_ct_zone_dflt, &tuple); if (h) { ct = nf_ct_tuplehash_to_ctrack(h); - /* Show what happens instead of calling nf_ct_kill() */ - if (del_timer(&ct->timeout)) { - IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" + if (nf_ct_kill(ct)) { + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack for tuple=" FMT_TUPLE "\n", __func__, ct, ARG_TUPLE(&tuple)); - if (ct->timeout.function) - ct->timeout.function(ct->timeout.data); } else { IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" FMT_TUPLE "\n", diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9934b0c93c1e..ba6a1d421222 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -72,12 +72,24 @@ EXPORT_SYMBOL_GPL(nf_conntrack_expect_lock); struct hlist_nulls_head *nf_conntrack_hash __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_hash); +struct conntrack_gc_work { + struct delayed_work dwork; + u32 last_bucket; + bool exiting; +}; + static __read_mostly struct kmem_cache *nf_conntrack_cachep; static __read_mostly spinlock_t nf_conntrack_locks_all_lock; -static __read_mostly seqcount_t nf_conntrack_generation; static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock); static __read_mostly bool nf_conntrack_locks_all; +#define GC_MAX_BUCKETS_DIV 64u +#define GC_MAX_BUCKETS 8192u +#define GC_INTERVAL (5 * HZ) +#define GC_MAX_EVICTS 256u + +static struct conntrack_gc_work conntrack_gc_work; + void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { spin_lock(lock); @@ -164,7 +176,7 @@ unsigned int nf_conntrack_htable_size __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_htable_size); unsigned int nf_conntrack_max __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_max); +seqcount_t nf_conntrack_generation __read_mostly; DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); @@ -367,12 +379,10 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - struct net *net = nf_ct_net(ct); struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); NF_CT_ASSERT(atomic_read(&nfct->use) == 0); - NF_CT_ASSERT(!timer_pending(&ct->timeout)); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); @@ -395,7 +405,6 @@ destroy_conntrack(struct nf_conntrack *nfct) nf_ct_del_from_dying_or_unconfirmed_list(ct); - NF_CT_STAT_INC(net, delete); local_bh_enable(); if (ct->master) @@ -427,7 +436,6 @@ static void nf_ct_delete_from_lists(struct nf_conn *ct) nf_ct_add_to_dying_list(ct); - NF_CT_STAT_INC(net, delete_list); local_bh_enable(); } @@ -435,35 +443,30 @@ bool nf_ct_delete(struct nf_conn *ct, u32 portid, int report) { struct nf_conn_tstamp *tstamp; + if (test_and_set_bit(IPS_DYING_BIT, &ct->status)) + return false; + tstamp = nf_conn_tstamp_find(ct); if (tstamp && tstamp->stop == 0) tstamp->stop = ktime_get_real_ns(); - if (nf_ct_is_dying(ct)) - goto delete; - if (nf_conntrack_event_report(IPCT_DESTROY, ct, portid, report) < 0) { - /* destroy event was not delivered */ + /* destroy event was not delivered. nf_ct_put will + * be done by event cache worker on redelivery. + */ nf_ct_delete_from_lists(ct); nf_conntrack_ecache_delayed_work(nf_ct_net(ct)); return false; } nf_conntrack_ecache_work(nf_ct_net(ct)); - set_bit(IPS_DYING_BIT, &ct->status); - delete: nf_ct_delete_from_lists(ct); nf_ct_put(ct); return true; } EXPORT_SYMBOL_GPL(nf_ct_delete); -static void death_by_timeout(unsigned long ul_conntrack) -{ - nf_ct_delete((struct nf_conn *)ul_conntrack, 0, 0); -} - static inline bool nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, const struct nf_conntrack_tuple *tuple, @@ -481,22 +484,17 @@ nf_ct_key_equal(struct nf_conntrack_tuple_hash *h, net_eq(net, nf_ct_net(ct)); } -/* must be called with rcu read lock held */ -void nf_conntrack_get_ht(struct hlist_nulls_head **hash, unsigned int *hsize) +/* caller must hold rcu readlock and none of the nf_conntrack_locks */ +static void nf_ct_gc_expired(struct nf_conn *ct) { - struct hlist_nulls_head *hptr; - unsigned int sequence, hsz; + if (!atomic_inc_not_zero(&ct->ct_general.use)) + return; - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hsz = nf_conntrack_htable_size; - hptr = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + if (nf_ct_should_gc(ct)) + nf_ct_kill(ct); - *hash = hptr; - *hsize = hsz; + nf_ct_put(ct); } -EXPORT_SYMBOL_GPL(nf_conntrack_get_ht); /* * Warning : @@ -510,21 +508,26 @@ ____nf_conntrack_find(struct net *net, const struct nf_conntrack_zone *zone, struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; struct hlist_nulls_node *n; - unsigned int bucket, sequence; + unsigned int bucket, hsize; begin: - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - bucket = scale_hash(hash); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + bucket = reciprocal_scale(hash, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[bucket], hnnode) { - if (nf_ct_key_equal(h, tuple, zone, net)) { - NF_CT_STAT_INC_ATOMIC(net, found); - return h; + struct nf_conn *ct; + + ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; } - NF_CT_STAT_INC_ATOMIC(net, searched); + + if (nf_ct_is_dying(ct)) + continue; + + if (nf_ct_key_equal(h, tuple, zone, net)) + return h; } /* * if the nulls value we got at the end of this lookup is @@ -618,7 +621,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct) zone, net)) goto out; - add_timer(&ct->timeout); smp_wmb(); /* The caller holds a reference to this object */ atomic_set(&ct->ct_general.use, 2); @@ -771,8 +773,7 @@ __nf_conntrack_confirm(struct sk_buff *skb) /* Timer relative to confirmation time, not original setting time, otherwise we'd get timer wrap in weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); + ct->timeout += nfct_time_stamp; atomic_inc(&ct->ct_general.use); ct->status |= IPS_CONFIRMED; @@ -791,7 +792,6 @@ __nf_conntrack_confirm(struct sk_buff *skb) */ __nf_conntrack_hash_insert(ct, hash, reply_hash); nf_conntrack_double_unlock(hash, reply_hash); - NF_CT_STAT_INC(net, insert); local_bh_enable(); help = nfct_help(ct); @@ -823,29 +823,40 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple_hash *h; struct hlist_nulls_head *ct_hash; - unsigned int hash, sequence; + unsigned int hash, hsize; struct hlist_nulls_node *n; struct nf_conn *ct; zone = nf_ct_zone(ignored_conntrack); rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = hash_conntrack(net, tuple); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + begin: + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = __hash_conntrack(net, tuple, hsize); hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[hash], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); - if (ct != ignored_conntrack && - nf_ct_key_equal(h, tuple, zone, net)) { + + if (ct == ignored_conntrack) + continue; + + if (nf_ct_is_expired(ct)) { + nf_ct_gc_expired(ct); + continue; + } + + if (nf_ct_key_equal(h, tuple, zone, net)) { NF_CT_STAT_INC_ATOMIC(net, found); rcu_read_unlock(); return 1; } - NF_CT_STAT_INC_ATOMIC(net, searched); } + + if (get_nulls_value(n) != hash) { + NF_CT_STAT_INC_ATOMIC(net, search_restart); + goto begin; + } + rcu_read_unlock(); return 0; @@ -867,6 +878,11 @@ static unsigned int early_drop_list(struct net *net, hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + continue; + } + if (test_bit(IPS_ASSURED_BIT, &tmp->status) || !net_eq(nf_ct_net(tmp), net) || nf_ct_is_dying(tmp)) @@ -884,7 +900,6 @@ static unsigned int early_drop_list(struct net *net, */ if (net_eq(nf_ct_net(tmp), net) && nf_ct_is_confirmed(tmp) && - del_timer(&tmp->timeout) && nf_ct_delete(tmp, 0, 0)) drops++; @@ -900,14 +915,11 @@ static noinline int early_drop(struct net *net, unsigned int _hash) for (i = 0; i < NF_CT_EVICTION_RANGE; i++) { struct hlist_nulls_head *ct_hash; - unsigned hash, sequence, drops; + unsigned int hash, hsize, drops; rcu_read_lock(); - do { - sequence = read_seqcount_begin(&nf_conntrack_generation); - hash = scale_hash(_hash++); - ct_hash = nf_conntrack_hash; - } while (read_seqcount_retry(&nf_conntrack_generation, sequence)); + nf_conntrack_get_ht(&ct_hash, &hsize); + hash = reciprocal_scale(_hash++, hsize); drops = early_drop_list(net, &ct_hash[hash]); rcu_read_unlock(); @@ -921,6 +933,69 @@ static noinline int early_drop(struct net *net, unsigned int _hash) return false; } +static void gc_worker(struct work_struct *work) +{ + unsigned int i, goal, buckets = 0, expired_count = 0; + unsigned long next_run = GC_INTERVAL; + unsigned int ratio, scanned = 0; + struct conntrack_gc_work *gc_work; + + gc_work = container_of(work, struct conntrack_gc_work, dwork.work); + + goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS); + i = gc_work->last_bucket; + + do { + struct nf_conntrack_tuple_hash *h; + struct hlist_nulls_head *ct_hash; + struct hlist_nulls_node *n; + unsigned int hashsz; + struct nf_conn *tmp; + + i++; + rcu_read_lock(); + + nf_conntrack_get_ht(&ct_hash, &hashsz); + if (i >= hashsz) + i = 0; + + hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { + tmp = nf_ct_tuplehash_to_ctrack(h); + + scanned++; + if (nf_ct_is_expired(tmp)) { + nf_ct_gc_expired(tmp); + expired_count++; + continue; + } + } + + /* could check get_nulls_value() here and restart if ct + * was moved to another chain. But given gc is best-effort + * we will just continue with next hash slot. + */ + rcu_read_unlock(); + cond_resched_rcu_qs(); + } while (++buckets < goal && + expired_count < GC_MAX_EVICTS); + + if (gc_work->exiting) + return; + + ratio = scanned ? expired_count * 100 / scanned : 0; + if (ratio >= 90) + next_run = 0; + + gc_work->last_bucket = i; + schedule_delayed_work(&gc_work->dwork, next_run); +} + +static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) +{ + INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + gc_work->exiting = false; +} + static struct nf_conn * __nf_conntrack_alloc(struct net *net, const struct nf_conntrack_zone *zone, @@ -957,8 +1032,6 @@ __nf_conntrack_alloc(struct net *net, /* save hash for reusing when confirming */ *(unsigned long *)(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode.pprev) = hash; ct->status = 0; - /* Don't set timer yet: wait for confirmation */ - setup_timer(&ct->timeout, death_by_timeout, (unsigned long)ct); write_pnet(&ct->ct_net, net); memset(&ct->__nfct_init_offset[0], 0, offsetof(struct nf_conn, proto) - @@ -1096,10 +1169,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } spin_unlock(&nf_conntrack_expect_lock); } - if (!exp) { + if (!exp) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); - NF_CT_STAT_INC(net, new); - } /* Now it is inserted into the unconfirmed list, bump refcount */ nf_conntrack_get(&ct->ct_general); @@ -1204,7 +1275,7 @@ nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, skb->nfct = NULL; } - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ l3proto = __nf_ct_l3proto_find(pf); ret = l3proto->get_l4proto(skb, skb_network_offset(skb), &dataoff, &protonum); @@ -1332,7 +1403,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct); NF_CT_ASSERT(skb); /* Only update if this is not a fixed timeout */ @@ -1340,39 +1410,25 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, goto acct; /* If not in hash table, timer will not be active yet */ - if (!nf_ct_is_confirmed(ct)) { - ct->timeout.expires = extra_jiffies; - } else { - unsigned long newtime = jiffies + extra_jiffies; - - /* Only update the timeout if the new timeout is at least - HZ jiffies from the old timeout. Need del_timer for race - avoidance (may already be dying). */ - if (newtime - ct->timeout.expires >= HZ) - mod_timer_pending(&ct->timeout, newtime); - } + if (nf_ct_is_confirmed(ct)) + extra_jiffies += nfct_time_stamp; + ct->timeout = extra_jiffies; acct: if (do_acct) nf_ct_acct_update(ct, ctinfo, skb->len); } EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct); -bool __nf_ct_kill_acct(struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - int do_acct) +bool nf_ct_kill_acct(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + const struct sk_buff *skb) { - if (do_acct) - nf_ct_acct_update(ct, ctinfo, skb->len); + nf_ct_acct_update(ct, ctinfo, skb->len); - if (del_timer(&ct->timeout)) { - ct->timeout.function((unsigned long)ct); - return true; - } - return false; + return nf_ct_delete(ct, 0, 0); } -EXPORT_SYMBOL_GPL(__nf_ct_kill_acct); +EXPORT_SYMBOL_GPL(nf_ct_kill_acct); #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -1505,11 +1561,8 @@ void nf_ct_iterate_cleanup(struct net *net, while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { /* Time to push up daises... */ - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, portid, report); - - /* ... else the timer will get him soon. */ + nf_ct_delete(ct, portid, report); nf_ct_put(ct); cond_resched(); } @@ -1545,6 +1598,7 @@ static int untrack_refs(void) void nf_conntrack_cleanup_start(void) { + conntrack_gc_work.exiting = true; RCU_INIT_POINTER(ip_ct_attach, NULL); } @@ -1554,6 +1608,7 @@ void nf_conntrack_cleanup_end(void) while (untrack_refs() > 0) schedule(); + cancel_delayed_work_sync(&conntrack_gc_work.dwork); nf_ct_free_hashtable(nf_conntrack_hash, nf_conntrack_htable_size); nf_conntrack_proto_fini(); @@ -1828,6 +1883,10 @@ int nf_conntrack_init_start(void) } /* - and look it like as a confirmed connection */ nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED); + + conntrack_gc_work_init(&conntrack_gc_work); + schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL); + return 0; err_proto: diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index d28011b42845..da9df2d56e66 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -49,8 +49,13 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) hlist_nulls_for_each_entry(h, n, &pcpu->dying, hnnode) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); + struct nf_conntrack_ecache *e; - if (nf_ct_is_dying(ct)) + if (!nf_ct_is_confirmed(ct)) + continue; + + e = nf_ct_ecache_find(ct); + if (!e || e->state != NFCT_ECACHE_DESTROY_FAIL) continue; if (nf_conntrack_event(IPCT_DESTROY, ct)) { @@ -58,8 +63,7 @@ static enum retry_state ecache_work_evict_list(struct ct_pcpu *pcpu) break; } - /* we've got the event delivered, now it's dying */ - set_bit(IPS_DYING_BIT, &ct->status); + e->state = NFCT_ECACHE_DESTROY_SENT; refs[evicted] = ct; if (++evicted >= ARRAY_SIZE(refs)) { @@ -130,7 +134,7 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, if (!e) goto out_unlock; - if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { + if (nf_ct_is_confirmed(ct)) { struct nf_ct_event item = { .ct = ct, .portid = e->portid ? e->portid : portid, @@ -150,11 +154,13 @@ int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, * triggered by a process, we store the PORTID * to include it in the retransmission. */ - if (eventmask & (1 << IPCT_DESTROY) && - e->portid == 0 && portid != 0) - e->portid = portid; - else + if (eventmask & (1 << IPCT_DESTROY)) { + if (e->portid == 0 && portid != 0) + e->portid = portid; + e->state = NFCT_ECACHE_DESTROY_FAIL; + } else { e->missed |= eventmask; + } } else { e->missed &= ~missed; } diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 43147005bea3..e3ed20060878 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -237,7 +237,7 @@ static int try_eprt(const char *data, size_t dlen, struct nf_conntrack_man *cmd, } delim = data[0]; if (isdigit(delim) || delim < 33 || delim > 126 || data[2] != delim) { - pr_debug("try_eprt: invalid delimitter.\n"); + pr_debug("try_eprt: invalid delimiter.\n"); return 0; } @@ -301,8 +301,6 @@ static int find_pattern(const char *data, size_t dlen, size_t i = plen; pr_debug("find_pattern `%s': dlen = %Zu\n", pattern, dlen); - if (dlen == 0) - return 0; if (dlen <= plen) { /* Short packet: try for partial? */ @@ -311,19 +309,8 @@ static int find_pattern(const char *data, size_t dlen, else return 0; } - if (strncasecmp(data, pattern, plen) != 0) { -#if 0 - size_t i; - - pr_debug("ftp: string mismatch\n"); - for (i = 0; i < plen; i++) { - pr_debug("ftp:char %u `%c'(%u) vs `%c'(%u)\n", - i, data[i], data[i], - pattern[i], pattern[i]); - } -#endif + if (strncasecmp(data, pattern, plen) != 0) return 0; - } pr_debug("Pattern matches!\n"); /* Now we've found the constant string, try to skip diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index 5c0db5c64734..f65d93639d12 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -736,7 +736,7 @@ static int callforward_do_filter(struct net *net, const struct nf_afinfo *afinfo; int ret = 0; - /* rcu_read_lock()ed by nf_hook_slow() */ + /* rcu_read_lock()ed by nf_hook_thresh */ afinfo = nf_get_afinfo(family); if (!afinfo) return 0; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index b989b81ac156..336e21559e01 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -189,7 +189,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; struct net *net = nf_ct_net(ct); - int ret = 0; /* We already got a helper explicitly attached. The function * nf_conntrack_alter_reply - in case NAT is in use - asks for looking @@ -223,15 +222,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, if (helper == NULL) { if (help) RCU_INIT_POINTER(help->helper, NULL); - goto out; + return 0; } if (help == NULL) { help = nf_ct_helper_ext_add(ct, helper, flags); - if (help == NULL) { - ret = -ENOMEM; - goto out; - } + if (help == NULL) + return -ENOMEM; } else { /* We only allow helper re-assignment of the same sort since * we cannot reallocate the helper extension area. @@ -240,13 +237,13 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, if (tmp && tmp->help != helper->help) { RCU_INIT_POINTER(help->helper, NULL); - goto out; + return 0; } } rcu_assign_pointer(help->helper, helper); -out: - return ret; + + return 0; } EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper); @@ -349,7 +346,7 @@ void nf_ct_helper_log(struct sk_buff *skb, const struct nf_conn *ct, /* Called from the helper function, this call never fails */ help = nfct_help(ct); - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); nf_log_packet(nf_ct_net(ct), nf_ct_l3num(ct), 0, skb, NULL, NULL, NULL, diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index fdfc71f416b7..27540455dc62 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -149,10 +149,7 @@ nla_put_failure: static int ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct) { - long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ; - - if (timeout < 0) - timeout = 0; + long timeout = nf_ct_expires(ct) / HZ; if (nla_put_be32(skb, CTA_TIMEOUT, htonl(timeout))) goto nla_put_failure; @@ -818,14 +815,23 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) struct hlist_nulls_node *n; struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh); u_int8_t l3proto = nfmsg->nfgen_family; - int res; + struct nf_conn *nf_ct_evict[8]; + int res, i; spinlock_t *lockp; last = (struct nf_conn *)cb->args[1]; + i = 0; local_bh_disable(); for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { restart: + while (i) { + i--; + if (nf_ct_should_gc(nf_ct_evict[i])) + nf_ct_kill(nf_ct_evict[i]); + nf_ct_put(nf_ct_evict[i]); + } + lockp = &nf_conntrack_locks[cb->args[0] % CONNTRACK_LOCKS]; nf_conntrack_lock(lockp); if (cb->args[0] >= nf_conntrack_htable_size) { @@ -837,6 +843,13 @@ restart: if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) continue; ct = nf_ct_tuplehash_to_ctrack(h); + if (nf_ct_is_expired(ct)) { + if (i < ARRAY_SIZE(nf_ct_evict) && + atomic_inc_not_zero(&ct->ct_general.use)) + nf_ct_evict[i++] = ct; + continue; + } + if (!net_eq(net, nf_ct_net(ct))) continue; @@ -878,6 +891,13 @@ out: if (last) nf_ct_put(last); + while (i) { + i--; + if (nf_ct_should_gc(nf_ct_evict[i])) + nf_ct_kill(nf_ct_evict[i]); + nf_ct_put(nf_ct_evict[i]); + } + return skb->len; } @@ -1147,9 +1167,7 @@ static int ctnetlink_del_conntrack(struct net *net, struct sock *ctnl, } } - if (del_timer(&ct->timeout)) - nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); - + nf_ct_delete(ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); return 0; @@ -1517,11 +1535,10 @@ static int ctnetlink_change_timeout(struct nf_conn *ct, { u_int32_t timeout = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - if (!del_timer(&ct->timeout)) - return -ETIME; + ct->timeout = nfct_time_stamp + timeout * HZ; - ct->timeout.expires = jiffies + timeout * HZ; - add_timer(&ct->timeout); + if (test_bit(IPS_DYING_BIT, &ct->status)) + return -ETIME; return 0; } @@ -1719,9 +1736,8 @@ ctnetlink_create_conntrack(struct net *net, if (!cda[CTA_TIMEOUT]) goto err1; - ct->timeout.expires = ntohl(nla_get_be32(cda[CTA_TIMEOUT])); - ct->timeout.expires = jiffies + ct->timeout.expires * HZ; + ct->timeout = nfct_time_stamp + ntohl(nla_get_be32(cda[CTA_TIMEOUT])) * HZ; rcu_read_lock(); if (cda[CTA_HELP]) { @@ -1968,13 +1984,9 @@ ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(cpu); - if (nla_put_be32(skb, CTA_STATS_SEARCHED, htonl(st->searched)) || - nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || - nla_put_be32(skb, CTA_STATS_NEW, htonl(st->new)) || + if (nla_put_be32(skb, CTA_STATS_FOUND, htonl(st->found)) || nla_put_be32(skb, CTA_STATS_INVALID, htonl(st->invalid)) || nla_put_be32(skb, CTA_STATS_IGNORE, htonl(st->ignore)) || - nla_put_be32(skb, CTA_STATS_DELETE, htonl(st->delete)) || - nla_put_be32(skb, CTA_STATS_DELETE_LIST, htonl(st->delete_list)) || nla_put_be32(skb, CTA_STATS_INSERT, htonl(st->insert)) || nla_put_be32(skb, CTA_STATS_INSERT_FAILED, htonl(st->insert_failed)) || diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 5588c7ae1ac2..f60a4755d71e 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -157,8 +157,7 @@ static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, pr_debug("setting timeout of conntrack %p to 0\n", sibling); sibling->proto.gre.timeout = 0; sibling->proto.gre.stream_timeout = 0; - if (del_timer(&sibling->timeout)) - sibling->timeout.function((unsigned long)sibling); + nf_ct_kill(sibling); nf_ct_put(sibling); return 1; } else { diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index b65d5864b6d9..8d2c7d8c666a 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -159,54 +159,6 @@ static int kill_l4proto(struct nf_conn *i, void *data) nf_ct_l3num(i) == l4proto->l3proto; } -static struct nf_ip_net *nf_ct_l3proto_net(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - if (l3proto->l3proto == PF_INET) - return &net->ct.nf_ct_proto; - else - return NULL; -} - -static int nf_ct_l3proto_register_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - int err = 0; - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - /* nf_conntrack_l3proto_ipv6 doesn't support sysctl */ - if (in == NULL) - return 0; - -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table != NULL) { - err = nf_ct_register_sysctl(net, - &in->ctl_table_header, - l3proto->ctl_table_path, - in->ctl_table); - if (err < 0) { - kfree(in->ctl_table); - in->ctl_table = NULL; - } - } -#endif - return err; -} - -static void nf_ct_l3proto_unregister_sysctl(struct net *net, - struct nf_conntrack_l3proto *l3proto) -{ - struct nf_ip_net *in = nf_ct_l3proto_net(net, l3proto); - - if (in == NULL) - return; -#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) - if (in->ctl_table_header != NULL) - nf_ct_unregister_sysctl(&in->ctl_table_header, - &in->ctl_table, - 0); -#endif -} - int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) { int ret = 0; @@ -241,7 +193,7 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); int nf_ct_l3proto_pernet_register(struct net *net, struct nf_conntrack_l3proto *proto) { - int ret = 0; + int ret; if (proto->init_net) { ret = proto->init_net(net); @@ -249,7 +201,7 @@ int nf_ct_l3proto_pernet_register(struct net *net, return ret; } - return nf_ct_l3proto_register_sysctl(net, proto); + return 0; } EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); @@ -272,8 +224,6 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); void nf_ct_l3proto_pernet_unregister(struct net *net, struct nf_conntrack_l3proto *proto) { - nf_ct_l3proto_unregister_sysctl(net, proto); - /* Remove all contrack entries for this protocol */ nf_ct_iterate_cleanup(net, kill_l3proto, proto, 0, 0); } @@ -312,26 +262,6 @@ int nf_ct_l4proto_register_sysctl(struct net *net, } } } -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_table != NULL) { - if (err < 0) { - nf_ct_kfree_compat_sysctl_table(pn); - goto out; - } - err = nf_ct_register_sysctl(net, - &pn->ctl_compat_header, - "net/ipv4/netfilter", - pn->ctl_compat_table); - if (err == 0) - goto out; - - nf_ct_kfree_compat_sysctl_table(pn); - nf_ct_unregister_sysctl(&pn->ctl_table_header, - &pn->ctl_table, - pn->users); - } -out: -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ return err; } @@ -346,13 +276,6 @@ void nf_ct_l4proto_unregister_sysctl(struct net *net, nf_ct_unregister_sysctl(&pn->ctl_table_header, &pn->ctl_table, pn->users); - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - if (l4proto->l3proto != AF_INET6 && pn->ctl_compat_header != NULL) - nf_ct_unregister_sysctl(&pn->ctl_compat_header, - &pn->ctl_compat_table, - 0); -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 399a38fd685a..a45bee52dccc 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -402,7 +402,8 @@ static bool dccp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, { struct dccp_hdr _hdr, *dh; - dh = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + dh = skb_header_pointer(skb, dataoff, 4, &_hdr); if (dh == NULL) return false; diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index 86dc752e5349..d5868bad33a7 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -151,17 +151,6 @@ static struct ctl_table generic_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table generic_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_generic_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -179,40 +168,14 @@ static int generic_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int generic_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_generic_net *gn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(generic_compat_sysctl_table, - sizeof(generic_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &gn->timeout; -#endif -#endif - return 0; -} - static int generic_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_generic_net *gn = generic_pernet(net); struct nf_proto_net *pn = &gn->pn; gn->timeout = nf_ct_generic_timeout; - ret = generic_kmemdup_compat_sysctl_table(pn, gn); - if (ret < 0) - return ret; - - ret = generic_kmemdup_sysctl_table(pn, gn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - - return ret; + return generic_kmemdup_sysctl_table(pn, gn); } static struct nf_proto_net *generic_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index a96451a7af20..9a715f88b2f1 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -192,15 +192,15 @@ static bool gre_invert_tuple(struct nf_conntrack_tuple *tuple, static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, struct net *net, struct nf_conntrack_tuple *tuple) { - const struct gre_hdr_pptp *pgrehdr; - struct gre_hdr_pptp _pgrehdr; + const struct pptp_gre_header *pgrehdr; + struct pptp_gre_header _pgrehdr; __be16 srckey; - const struct gre_hdr *grehdr; - struct gre_hdr _grehdr; + const struct gre_base_hdr *grehdr; + struct gre_base_hdr _grehdr; /* first only delinearize old RFC1701 GRE header */ grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); - if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { + if (!grehdr || (grehdr->flags & GRE_VERSION) != GRE_VERSION_1) { /* try to behave like "nf_conntrack_proto_generic" */ tuple->src.u.all = 0; tuple->dst.u.all = 0; @@ -212,8 +212,8 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, if (!pgrehdr) return true; - if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { - pr_debug("GRE_VERSION_PPTP but unknown proto\n"); + if (grehdr->protocol != GRE_PROTO_PPP) { + pr_debug("Unsupported GRE proto(0x%x)\n", ntohs(grehdr->protocol)); return false; } diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 1d7ab960a9e6..982ea62606c7 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -161,8 +161,8 @@ static bool sctp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, const struct sctphdr *hp; struct sctphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; @@ -705,54 +705,6 @@ static struct ctl_table sctp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table sctp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_sctp_timeout_closed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_cookie_echoed", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_recd", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -781,32 +733,8 @@ static int sctp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int sctp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct sctp_net *sn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(sctp_compat_sysctl_table, - sizeof(sctp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &sn->timeouts[SCTP_CONNTRACK_CLOSED]; - pn->ctl_compat_table[1].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_WAIT]; - pn->ctl_compat_table[2].data = &sn->timeouts[SCTP_CONNTRACK_COOKIE_ECHOED]; - pn->ctl_compat_table[3].data = &sn->timeouts[SCTP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_SENT]; - pn->ctl_compat_table[5].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_RECD]; - pn->ctl_compat_table[6].data = &sn->timeouts[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT]; -#endif -#endif - return 0; -} - static int sctp_init_net(struct net *net, u_int16_t proto) { - int ret; struct sctp_net *sn = sctp_pernet(net); struct nf_proto_net *pn = &sn->pn; @@ -817,18 +745,7 @@ static int sctp_init_net(struct net *net, u_int16_t proto) sn->timeouts[i] = sctp_timeouts[i]; } - if (proto == AF_INET) { - ret = sctp_kmemdup_compat_sysctl_table(pn, sn); - if (ret < 0) - return ret; - - ret = sctp_kmemdup_sysctl_table(pn, sn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = sctp_kmemdup_sysctl_table(pn, sn); - - return ret; + return sctp_kmemdup_sysctl_table(pn, sn); } static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 70c8381641a7..69f687740c76 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -282,8 +282,8 @@ static bool tcp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, const struct tcphdr *hp; struct tcphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; @@ -1481,90 +1481,6 @@ static struct ctl_table tcp_sysctl_table[] = { }, { } }; - -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table tcp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_tcp_timeout_syn_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_sent2", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_syn_recv", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_established", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_fin_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_last_ack", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_time_wait", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_close", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_timeout_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_tcp_loose", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_be_liberal", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "ip_conntrack_tcp_max_retrans", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -1597,38 +1513,8 @@ static int tcp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int tcp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_tcp_net *tn) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(tcp_compat_sysctl_table, - sizeof(tcp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT]; - pn->ctl_compat_table[1].data = &tn->timeouts[TCP_CONNTRACK_SYN_SENT2]; - pn->ctl_compat_table[2].data = &tn->timeouts[TCP_CONNTRACK_SYN_RECV]; - pn->ctl_compat_table[3].data = &tn->timeouts[TCP_CONNTRACK_ESTABLISHED]; - pn->ctl_compat_table[4].data = &tn->timeouts[TCP_CONNTRACK_FIN_WAIT]; - pn->ctl_compat_table[5].data = &tn->timeouts[TCP_CONNTRACK_CLOSE_WAIT]; - pn->ctl_compat_table[6].data = &tn->timeouts[TCP_CONNTRACK_LAST_ACK]; - pn->ctl_compat_table[7].data = &tn->timeouts[TCP_CONNTRACK_TIME_WAIT]; - pn->ctl_compat_table[8].data = &tn->timeouts[TCP_CONNTRACK_CLOSE]; - pn->ctl_compat_table[9].data = &tn->timeouts[TCP_CONNTRACK_RETRANS]; - pn->ctl_compat_table[10].data = &tn->tcp_loose; - pn->ctl_compat_table[11].data = &tn->tcp_be_liberal; - pn->ctl_compat_table[12].data = &tn->tcp_max_retrans; -#endif -#endif - return 0; -} - static int tcp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_tcp_net *tn = tcp_pernet(net); struct nf_proto_net *pn = &tn->pn; @@ -1643,18 +1529,7 @@ static int tcp_init_net(struct net *net, u_int16_t proto) tn->tcp_max_retrans = nf_ct_tcp_max_retrans; } - if (proto == AF_INET) { - ret = tcp_kmemdup_compat_sysctl_table(pn, tn); - if (ret < 0) - return ret; - - ret = tcp_kmemdup_sysctl_table(pn, tn); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = tcp_kmemdup_sysctl_table(pn, tn); - - return ret; + return tcp_kmemdup_sysctl_table(pn, tn); } static struct nf_proto_net *tcp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 4fd040575ffe..20f35ed68030 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -44,8 +44,8 @@ static bool udp_pkt_to_tuple(const struct sk_buff *skb, const struct udphdr *hp; struct udphdr _hdr; - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; @@ -218,23 +218,6 @@ static struct ctl_table udp_sysctl_table[] = { }, { } }; -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT -static struct ctl_table udp_compat_sysctl_table[] = { - { - .procname = "ip_conntrack_udp_timeout", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { - .procname = "ip_conntrack_udp_timeout_stream", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - { } -}; -#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ #endif /* CONFIG_SYSCTL */ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, @@ -254,27 +237,8 @@ static int udp_kmemdup_sysctl_table(struct nf_proto_net *pn, return 0; } -static int udp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, - struct nf_udp_net *un) -{ -#ifdef CONFIG_SYSCTL -#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT - pn->ctl_compat_table = kmemdup(udp_compat_sysctl_table, - sizeof(udp_compat_sysctl_table), - GFP_KERNEL); - if (!pn->ctl_compat_table) - return -ENOMEM; - - pn->ctl_compat_table[0].data = &un->timeouts[UDP_CT_UNREPLIED]; - pn->ctl_compat_table[1].data = &un->timeouts[UDP_CT_REPLIED]; -#endif -#endif - return 0; -} - static int udp_init_net(struct net *net, u_int16_t proto) { - int ret; struct nf_udp_net *un = udp_pernet(net); struct nf_proto_net *pn = &un->pn; @@ -285,18 +249,7 @@ static int udp_init_net(struct net *net, u_int16_t proto) un->timeouts[i] = udp_timeouts[i]; } - if (proto == AF_INET) { - ret = udp_kmemdup_compat_sysctl_table(pn, un); - if (ret < 0) - return ret; - - ret = udp_kmemdup_sysctl_table(pn, un); - if (ret < 0) - nf_ct_kfree_compat_sysctl_table(pn); - } else - ret = udp_kmemdup_sysctl_table(pn, un); - - return ret; + return udp_kmemdup_sysctl_table(pn, un); } static struct nf_proto_net *udp_get_net_proto(struct net *net) diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 9d692f5adb94..029206e8dec4 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -54,7 +54,8 @@ static bool udplite_pkt_to_tuple(const struct sk_buff *skb, const struct udphdr *hp; struct udphdr _hdr; - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); + /* Actually only need first 4 bytes to get ports. */ + hp = skb_header_pointer(skb, dataoff, 4, &_hdr); if (hp == NULL) return false; diff --git a/net/netfilter/nf_conntrack_seqadj.c b/net/netfilter/nf_conntrack_seqadj.c index dff0f0cc59e4..ef7063eced7c 100644 --- a/net/netfilter/nf_conntrack_seqadj.c +++ b/net/netfilter/nf_conntrack_seqadj.c @@ -169,7 +169,7 @@ int nf_ct_seq_adjust(struct sk_buff *skb, s32 seqoff, ackoff; struct nf_conn_seqadj *seqadj = nfct_seqadj(ct); struct nf_ct_seqadj *this_way, *other_way; - int res; + int res = 1; this_way = &seqadj->seq[dir]; other_way = &seqadj->seq[!dir]; @@ -184,27 +184,31 @@ int nf_ct_seq_adjust(struct sk_buff *skb, else seqoff = this_way->offset_before; + newseq = htonl(ntohl(tcph->seq) + seqoff); + inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); + pr_debug("Adjusting sequence number from %u->%u\n", + ntohl(tcph->seq), ntohl(newseq)); + tcph->seq = newseq; + + if (!tcph->ack) + goto out; + if (after(ntohl(tcph->ack_seq) - other_way->offset_before, other_way->correction_pos)) ackoff = other_way->offset_after; else ackoff = other_way->offset_before; - newseq = htonl(ntohl(tcph->seq) + seqoff); newack = htonl(ntohl(tcph->ack_seq) - ackoff); - - inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, false); inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, false); - - pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n", + pr_debug("Adjusting ack number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), ntohl(newack)); - - tcph->seq = newseq; tcph->ack_seq = newack; res = nf_ct_sack_adjust(skb, protoff, tcph, ct, ctinfo); +out: spin_unlock_bh(&ct->lock); return res; diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 7d77217de6a3..621b81c7bddc 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -83,9 +83,10 @@ static int digits_len(const struct nf_conn *ct, const char *dptr, static int iswordc(const char c) { if (isalnum(c) || c == '!' || c == '"' || c == '%' || - (c >= '(' && c <= '/') || c == ':' || c == '<' || c == '>' || + (c >= '(' && c <= '+') || c == ':' || c == '<' || c == '>' || c == '?' || (c >= '[' && c <= ']') || c == '_' || c == '`' || - c == '{' || c == '}' || c == '~') + c == '{' || c == '}' || c == '~' || (c >= '-' && c <= '/') || + c == '\'') return 1; return 0; } @@ -329,13 +330,12 @@ static const char *sip_follow_continuation(const char *dptr, const char *limit) static const char *sip_skip_whitespace(const char *dptr, const char *limit) { for (; dptr < limit; dptr++) { - if (*dptr == ' ') + if (*dptr == ' ' || *dptr == '\t') continue; if (*dptr != '\r' && *dptr != '\n') break; dptr = sip_follow_continuation(dptr, limit); - if (dptr == NULL) - return NULL; + break; } return dptr; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9f267c3ffb39..5f446cd9f3fd 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -212,6 +212,11 @@ static int ct_seq_show(struct seq_file *s, void *v) if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) return 0; + if (nf_ct_should_gc(ct)) { + nf_ct_kill(ct); + goto release; + } + /* we only want to print DIR_ORIGINAL */ if (NF_CT_DIRECTION(hash)) goto release; @@ -228,8 +233,7 @@ static int ct_seq_show(struct seq_file *s, void *v) seq_printf(s, "%-8s %u %-8s %u %ld ", l3proto->name, nf_ct_l3num(ct), l4proto->name, nf_ct_protonum(ct), - timer_pending(&ct->timeout) - ? (long)(ct->timeout.expires - jiffies)/HZ : 0); + nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) l4proto->print_conntrack(s, ct); @@ -353,13 +357,13 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", nr_conntracks, - st->searched, + 0, st->found, - st->new, + 0, st->invalid, st->ignore, - st->delete, - st->delete_list, + 0, + 0, st->insert, st->insert_failed, st->drop, diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index 065522564ac6..e0adb5959342 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -13,13 +13,13 @@ /* core.c */ -unsigned int nf_iterate(struct list_head *head, struct sk_buff *skb, - struct nf_hook_state *state, struct nf_hook_ops **elemp); +unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, + struct nf_hook_entry **entryp); /* nf_queue.c */ -int nf_queue(struct sk_buff *skb, struct nf_hook_ops *elem, - struct nf_hook_state *state, unsigned int queuenum); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops); +int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, + unsigned int queuenum); +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry); int __init netfilter_queue_init(void); /* nf_log.c */ diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index aa5847a16713..30a17d649a83 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -39,12 +39,12 @@ static struct nf_logger *__find_logger(int pf, const char *str_logger) return NULL; } -void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) +int nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) { const struct nf_logger *log; - if (pf == NFPROTO_UNSPEC) - return; + if (pf == NFPROTO_UNSPEC || pf >= ARRAY_SIZE(net->nf.nf_loggers)) + return -EOPNOTSUPP; mutex_lock(&nf_log_mutex); log = nft_log_dereference(net->nf.nf_loggers[pf]); @@ -52,6 +52,8 @@ void nf_log_set(struct net *net, u_int8_t pf, const struct nf_logger *logger) rcu_assign_pointer(net->nf.nf_loggers[pf], logger); mutex_unlock(&nf_log_mutex); + + return 0; } EXPORT_SYMBOL(nf_log_set); diff --git a/net/netfilter/nf_log_common.c b/net/netfilter/nf_log_common.c index a5aa5967b8e1..119fe1cb1ea9 100644 --- a/net/netfilter/nf_log_common.c +++ b/net/netfilter/nf_log_common.c @@ -77,7 +77,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, nf_log_buf_add(m, "SPT=%u DPT=%u ", ntohs(th->source), ntohs(th->dest)); /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & XT_LOG_TCPSEQ) { + if (logflags & NF_LOG_TCPSEQ) { nf_log_buf_add(m, "SEQ=%u ACK=%u ", ntohl(th->seq), ntohl(th->ack_seq)); } @@ -107,7 +107,7 @@ int nf_log_dump_tcp_header(struct nf_log_buf *m, const struct sk_buff *skb, /* Max length: 11 "URGP=65535 " */ nf_log_buf_add(m, "URGP=%u ", ntohs(th->urg_ptr)); - if ((logflags & XT_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { + if ((logflags & NF_LOG_TCPOPT) && th->doff*4 > sizeof(struct tcphdr)) { u_int8_t _opt[60 - sizeof(struct tcphdr)]; const u_int8_t *op; unsigned int i; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index ecee105bbada..bbb8f3df79f7 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -566,16 +566,10 @@ static int nf_nat_proto_clean(struct nf_conn *ct, void *data) * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - if (!del_timer(&ct->timeout)) - return 1; - ct->status &= ~IPS_NAT_DONE_MASK; - rhashtable_remove_fast(&nf_nat_bysource_table, &ct->nat_bysource, nf_nat_bysource_params); - add_timer(&ct->timeout); - /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. */ diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index b19ad20a705c..96964a0070e1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -96,14 +96,14 @@ void nf_queue_entry_get_refs(struct nf_queue_entry *entry) } EXPORT_SYMBOL_GPL(nf_queue_entry_get_refs); -void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) +void nf_queue_nf_hook_drop(struct net *net, const struct nf_hook_entry *entry) { const struct nf_queue_handler *qh; rcu_read_lock(); qh = rcu_dereference(net->nf.queue_handler); if (qh) - qh->nf_hook_drop(net, ops); + qh->nf_hook_drop(net, entry); rcu_read_unlock(); } @@ -112,7 +112,6 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) * through nf_reinject(). */ int nf_queue(struct sk_buff *skb, - struct nf_hook_ops *elem, struct nf_hook_state *state, unsigned int queuenum) { @@ -141,7 +140,6 @@ int nf_queue(struct sk_buff *skb, *entry = (struct nf_queue_entry) { .skb = skb, - .elem = elem, .state = *state, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -165,11 +163,15 @@ err: void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { + struct nf_hook_entry *hook_entry; struct sk_buff *skb = entry->skb; - struct nf_hook_ops *elem = entry->elem; const struct nf_afinfo *afinfo; + struct nf_hook_ops *elem; int err; + hook_entry = rcu_dereference(entry->state.hook_entries); + elem = &hook_entry->ops; + nf_queue_entry_release_refs(entry); /* Continue traversal iff userspace said ok... */ @@ -186,8 +188,7 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) if (verdict == NF_ACCEPT) { next_hook: - verdict = nf_iterate(entry->state.hook_list, - skb, &entry->state, &elem); + verdict = nf_iterate(skb, &entry->state, &hook_entry); } switch (verdict & NF_VERDICT_MASK) { @@ -198,7 +199,8 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, elem, &entry->state, + RCU_INIT_POINTER(entry->state.hook_entries, hook_entry); + err = nf_queue(skb, &entry->state, verdict >> NF_VERDICT_QBITS); if (err < 0) { if (err == -ESRCH && diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7e1c876c7608..b70d3ea1430e 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1196,6 +1196,83 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) } } +struct nft_chain_hook { + u32 num; + u32 priority; + const struct nf_chain_type *type; + struct net_device *dev; +}; + +static int nft_chain_parse_hook(struct net *net, + const struct nlattr * const nla[], + struct nft_af_info *afi, + struct nft_chain_hook *hook, bool create) +{ + struct nlattr *ha[NFTA_HOOK_MAX + 1]; + const struct nf_chain_type *type; + struct net_device *dev; + int err; + + err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], + nft_hook_policy); + if (err < 0) + return err; + + if (ha[NFTA_HOOK_HOOKNUM] == NULL || + ha[NFTA_HOOK_PRIORITY] == NULL) + return -EINVAL; + + hook->num = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); + if (hook->num >= afi->nhooks) + return -EINVAL; + + hook->priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); + + type = chain_type[afi->family][NFT_CHAIN_T_DEFAULT]; + if (nla[NFTA_CHAIN_TYPE]) { + type = nf_tables_chain_type_lookup(afi, nla[NFTA_CHAIN_TYPE], + create); + if (IS_ERR(type)) + return PTR_ERR(type); + } + if (!(type->hook_mask & (1 << hook->num))) + return -EOPNOTSUPP; + if (!try_module_get(type->owner)) + return -ENOENT; + + hook->type = type; + + hook->dev = NULL; + if (afi->flags & NFT_AF_NEEDS_DEV) { + char ifname[IFNAMSIZ]; + + if (!ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); + dev = dev_get_by_name(net, ifname); + if (!dev) { + module_put(type->owner); + return -ENOENT; + } + hook->dev = dev; + } else if (ha[NFTA_HOOK_DEV]) { + module_put(type->owner); + return -EOPNOTSUPP; + } + + return 0; +} + +static void nft_chain_release_hook(struct nft_chain_hook *hook) +{ + module_put(hook->type->owner); + if (hook->dev != NULL) + dev_put(hook->dev); +} + static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[]) @@ -1206,10 +1283,8 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_chain *chain; struct nft_base_chain *basechain = NULL; - struct nlattr *ha[NFTA_HOOK_MAX + 1]; u8 genmask = nft_genmask_next(net); int family = nfmsg->nfgen_family; - struct net_device *dev = NULL; u8 policy = NF_ACCEPT; u64 handle = 0; unsigned int i; @@ -1273,6 +1348,37 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (nlh->nlmsg_flags & NLM_F_REPLACE) return -EOPNOTSUPP; + if (nla[NFTA_CHAIN_HOOK]) { + struct nft_base_chain *basechain; + struct nft_chain_hook hook; + struct nf_hook_ops *ops; + + if (!(chain->flags & NFT_BASE_CHAIN)) + return -EBUSY; + + err = nft_chain_parse_hook(net, nla, afi, &hook, + create); + if (err < 0) + return err; + + basechain = nft_base_chain(chain); + if (basechain->type != hook.type) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + + for (i = 0; i < afi->nops; i++) { + ops = &basechain->ops[i]; + if (ops->hooknum != hook.num || + ops->priority != hook.priority || + ops->dev != hook.dev) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + } + nft_chain_release_hook(&hook); + } + if (nla[NFTA_CHAIN_HANDLE] && name) { struct nft_chain *chain2; @@ -1320,102 +1426,53 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, return -EOVERFLOW; if (nla[NFTA_CHAIN_HOOK]) { - const struct nf_chain_type *type; + struct nft_chain_hook hook; struct nf_hook_ops *ops; nf_hookfn *hookfn; - u32 hooknum, priority; - type = chain_type[family][NFT_CHAIN_T_DEFAULT]; - if (nla[NFTA_CHAIN_TYPE]) { - type = nf_tables_chain_type_lookup(afi, - nla[NFTA_CHAIN_TYPE], - create); - if (IS_ERR(type)) - return PTR_ERR(type); - } - - err = nla_parse_nested(ha, NFTA_HOOK_MAX, nla[NFTA_CHAIN_HOOK], - nft_hook_policy); + err = nft_chain_parse_hook(net, nla, afi, &hook, create); if (err < 0) return err; - if (ha[NFTA_HOOK_HOOKNUM] == NULL || - ha[NFTA_HOOK_PRIORITY] == NULL) - return -EINVAL; - - hooknum = ntohl(nla_get_be32(ha[NFTA_HOOK_HOOKNUM])); - if (hooknum >= afi->nhooks) - return -EINVAL; - priority = ntohl(nla_get_be32(ha[NFTA_HOOK_PRIORITY])); - - if (!(type->hook_mask & (1 << hooknum))) - return -EOPNOTSUPP; - if (!try_module_get(type->owner)) - return -ENOENT; - hookfn = type->hooks[hooknum]; - - if (afi->flags & NFT_AF_NEEDS_DEV) { - char ifname[IFNAMSIZ]; - - if (!ha[NFTA_HOOK_DEV]) { - module_put(type->owner); - return -EOPNOTSUPP; - } - - nla_strlcpy(ifname, ha[NFTA_HOOK_DEV], IFNAMSIZ); - dev = dev_get_by_name(net, ifname); - if (!dev) { - module_put(type->owner); - return -ENOENT; - } - } else if (ha[NFTA_HOOK_DEV]) { - module_put(type->owner); - return -EOPNOTSUPP; - } basechain = kzalloc(sizeof(*basechain), GFP_KERNEL); if (basechain == NULL) { - module_put(type->owner); - if (dev != NULL) - dev_put(dev); + nft_chain_release_hook(&hook); return -ENOMEM; } - if (dev != NULL) - strncpy(basechain->dev_name, dev->name, IFNAMSIZ); + if (hook.dev != NULL) + strncpy(basechain->dev_name, hook.dev->name, IFNAMSIZ); if (nla[NFTA_CHAIN_COUNTERS]) { stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); if (IS_ERR(stats)) { - module_put(type->owner); + nft_chain_release_hook(&hook); kfree(basechain); - if (dev != NULL) - dev_put(dev); return PTR_ERR(stats); } basechain->stats = stats; } else { stats = netdev_alloc_pcpu_stats(struct nft_stats); if (stats == NULL) { - module_put(type->owner); + nft_chain_release_hook(&hook); kfree(basechain); - if (dev != NULL) - dev_put(dev); return -ENOMEM; } rcu_assign_pointer(basechain->stats, stats); } - basechain->type = type; + hookfn = hook.type->hooks[hook.num]; + basechain->type = hook.type; chain = &basechain->chain; for (i = 0; i < afi->nops; i++) { ops = &basechain->ops[i]; ops->pf = family; - ops->hooknum = hooknum; - ops->priority = priority; + ops->hooknum = hook.num; + ops->priority = hook.priority; ops->priv = chain; ops->hook = afi->hooks[ops->hooknum]; - ops->dev = dev; + ops->dev = hook.dev; if (hookfn) ops->hook = hookfn; if (afi->hook_ops_init) @@ -3426,12 +3483,12 @@ static int nft_setelem_parse_flags(const struct nft_set *set, } static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, - const struct nlattr *attr) + const struct nlattr *attr, u32 nlmsg_flags) { struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; struct nft_data_desc d1, d2; struct nft_set_ext_tmpl tmpl; - struct nft_set_ext *ext; + struct nft_set_ext *ext, *ext2; struct nft_set_elem elem; struct nft_set_binding *binding; struct nft_userdata *udata; @@ -3558,9 +3615,19 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, goto err4; ext->genmask = nft_genmask_cur(ctx->net) | NFT_SET_ELEM_BUSY_MASK; - err = set->ops->insert(ctx->net, set, &elem); - if (err < 0) + err = set->ops->insert(ctx->net, set, &elem, &ext2); + if (err) { + if (err == -EEXIST) { + if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && + nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && + memcmp(nft_set_ext_data(ext), + nft_set_ext_data(ext2), set->dlen) != 0) + err = -EBUSY; + else if (!(nlmsg_flags & NLM_F_EXCL)) + err = 0; + } goto err5; + } nft_trans_elem(trans) = elem; list_add_tail(&trans->list, &ctx->net->nft.commit_list); @@ -3616,7 +3683,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) return -ENFILE; - err = nft_add_set_elem(&ctx, set, attr); + err = nft_add_set_elem(&ctx, set, attr, nlh->nlmsg_flags); if (err < 0) { atomic_dec(&set->nelems); break; @@ -4342,6 +4409,31 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, return 0; } +/** + * nft_parse_u32_check - fetch u32 attribute and check for maximum value + * + * @attr: netlink attribute to fetch value from + * @max: maximum value to be stored in dest + * @dest: pointer to the variable + * + * Parse, check and store a given u32 netlink attribute into variable. + * This function returns -ERANGE if the value goes over maximum value. + * Otherwise a 0 is returned and the attribute value is stored in the + * destination variable. + */ +unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest) +{ + int val; + + val = ntohl(nla_get_be32(attr)); + if (val > max) + return -ERANGE; + + *dest = val; + return 0; +} +EXPORT_SYMBOL_GPL(nft_parse_u32_check); + /** * nft_parse_register - parse a register value from a netlink attribute * diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index fb8b5892b5ff..0dd5c695482f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -34,7 +34,7 @@ static struct nf_loginfo trace_loginfo = { .u = { .log = { .level = LOGLEVEL_WARNING, - .logflags = NF_LOG_MASK, + .logflags = NF_LOG_DEFAULT_MASK, }, }, }; @@ -93,12 +93,15 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) ptr = skb_network_header(skb); - else + else { + if (!pkt->tprot_set) + return false; ptr = skb_network_header(skb) + pkt->xt.thoff; + } ptr += priv->offset; - if (unlikely(ptr + priv->len >= skb_tail_pointer(skb))) + if (unlikely(ptr + priv->len > skb_tail_pointer(skb))) return false; *dest = 0; @@ -260,8 +263,13 @@ int __init nf_tables_core_module_init(void) if (err < 0) goto err7; - return 0; + err = nft_range_module_init(); + if (err < 0) + goto err8; + return 0; +err8: + nft_dynset_module_exit(); err7: nft_payload_module_exit(); err6: diff --git a/net/netfilter/nf_tables_inet.c b/net/netfilter/nf_tables_inet.c index 6b5f76295d3d..f713cc205669 100644 --- a/net/netfilter/nf_tables_inet.c +++ b/net/netfilter/nf_tables_inet.c @@ -82,7 +82,10 @@ static int __init nf_tables_inet_init(void) { int ret; - nft_register_chain_type(&filter_inet); + ret = nft_register_chain_type(&filter_inet); + if (ret < 0) + return ret; + ret = register_pernet_subsys(&nf_tables_inet_net_ops); if (ret < 0) nft_unregister_chain_type(&filter_inet); diff --git a/net/netfilter/nf_tables_netdev.c b/net/netfilter/nf_tables_netdev.c index 75d696f11045..9e2ae424b640 100644 --- a/net/netfilter/nf_tables_netdev.c +++ b/net/netfilter/nf_tables_netdev.c @@ -15,78 +15,6 @@ #include #include -static inline void -nft_netdev_set_pktinfo_ipv4(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - struct iphdr *iph, _iph; - u32 len, thoff; - - nft_set_pktinfo(pkt, skb, state); - - iph = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*iph), - &_iph); - if (!iph) - return; - - if (iph->ihl < 5 || iph->version != 4) - return; - - len = ntohs(iph->tot_len); - thoff = iph->ihl * 4; - if (skb->len < len) - return; - else if (len < thoff) - return; - - pkt->tprot = iph->protocol; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = ntohs(iph->frag_off) & IP_OFFSET; -} - -static inline void -__nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ -#if IS_ENABLED(CONFIG_IPV6) - struct ipv6hdr *ip6h, _ip6h; - unsigned int thoff = 0; - unsigned short frag_off; - int protohdr; - u32 pkt_len; - - ip6h = skb_header_pointer(skb, skb_network_offset(skb), sizeof(*ip6h), - &_ip6h); - if (!ip6h) - return; - - if (ip6h->version != 6) - return; - - pkt_len = ntohs(ip6h->payload_len); - if (pkt_len + sizeof(*ip6h) > skb->len) - return; - - protohdr = ipv6_find_hdr(pkt->skb, &thoff, -1, &frag_off, NULL); - if (protohdr < 0) - return; - - pkt->tprot = protohdr; - pkt->xt.thoff = thoff; - pkt->xt.fragoff = frag_off; -#endif -} - -static inline void nft_netdev_set_pktinfo_ipv6(struct nft_pktinfo *pkt, - struct sk_buff *skb, - const struct nf_hook_state *state) -{ - nft_set_pktinfo(pkt, skb, state); - __nft_netdev_set_pktinfo_ipv6(pkt, skb, state); -} - static unsigned int nft_do_chain_netdev(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) @@ -95,13 +23,13 @@ nft_do_chain_netdev(void *priv, struct sk_buff *skb, switch (skb->protocol) { case htons(ETH_P_IP): - nft_netdev_set_pktinfo_ipv4(&pkt, skb, state); + nft_set_pktinfo_ipv4_validate(&pkt, skb, state); break; case htons(ETH_P_IPV6): - nft_netdev_set_pktinfo_ipv6(&pkt, skb, state); + nft_set_pktinfo_ipv6_validate(&pkt, skb, state); break; default: - nft_set_pktinfo(&pkt, skb, state); + nft_set_pktinfo_unspec(&pkt, skb, state); break; } @@ -221,14 +149,25 @@ static int __init nf_tables_netdev_init(void) { int ret; - nft_register_chain_type(&nft_filter_chain_netdev); - ret = register_pernet_subsys(&nf_tables_netdev_net_ops); - if (ret < 0) { - nft_unregister_chain_type(&nft_filter_chain_netdev); + ret = nft_register_chain_type(&nft_filter_chain_netdev); + if (ret) return ret; - } - register_netdevice_notifier(&nf_tables_netdev_notifier); + + ret = register_pernet_subsys(&nf_tables_netdev_net_ops); + if (ret) + goto err1; + + ret = register_netdevice_notifier(&nf_tables_netdev_notifier); + if (ret) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&nf_tables_netdev_net_ops); +err1: + nft_unregister_chain_type(&nft_filter_chain_netdev); + return ret; } static void __exit nf_tables_netdev_exit(void) diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index fa24a5b398b1..ab695f8e2d29 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -113,20 +113,22 @@ static int nf_trace_fill_pkt_info(struct sk_buff *nlskb, const struct nft_pktinfo *pkt) { const struct sk_buff *skb = pkt->skb; - unsigned int len = min_t(unsigned int, - pkt->xt.thoff - skb_network_offset(skb), - NFT_TRACETYPE_NETWORK_HSIZE); int off = skb_network_offset(skb); + unsigned int len, nh_end; + nh_end = pkt->tprot_set ? pkt->xt.thoff : skb->len; + len = min_t(unsigned int, nh_end - skb_network_offset(skb), + NFT_TRACETYPE_NETWORK_HSIZE); if (trace_fill_header(nlskb, NFTA_TRACE_NETWORK_HEADER, skb, off, len)) return -1; - len = min_t(unsigned int, skb->len - pkt->xt.thoff, - NFT_TRACETYPE_TRANSPORT_HSIZE); - - if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, - pkt->xt.thoff, len)) - return -1; + if (pkt->tprot_set) { + len = min_t(unsigned int, skb->len - pkt->xt.thoff, + NFT_TRACETYPE_TRANSPORT_HSIZE); + if (trace_fill_header(nlskb, NFTA_TRACE_TRANSPORT_HEADER, skb, + pkt->xt.thoff, len)) + return -1; + } if (!skb_mac_header_was_set(skb)) return 0; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index e924e95fcc7f..3b79f34b5095 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -43,7 +43,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, if (help == NULL) return NF_DROP; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(help->helper); if (helper == NULL) return NF_DROP; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 6577db524ef6..eb086a192c5a 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -442,7 +442,9 @@ __build_packet_message(struct nfnl_log_net *log, if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSINDEV, htonl(indev->ifindex)) || /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ + /* rcu_read_lock()ed by nf_hook_thresh or + * nf_log_packet. + */ nla_put_be32(inst->skb, NFULA_IFINDEX_INDEV, htonl(br_port_get_rcu(indev)->br->dev->ifindex))) goto nla_put_failure; @@ -477,7 +479,9 @@ __build_packet_message(struct nfnl_log_net *log, if (nla_put_be32(inst->skb, NFULA_IFINDEX_PHYSOUTDEV, htonl(outdev->ifindex)) || /* this is the bridge group "brX" */ - /* rcu_read_lock()ed by nf_hook_slow or nf_log_packet */ + /* rcu_read_lock()ed by nf_hook_thresh or + * nf_log_packet. + */ nla_put_be32(inst->skb, NFULA_IFINDEX_OUTDEV, htonl(br_port_get_rcu(outdev)->br->dev->ifindex))) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index f49f45081acb..af832c526048 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -740,7 +740,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) struct net *net = entry->state.net; struct nfnl_queue_net *q = nfnl_queue_pernet(net); - /* rcu_read_lock()ed by nf_hook_slow() */ + /* rcu_read_lock()ed by nf_hook_thresh */ queue = instance_lookup(q, queuenum); if (!queue) return -ESRCH; @@ -917,12 +917,14 @@ static struct notifier_block nfqnl_dev_notifier = { .notifier_call = nfqnl_rcv_dev_event, }; -static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long ops_ptr) +static int nf_hook_cmp(struct nf_queue_entry *entry, unsigned long entry_ptr) { - return entry->elem == (struct nf_hook_ops *)ops_ptr; + return rcu_access_pointer(entry->state.hook_entries) == + (struct nf_hook_entry *)entry_ptr; } -static void nfqnl_nf_hook_drop(struct net *net, struct nf_hook_ops *hook) +static void nfqnl_nf_hook_drop(struct net *net, + const struct nf_hook_entry *hook) { struct nfnl_queue_net *q = nfnl_queue_pernet(net); int i; @@ -1522,9 +1524,16 @@ static int __init nfnetlink_queue_init(void) goto cleanup_netlink_notifier; } - register_netdevice_notifier(&nfqnl_dev_notifier); + status = register_netdevice_notifier(&nfqnl_dev_notifier); + if (status < 0) { + pr_err("nf_queue: failed to register netdevice notifier\n"); + goto cleanup_netlink_subsys; + } + return status; +cleanup_netlink_subsys: + nfnetlink_subsys_unregister(&nfqnl_subsys); cleanup_netlink_notifier: netlink_unregister_notifier(&nfqnl_rtnl_notifier); unregister_pernet_subsys(&nfnl_queue_net_ops); diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index d71cc18fa35d..31c15ed2e5fc 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -52,6 +52,7 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, { struct nft_bitwise *priv = nft_expr_priv(expr); struct nft_data_desc d1, d2; + u32 len; int err; if (tb[NFTA_BITWISE_SREG] == NULL || @@ -61,7 +62,12 @@ static int nft_bitwise_init(const struct nft_ctx *ctx, tb[NFTA_BITWISE_XOR] == NULL) return -EINVAL; - priv->len = ntohl(nla_get_be32(tb[NFTA_BITWISE_LEN])); + err = nft_parse_u32_check(tb[NFTA_BITWISE_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + priv->sreg = nft_parse_register(tb[NFTA_BITWISE_SREG]); err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index b78c28ba465f..ee63d981268d 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -99,6 +99,7 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_byteorder *priv = nft_expr_priv(expr); + u32 size, len; int err; if (tb[NFTA_BYTEORDER_SREG] == NULL || @@ -117,7 +118,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, return -EINVAL; } - priv->size = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_SIZE])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_SIZE], U8_MAX, &size); + if (err < 0) + return err; + + priv->size = size; + switch (priv->size) { case 2: case 4: @@ -128,7 +134,12 @@ static int nft_byteorder_init(const struct nft_ctx *ctx, } priv->sreg = nft_parse_register(tb[NFTA_BYTEORDER_SREG]); - priv->len = ntohl(nla_get_be32(tb[NFTA_BYTEORDER_LEN])); + err = nft_parse_u32_check(tb[NFTA_BYTEORDER_LEN], U8_MAX, &len); + if (err < 0) + return err; + + priv->len = len; + err = nft_validate_register_load(priv->sreg, priv->len); if (err < 0) return err; diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index e25b35d70e4d..2e53739812b1 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -84,6 +84,9 @@ static int nft_cmp_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (err < 0) return err; + if (desc.len > U8_MAX) + return -ERANGE; + priv->op = ntohl(nla_get_be32(tb[NFTA_CMP_OP])); priv->len = desc.len; return 0; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 51e180f2a003..d7b0d171172a 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -128,15 +128,18 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, &count, sizeof(count)); return; } + case NFT_CT_L3PROTOCOL: + *dest = nf_ct_l3num(ct); + return; + case NFT_CT_PROTOCOL: + *dest = nf_ct_protonum(ct); + return; default: break; } tuple = &ct->tuplehash[priv->dir].tuple; switch (priv->key) { - case NFT_CT_L3PROTOCOL: - *dest = nf_ct_l3num(ct); - return; case NFT_CT_SRC: memcpy(dest, tuple->src.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); @@ -145,9 +148,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr, memcpy(dest, tuple->dst.u3.all, nf_ct_l3num(ct) == NFPROTO_IPV4 ? 4 : 16); return; - case NFT_CT_PROTOCOL: - *dest = nf_ct_protonum(ct); - return; case NFT_CT_PROTO_SRC: *dest = (__force __u16)tuple->src.u.all; return; @@ -283,8 +283,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, case NFT_CT_L3PROTOCOL: case NFT_CT_PROTOCOL: - if (tb[NFTA_CT_DIRECTION] == NULL) - return -EINVAL; + /* For compatibility, do not report error if NFTA_CT_DIRECTION + * attribute is specified. + */ len = sizeof(u8); break; case NFT_CT_SRC: @@ -363,6 +364,8 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, switch (priv->key) { #ifdef CONFIG_NF_CONNTRACK_MARK case NFT_CT_MARK: + if (tb[NFTA_CT_DIRECTION]) + return -EINVAL; len = FIELD_SIZEOF(struct nf_conn, mark); break; #endif @@ -432,8 +435,6 @@ static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; switch (priv->key) { - case NFT_CT_L3PROTOCOL: - case NFT_CT_PROTOCOL: case NFT_CT_SRC: case NFT_CT_DST: case NFT_CT_PROTO_SRC: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 0af26699bf04..e3b83c31da2e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -22,6 +22,7 @@ struct nft_dynset { enum nft_dynset_ops op:8; enum nft_registers sreg_key:8; enum nft_registers sreg_data:8; + bool invert; u64 timeout; struct nft_expr *expr; struct nft_set_binding binding; @@ -82,10 +83,14 @@ static void nft_dynset_eval(const struct nft_expr *expr, if (sexpr != NULL) sexpr->ops->eval(sexpr, regs, pkt); + + if (priv->invert) + regs->verdict.code = NFT_BREAK; return; } out: - regs->verdict.code = NFT_BREAK; + if (!priv->invert) + regs->verdict.code = NFT_BREAK; } static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { @@ -96,6 +101,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, + [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 }, }; static int nft_dynset_init(const struct nft_ctx *ctx, @@ -113,6 +119,15 @@ static int nft_dynset_init(const struct nft_ctx *ctx, tb[NFTA_DYNSET_SREG_KEY] == NULL) return -EINVAL; + if (tb[NFTA_DYNSET_FLAGS]) { + u32 flags = ntohl(nla_get_be32(tb[NFTA_DYNSET_FLAGS])); + + if (flags & ~NFT_DYNSET_F_INV) + return -EINVAL; + if (flags & NFT_DYNSET_F_INV) + priv->invert = true; + } + set = nf_tables_set_lookup(ctx->table, tb[NFTA_DYNSET_SET_NAME], genmask); if (IS_ERR(set)) { @@ -220,6 +235,7 @@ static void nft_dynset_destroy(const struct nft_ctx *ctx, static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_dynset *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_DYNSET_F_INV : 0; if (nft_dump_register(skb, NFTA_DYNSET_SREG_KEY, priv->sreg_key)) goto nla_put_failure; @@ -235,6 +251,8 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) goto nla_put_failure; + if (nla_put_be32(skb, NFTA_DYNSET_FLAGS, htonl(flags))) + goto nla_put_failure; return 0; nla_put_failure: diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 82c264e40278..a84cf3d66056 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -59,7 +59,7 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, const struct nlattr * const tb[]) { struct nft_exthdr *priv = nft_expr_priv(expr); - u32 offset, len; + u32 offset, len, err; if (tb[NFTA_EXTHDR_DREG] == NULL || tb[NFTA_EXTHDR_TYPE] == NULL || @@ -67,11 +67,13 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, tb[NFTA_EXTHDR_LEN] == NULL) return -EINVAL; - offset = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OFFSET])); - len = ntohl(nla_get_be32(tb[NFTA_EXTHDR_LEN])); + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); + if (err < 0) + return err; - if (offset > U8_MAX || len > U8_MAX) - return -ERANGE; + err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); + if (err < 0) + return err; priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); priv->offset = offset; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 564fa7929ed5..09473b415b95 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -1,395 +1,145 @@ /* - * Copyright (c) 2008-2014 Patrick McHardy + * Copyright (c) 2016 Laura Garcia * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * Development of this code funded by Astaro AG (http://www.astaro.com/) */ #include #include #include -#include -#include -#include #include -#include -#include #include #include #include - -/* We target a hash table size of 4, element hint is 75% of final size */ -#define NFT_HASH_ELEMENT_HINT 3 +#include +#include struct nft_hash { - struct rhashtable ht; - struct delayed_work gc_work; + enum nft_registers sreg:8; + enum nft_registers dreg:8; + u8 len; + u32 modulus; + u32 seed; + u32 offset; }; -struct nft_hash_elem { - struct rhash_head node; - struct nft_set_ext ext; +static void nft_hash_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_hash *priv = nft_expr_priv(expr); + const void *data = ®s->data[priv->sreg]; + u32 h; + + h = reciprocal_scale(jhash(data, priv->len, priv->seed), priv->modulus); + regs->data[priv->dreg] = h + priv->offset; +} + +static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { + [NFTA_HASH_SREG] = { .type = NLA_U32 }, + [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_LEN] = { .type = NLA_U32 }, + [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, + [NFTA_HASH_SEED] = { .type = NLA_U32 }, }; -struct nft_hash_cmp_arg { - const struct nft_set *set; - const u32 *key; - u8 genmask; -}; - -static const struct rhashtable_params nft_hash_params; - -static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) -{ - const struct nft_hash_cmp_arg *arg = data; - - return jhash(arg->key, len, seed); -} - -static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) -{ - const struct nft_hash_elem *he = data; - - return jhash(nft_set_ext_key(&he->ext), len, seed); -} - -static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, - const void *ptr) -{ - const struct nft_hash_cmp_arg *x = arg->key; - const struct nft_hash_elem *he = ptr; - - if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) - return 1; - if (nft_set_elem_expired(&he->ext)) - return 1; - if (!nft_set_elem_active(&he->ext, x->genmask)) - return 1; - return 0; -} - -static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) -{ - struct nft_hash *priv = nft_set_priv(set); - const struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_cur(net), - .set = set, - .key = key, - }; - - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) - *ext = &he->ext; - - return !!he; -} - -static bool nft_hash_update(struct nft_set *set, const u32 *key, - void *(*new)(struct nft_set *, - const struct nft_expr *, - struct nft_regs *regs), - const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_set_ext **ext) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { - .genmask = NFT_GENMASK_ANY, - .set = set, - .key = key, - }; - - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) - goto out; - - he = new(set, expr, regs); - if (he == NULL) - goto err1; - if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params)) - goto err2; -out: - *ext = &he->ext; - return true; - -err2: - nft_set_elem_destroy(set, he); -err1: - return false; -} - -static int nft_hash_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; - struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(net), - .set = set, - .key = elem->key.val.data, - }; - - return rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, - nft_hash_params); -} - -static void nft_hash_activate(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_hash_elem *he = elem->priv; - - nft_set_elem_change_active(net, set, &he->ext); - nft_set_elem_clear_busy(&he->ext); -} - -static void *nft_hash_deactivate(const struct net *net, - const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct nft_hash_cmp_arg arg = { - .genmask = nft_genmask_next(net), - .set = set, - .key = elem->key.val.data, - }; - - rcu_read_lock(); - he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); - if (he != NULL) { - if (!nft_set_elem_mark_busy(&he->ext) || - !nft_is_active(net, &he->ext)) - nft_set_elem_change_active(net, set, &he->ext); - else - he = NULL; - } - rcu_read_unlock(); - - return he; -} - -static void nft_hash_remove(const struct nft_set *set, - const struct nft_set_elem *elem) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he = elem->priv; - - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); -} - -static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, - struct nft_set_iter *iter) -{ - struct nft_hash *priv = nft_set_priv(set); - struct nft_hash_elem *he; - struct rhashtable_iter hti; - struct nft_set_elem elem; - int err; - - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); - iter->err = err; - if (err) - return; - - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) { - iter->err = err; - goto out; - } - - while ((he = rhashtable_walk_next(&hti))) { - if (IS_ERR(he)) { - err = PTR_ERR(he); - if (err != -EAGAIN) { - iter->err = err; - goto out; - } - - continue; - } - - if (iter->count < iter->skip) - goto cont; - if (nft_set_elem_expired(&he->ext)) - goto cont; - if (!nft_set_elem_active(&he->ext, iter->genmask)) - goto cont; - - elem.priv = he; - - iter->err = iter->fn(ctx, set, iter, &elem); - if (iter->err < 0) - goto out; - -cont: - iter->count++; - } - -out: - rhashtable_walk_stop(&hti); - rhashtable_walk_exit(&hti); -} - -static void nft_hash_gc(struct work_struct *work) -{ - struct nft_set *set; - struct nft_hash_elem *he; - struct nft_hash *priv; - struct nft_set_gc_batch *gcb = NULL; - struct rhashtable_iter hti; - int err; - - priv = container_of(work, struct nft_hash, gc_work.work); - set = nft_set_container_of(priv); - - err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); - if (err) - goto schedule; - - err = rhashtable_walk_start(&hti); - if (err && err != -EAGAIN) - goto out; - - while ((he = rhashtable_walk_next(&hti))) { - if (IS_ERR(he)) { - if (PTR_ERR(he) != -EAGAIN) - goto out; - continue; - } - - if (!nft_set_elem_expired(&he->ext)) - continue; - if (nft_set_elem_mark_busy(&he->ext)) - continue; - - gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); - if (gcb == NULL) - goto out; - rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); - atomic_dec(&set->nelems); - nft_set_gc_batch_add(gcb, he); - } -out: - rhashtable_walk_stop(&hti); - rhashtable_walk_exit(&hti); - - nft_set_gc_batch_complete(gcb); -schedule: - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); -} - -static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) -{ - return sizeof(struct nft_hash); -} - -static const struct rhashtable_params nft_hash_params = { - .head_offset = offsetof(struct nft_hash_elem, node), - .hashfn = nft_hash_key, - .obj_hashfn = nft_hash_obj, - .obj_cmpfn = nft_hash_cmp, - .automatic_shrinking = true, -}; - -static int nft_hash_init(const struct nft_set *set, - const struct nft_set_desc *desc, +static int nft_hash_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, const struct nlattr * const tb[]) { - struct nft_hash *priv = nft_set_priv(set); - struct rhashtable_params params = nft_hash_params; - int err; + struct nft_hash *priv = nft_expr_priv(expr); + u32 len; - params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; - params.key_len = set->klen; + if (!tb[NFTA_HASH_SREG] || + !tb[NFTA_HASH_DREG] || + !tb[NFTA_HASH_LEN] || + !tb[NFTA_HASH_SEED] || + !tb[NFTA_HASH_MODULUS]) + return -EINVAL; - err = rhashtable_init(&priv->ht, ¶ms); - if (err < 0) - return err; + if (tb[NFTA_HASH_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_HASH_OFFSET])); - INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); - if (set->flags & NFT_SET_TIMEOUT) - queue_delayed_work(system_power_efficient_wq, &priv->gc_work, - nft_set_gc_interval(set)); + priv->sreg = nft_parse_register(tb[NFTA_HASH_SREG]); + priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]); + + len = ntohl(nla_get_be32(tb[NFTA_HASH_LEN])); + if (len == 0 || len > U8_MAX) + return -ERANGE; + + priv->len = len; + + priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS])); + if (priv->modulus <= 1) + return -ERANGE; + + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + + priv->seed = ntohl(nla_get_be32(tb[NFTA_HASH_SEED])); + + return nft_validate_register_load(priv->sreg, len) && + nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_hash_dump(struct sk_buff *skb, + const struct nft_expr *expr) +{ + const struct nft_hash *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_HASH_SREG, priv->sreg)) + goto nla_put_failure; + if (nft_dump_register(skb, NFTA_HASH_DREG, priv->dreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_LEN, htonl(priv->len))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_MODULUS, htonl(priv->modulus))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_HASH_SEED, htonl(priv->seed))) + goto nla_put_failure; + if (priv->offset != 0) + if (nla_put_be32(skb, NFTA_HASH_OFFSET, htonl(priv->offset))) + goto nla_put_failure; return 0; + +nla_put_failure: + return -1; } -static void nft_hash_elem_destroy(void *ptr, void *arg) -{ - nft_set_elem_destroy((const struct nft_set *)arg, ptr); -} - -static void nft_hash_destroy(const struct nft_set *set) -{ - struct nft_hash *priv = nft_set_priv(set); - - cancel_delayed_work_sync(&priv->gc_work); - rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, - (void *)set); -} - -static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, - struct nft_set_estimate *est) -{ - unsigned int esize; - - esize = sizeof(struct nft_hash_elem); - if (desc->size) { - est->size = sizeof(struct nft_hash) + - roundup_pow_of_two(desc->size * 4 / 3) * - sizeof(struct nft_hash_elem *) + - desc->size * esize; - } else { - /* Resizing happens when the load drops below 30% or goes - * above 75%. The average of 52.5% load (approximated by 50%) - * is used for the size estimation of the hash buckets, - * meaning we calculate two buckets per element. - */ - est->size = esize + 2 * sizeof(struct nft_hash_elem *); - } - - est->class = NFT_SET_CLASS_O_1; - - return true; -} - -static struct nft_set_ops nft_hash_ops __read_mostly = { - .privsize = nft_hash_privsize, - .elemsize = offsetof(struct nft_hash_elem, ext), - .estimate = nft_hash_estimate, +static struct nft_expr_type nft_hash_type; +static const struct nft_expr_ops nft_hash_ops = { + .type = &nft_hash_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_hash)), + .eval = nft_hash_eval, .init = nft_hash_init, - .destroy = nft_hash_destroy, - .insert = nft_hash_insert, - .activate = nft_hash_activate, - .deactivate = nft_hash_deactivate, - .remove = nft_hash_remove, - .lookup = nft_hash_lookup, - .update = nft_hash_update, - .walk = nft_hash_walk, - .features = NFT_SET_MAP | NFT_SET_TIMEOUT, + .dump = nft_hash_dump, +}; + +static struct nft_expr_type nft_hash_type __read_mostly = { + .name = "hash", + .ops = &nft_hash_ops, + .policy = nft_hash_policy, + .maxattr = NFTA_HASH_MAX, .owner = THIS_MODULE, }; static int __init nft_hash_module_init(void) { - return nft_register_set(&nft_hash_ops); + return nft_register_expr(&nft_hash_type); } static void __exit nft_hash_module_exit(void) { - nft_unregister_set(&nft_hash_ops); + nft_unregister_expr(&nft_hash_type); } module_init(nft_hash_module_init); module_exit(nft_hash_module_exit); MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_ALIAS_NFT_SET(); +MODULE_AUTHOR("Laura Garcia "); +MODULE_ALIAS_NFT_EXPR("hash"); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index db3b746858e3..d17018ff54e6 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -53,6 +53,10 @@ static int nft_immediate_init(const struct nft_ctx *ctx, tb[NFTA_IMMEDIATE_DATA]); if (err < 0) return err; + + if (desc.len > U8_MAX) + return -ERANGE; + priv->dlen = desc.len; priv->dreg = nft_parse_register(tb[NFTA_IMMEDIATE_DREG]); diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index 24a73bb26e94..1b01404bb33f 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -58,8 +58,11 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_LEVEL] != NULL && tb[NFTA_LOG_GROUP] != NULL) return -EINVAL; - if (tb[NFTA_LOG_GROUP] != NULL) + if (tb[NFTA_LOG_GROUP] != NULL) { li->type = NF_LOG_TYPE_ULOG; + if (tb[NFTA_LOG_FLAGS] != NULL) + return -EINVAL; + } nla = tb[NFTA_LOG_PREFIX]; if (nla != NULL) { @@ -87,6 +90,10 @@ static int nft_log_init(const struct nft_ctx *ctx, if (tb[NFTA_LOG_FLAGS] != NULL) { li->u.log.logflags = ntohl(nla_get_be32(tb[NFTA_LOG_FLAGS])); + if (li->u.log.logflags & ~NF_LOG_MASK) { + err = -EINVAL; + goto err1; + } } break; case NF_LOG_TYPE_ULOG: diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index e164325d1bc0..8166b6994cc7 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -43,7 +43,7 @@ static void nft_lookup_eval(const struct nft_expr *expr, return; } - if (found && set->flags & NFT_SET_MAP) + if (set->flags & NFT_SET_MAP) nft_data_copy(®s->data[priv->dreg], nft_set_ext_data(ext), set->dlen); diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 8a6bc7630912..6c1e0246706e 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -52,6 +52,8 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = pkt->pf; break; case NFT_META_L4PROTO: + if (!pkt->tprot_set) + goto err; *dest = pkt->tprot; break; case NFT_META_PRIORITY: diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c new file mode 100644 index 000000000000..55bc5ab78d4a --- /dev/null +++ b/net/netfilter/nft_numgen.c @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2016 Laura Garcia + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state); + +struct nft_ng_inc { + enum nft_registers dreg:8; + u32 modulus; + atomic_t counter; + u32 offset; +}; + +static void nft_ng_inc_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_ng_inc *priv = nft_expr_priv(expr); + u32 nval, oval; + + do { + oval = atomic_read(&priv->counter); + nval = (oval + 1 < priv->modulus) ? oval + 1 : 0; + } while (atomic_cmpxchg(&priv->counter, oval, nval) != oval); + + regs->data[priv->dreg] = nval + priv->offset; +} + +static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { + [NFTA_NG_DREG] = { .type = NLA_U32 }, + [NFTA_NG_MODULUS] = { .type = NLA_U32 }, + [NFTA_NG_TYPE] = { .type = NLA_U32 }, + [NFTA_NG_OFFSET] = { .type = NLA_U32 }, +}; + +static int nft_ng_inc_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ng_inc *priv = nft_expr_priv(expr); + + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) + return -ERANGE; + + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); + atomic_set(&priv->counter, 0); + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, + u32 modulus, enum nft_ng_types type, u32 offset) +{ + if (nft_dump_register(skb, NFTA_NG_DREG, dreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_MODULUS, htonl(modulus))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_TYPE, htonl(type))) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_NG_OFFSET, htonl(offset))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + +static int nft_ng_inc_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ng_inc *priv = nft_expr_priv(expr); + + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_INCREMENTAL, + priv->offset); +} + +struct nft_ng_random { + enum nft_registers dreg:8; + u32 modulus; + u32 offset; +}; + +static void nft_ng_random_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state); + u32 val; + + val = reciprocal_scale(prandom_u32_state(state), priv->modulus); + regs->data[priv->dreg] = val + priv->offset; +} + +static int nft_ng_random_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_ng_random *priv = nft_expr_priv(expr); + + if (tb[NFTA_NG_OFFSET]) + priv->offset = ntohl(nla_get_be32(tb[NFTA_NG_OFFSET])); + + priv->modulus = ntohl(nla_get_be32(tb[NFTA_NG_MODULUS])); + if (priv->modulus == 0) + return -ERANGE; + + if (priv->offset + priv->modulus - 1 < priv->offset) + return -EOVERFLOW; + + prandom_init_once(&nft_numgen_prandom_state); + + priv->dreg = nft_parse_register(tb[NFTA_NG_DREG]); + + return nft_validate_register_store(ctx, priv->dreg, NULL, + NFT_DATA_VALUE, sizeof(u32)); +} + +static int nft_ng_random_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_ng_random *priv = nft_expr_priv(expr); + + return nft_ng_dump(skb, priv->dreg, priv->modulus, NFT_NG_RANDOM, + priv->offset); +} + +static struct nft_expr_type nft_ng_type; +static const struct nft_expr_ops nft_ng_inc_ops = { + .type = &nft_ng_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_inc)), + .eval = nft_ng_inc_eval, + .init = nft_ng_inc_init, + .dump = nft_ng_inc_dump, +}; + +static const struct nft_expr_ops nft_ng_random_ops = { + .type = &nft_ng_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_ng_random)), + .eval = nft_ng_random_eval, + .init = nft_ng_random_init, + .dump = nft_ng_random_dump, +}; + +static const struct nft_expr_ops * +nft_ng_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) +{ + u32 type; + + if (!tb[NFTA_NG_DREG] || + !tb[NFTA_NG_MODULUS] || + !tb[NFTA_NG_TYPE]) + return ERR_PTR(-EINVAL); + + type = ntohl(nla_get_be32(tb[NFTA_NG_TYPE])); + + switch (type) { + case NFT_NG_INCREMENTAL: + return &nft_ng_inc_ops; + case NFT_NG_RANDOM: + return &nft_ng_random_ops; + } + + return ERR_PTR(-EINVAL); +} + +static struct nft_expr_type nft_ng_type __read_mostly = { + .name = "numgen", + .select_ops = &nft_ng_select_ops, + .policy = nft_ng_policy, + .maxattr = NFTA_NG_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_ng_module_init(void) +{ + return nft_register_expr(&nft_ng_type); +} + +static void __exit nft_ng_module_exit(void) +{ + nft_unregister_expr(&nft_ng_type); +} + +module_init(nft_ng_module_init); +module_exit(nft_ng_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Laura Garcia "); +MODULE_ALIAS_NFT_EXPR("numgen"); diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 12cd4bf16d17..b2f88617611a 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -92,6 +92,8 @@ static void nft_payload_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: + if (!pkt->tprot_set) + goto err; offset = pkt->xt.thoff; break; default: @@ -184,6 +186,8 @@ static void nft_payload_set_eval(const struct nft_expr *expr, offset = skb_network_offset(skb); break; case NFT_PAYLOAD_TRANSPORT_HEADER: + if (!pkt->tprot_set) + goto err; offset = pkt->xt.thoff; break; default: diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 61d216eb7917..393d359a1889 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -22,9 +22,10 @@ static u32 jhash_initval __read_mostly; struct nft_queue { - u16 queuenum; - u16 queues_total; - u16 flags; + enum nft_registers sreg_qnum:8; + u16 queuenum; + u16 queues_total; + u16 flags; }; static void nft_queue_eval(const struct nft_expr *expr, @@ -54,27 +55,51 @@ static void nft_queue_eval(const struct nft_expr *expr, regs->verdict.code = ret; } +static void nft_queue_sreg_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_queue *priv = nft_expr_priv(expr); + u32 queue, ret; + + queue = regs->data[priv->sreg_qnum]; + + ret = NF_QUEUE_NR(queue); + if (priv->flags & NFT_QUEUE_FLAG_BYPASS) + ret |= NF_VERDICT_FLAG_QUEUE_BYPASS; + + regs->verdict.code = ret; +} + static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, + [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 }, }; static int nft_queue_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_queue *priv = nft_expr_priv(expr); + u32 maxid; - if (tb[NFTA_QUEUE_NUM] == NULL) - return -EINVAL; - - init_hashrandom(&jhash_initval); priv->queuenum = ntohs(nla_get_be16(tb[NFTA_QUEUE_NUM])); - if (tb[NFTA_QUEUE_TOTAL] != NULL) + if (tb[NFTA_QUEUE_TOTAL]) priv->queues_total = ntohs(nla_get_be16(tb[NFTA_QUEUE_TOTAL])); - if (tb[NFTA_QUEUE_FLAGS] != NULL) { + else + priv->queues_total = 1; + + if (priv->queues_total == 0) + return -EINVAL; + + maxid = priv->queues_total - 1 + priv->queuenum; + if (maxid > U16_MAX) + return -ERANGE; + + if (tb[NFTA_QUEUE_FLAGS]) { priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); if (priv->flags & ~NFT_QUEUE_FLAG_MASK) return -EINVAL; @@ -82,6 +107,29 @@ static int nft_queue_init(const struct nft_ctx *ctx, return 0; } +static int nft_queue_sreg_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_queue *priv = nft_expr_priv(expr); + int err; + + priv->sreg_qnum = nft_parse_register(tb[NFTA_QUEUE_SREG_QNUM]); + err = nft_validate_register_load(priv->sreg_qnum, sizeof(u32)); + if (err < 0) + return err; + + if (tb[NFTA_QUEUE_FLAGS]) { + priv->flags = ntohs(nla_get_be16(tb[NFTA_QUEUE_FLAGS])); + if (priv->flags & ~NFT_QUEUE_FLAG_MASK) + return -EINVAL; + if (priv->flags & NFT_QUEUE_FLAG_CPU_FANOUT) + return -EOPNOTSUPP; + } + + return 0; +} + static int nft_queue_dump(struct sk_buff *skb, const struct nft_expr *expr) { const struct nft_queue *priv = nft_expr_priv(expr); @@ -97,6 +145,21 @@ nla_put_failure: return -1; } +static int +nft_queue_sreg_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_queue *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_QUEUE_SREG_QNUM, priv->sreg_qnum) || + nla_put_be16(skb, NFTA_QUEUE_FLAGS, htons(priv->flags))) + goto nla_put_failure; + + return 0; + +nla_put_failure: + return -1; +} + static struct nft_expr_type nft_queue_type; static const struct nft_expr_ops nft_queue_ops = { .type = &nft_queue_type, @@ -106,9 +169,35 @@ static const struct nft_expr_ops nft_queue_ops = { .dump = nft_queue_dump, }; +static const struct nft_expr_ops nft_queue_sreg_ops = { + .type = &nft_queue_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_queue)), + .eval = nft_queue_sreg_eval, + .init = nft_queue_sreg_init, + .dump = nft_queue_sreg_dump, +}; + +static const struct nft_expr_ops * +nft_queue_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (tb[NFTA_QUEUE_NUM] && tb[NFTA_QUEUE_SREG_QNUM]) + return ERR_PTR(-EINVAL); + + init_hashrandom(&jhash_initval); + + if (tb[NFTA_QUEUE_NUM]) + return &nft_queue_ops; + + if (tb[NFTA_QUEUE_SREG_QNUM]) + return &nft_queue_sreg_ops; + + return ERR_PTR(-EINVAL); +} + static struct nft_expr_type nft_queue_type __read_mostly = { .name = "queue", - .ops = &nft_queue_ops, + .select_ops = &nft_queue_select_ops, .policy = nft_queue_policy, .maxattr = NFTA_QUEUE_MAX, .owner = THIS_MODULE, diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c new file mode 100644 index 000000000000..c00104c07095 --- /dev/null +++ b/net/netfilter/nft_quota.c @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_quota { + u64 quota; + bool invert; + atomic64_t remain; +}; + +static inline bool nft_overquota(struct nft_quota *priv, + const struct nft_pktinfo *pkt) +{ + return atomic64_sub_return(pkt->skb->len, &priv->remain) < 0; +} + +static void nft_quota_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_quota *priv = nft_expr_priv(expr); + + if (nft_overquota(priv, pkt) ^ priv->invert) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { + [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, + [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, +}; + +static int nft_quota_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_quota *priv = nft_expr_priv(expr); + u32 flags = 0; + u64 quota; + + if (!tb[NFTA_QUOTA_BYTES]) + return -EINVAL; + + quota = be64_to_cpu(nla_get_be64(tb[NFTA_QUOTA_BYTES])); + if (quota > S64_MAX) + return -EOVERFLOW; + + if (tb[NFTA_QUOTA_FLAGS]) { + flags = ntohl(nla_get_be32(tb[NFTA_QUOTA_FLAGS])); + if (flags & ~NFT_QUOTA_F_INV) + return -EINVAL; + } + + priv->quota = quota; + priv->invert = (flags & NFT_QUOTA_F_INV) ? true : false; + atomic64_set(&priv->remain, quota); + + return 0; +} + +static int nft_quota_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_quota *priv = nft_expr_priv(expr); + u32 flags = priv->invert ? NFT_QUOTA_F_INV : 0; + + if (nla_put_be64(skb, NFTA_QUOTA_BYTES, cpu_to_be64(priv->quota), + NFTA_QUOTA_PAD) || + nla_put_be32(skb, NFTA_QUOTA_FLAGS, htonl(flags))) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_quota_type; +static const struct nft_expr_ops nft_quota_ops = { + .type = &nft_quota_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_quota)), + .eval = nft_quota_eval, + .init = nft_quota_init, + .dump = nft_quota_dump, +}; + +static struct nft_expr_type nft_quota_type __read_mostly = { + .name = "quota", + .ops = &nft_quota_ops, + .policy = nft_quota_policy, + .maxattr = NFTA_QUOTA_MAX, + .flags = NFT_EXPR_STATEFUL, + .owner = THIS_MODULE, +}; + +static int __init nft_quota_module_init(void) +{ + return nft_register_expr(&nft_quota_type); +} + +static void __exit nft_quota_module_exit(void) +{ + nft_unregister_expr(&nft_quota_type); +} + +module_init(nft_quota_module_init); +module_exit(nft_quota_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso "); +MODULE_ALIAS_NFT_EXPR("quota"); diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c new file mode 100644 index 000000000000..c6d5358482d1 --- /dev/null +++ b/net/netfilter/nft_range.c @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2016 Pablo Neira Ayuso + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +struct nft_range_expr { + struct nft_data data_from; + struct nft_data data_to; + enum nft_registers sreg:8; + u8 len; + enum nft_range_ops op:8; +}; + +static void nft_range_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + bool mismatch; + int d1, d2; + + d1 = memcmp(®s->data[priv->sreg], &priv->data_from, priv->len); + d2 = memcmp(®s->data[priv->sreg], &priv->data_to, priv->len); + switch (priv->op) { + case NFT_RANGE_EQ: + mismatch = (d1 < 0 || d2 > 0); + break; + case NFT_RANGE_NEQ: + mismatch = (d1 >= 0 && d2 <= 0); + break; + } + + if (mismatch) + regs->verdict.code = NFT_BREAK; +} + +static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { + [NFTA_RANGE_SREG] = { .type = NLA_U32 }, + [NFTA_RANGE_OP] = { .type = NLA_U32 }, + [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, + [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, +}; + +static int nft_range_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_range_expr *priv = nft_expr_priv(expr); + struct nft_data_desc desc_from, desc_to; + int err; + + err = nft_data_init(NULL, &priv->data_from, sizeof(priv->data_from), + &desc_from, tb[NFTA_RANGE_FROM_DATA]); + if (err < 0) + return err; + + err = nft_data_init(NULL, &priv->data_to, sizeof(priv->data_to), + &desc_to, tb[NFTA_RANGE_TO_DATA]); + if (err < 0) + goto err1; + + if (desc_from.len != desc_to.len) { + err = -EINVAL; + goto err2; + } + + priv->sreg = nft_parse_register(tb[NFTA_RANGE_SREG]); + err = nft_validate_register_load(priv->sreg, desc_from.len); + if (err < 0) + goto err2; + + priv->op = ntohl(nla_get_be32(tb[NFTA_RANGE_OP])); + priv->len = desc_from.len; + return 0; +err2: + nft_data_uninit(&priv->data_to, desc_to.type); +err1: + nft_data_uninit(&priv->data_from, desc_from.type); + return err; +} + +static int nft_range_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_range_expr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_RANGE_SREG, priv->sreg)) + goto nla_put_failure; + if (nla_put_be32(skb, NFTA_RANGE_OP, htonl(priv->op))) + goto nla_put_failure; + + if (nft_data_dump(skb, NFTA_RANGE_FROM_DATA, &priv->data_from, + NFT_DATA_VALUE, priv->len) < 0 || + nft_data_dump(skb, NFTA_RANGE_TO_DATA, &priv->data_to, + NFT_DATA_VALUE, priv->len) < 0) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -1; +} + +static struct nft_expr_type nft_range_type; +static const struct nft_expr_ops nft_range_ops = { + .type = &nft_range_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_range_expr)), + .eval = nft_range_eval, + .init = nft_range_init, + .dump = nft_range_dump, +}; + +static struct nft_expr_type nft_range_type __read_mostly = { + .name = "range", + .ops = &nft_range_ops, + .policy = nft_range_policy, + .maxattr = NFTA_RANGE_MAX, + .owner = THIS_MODULE, +}; + +int __init nft_range_module_init(void) +{ + return nft_register_expr(&nft_range_type); +} + +void nft_range_module_exit(void) +{ + nft_unregister_expr(&nft_range_type); +} diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c new file mode 100644 index 000000000000..3794cb2fc788 --- /dev/null +++ b/net/netfilter/nft_set_hash.c @@ -0,0 +1,404 @@ +/* + * Copyright (c) 2008-2014 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* We target a hash table size of 4, element hint is 75% of final size */ +#define NFT_HASH_ELEMENT_HINT 3 + +struct nft_hash { + struct rhashtable ht; + struct delayed_work gc_work; +}; + +struct nft_hash_elem { + struct rhash_head node; + struct nft_set_ext ext; +}; + +struct nft_hash_cmp_arg { + const struct nft_set *set; + const u32 *key; + u8 genmask; +}; + +static const struct rhashtable_params nft_hash_params; + +static inline u32 nft_hash_key(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_cmp_arg *arg = data; + + return jhash(arg->key, len, seed); +} + +static inline u32 nft_hash_obj(const void *data, u32 len, u32 seed) +{ + const struct nft_hash_elem *he = data; + + return jhash(nft_set_ext_key(&he->ext), len, seed); +} + +static inline int nft_hash_cmp(struct rhashtable_compare_arg *arg, + const void *ptr) +{ + const struct nft_hash_cmp_arg *x = arg->key; + const struct nft_hash_elem *he = ptr; + + if (memcmp(nft_set_ext_key(&he->ext), x->key, x->set->klen)) + return 1; + if (nft_set_elem_expired(&he->ext)) + return 1; + if (!nft_set_elem_active(&he->ext, x->genmask)) + return 1; + return 0; +} + +static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + const struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_cur(net), + .set = set, + .key = key, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + *ext = &he->ext; + + return !!he; +} + +static bool nft_hash_update(struct nft_set *set, const u32 *key, + void *(*new)(struct nft_set *, + const struct nft_expr *, + struct nft_regs *regs), + const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = NFT_GENMASK_ANY, + .set = set, + .key = key, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) + goto out; + + he = new(set, expr, regs); + if (he == NULL) + goto err1; + if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params)) + goto err2; +out: + *ext = &he->ext; + return true; + +err2: + nft_set_elem_destroy(set, he); +err1: + return false; +} + +static int nft_hash_insert(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, + struct nft_set_ext **ext) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(net), + .set = set, + .key = elem->key.val.data, + }; + struct nft_hash_elem *prev; + + prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node, + nft_hash_params); + if (IS_ERR(prev)) + return PTR_ERR(prev); + if (prev) { + *ext = &prev->ext; + return -EEXIST; + } + return 0; +} + +static void nft_hash_activate(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash_elem *he = elem->priv; + + nft_set_elem_change_active(net, set, &he->ext); + nft_set_elem_clear_busy(&he->ext); +} + +static void *nft_hash_deactivate(const struct net *net, + const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct nft_hash_cmp_arg arg = { + .genmask = nft_genmask_next(net), + .set = set, + .key = elem->key.val.data, + }; + + rcu_read_lock(); + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_hash_params); + if (he != NULL) { + if (!nft_set_elem_mark_busy(&he->ext) || + !nft_is_active(net, &he->ext)) + nft_set_elem_change_active(net, set, &he->ext); + else + he = NULL; + } + rcu_read_unlock(); + + return he; +} + +static void nft_hash_remove(const struct nft_set *set, + const struct nft_set_elem *elem) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he = elem->priv; + + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); +} + +static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, + struct nft_set_iter *iter) +{ + struct nft_hash *priv = nft_set_priv(set); + struct nft_hash_elem *he; + struct rhashtable_iter hti; + struct nft_set_elem elem; + int err; + + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + iter->err = err; + if (err) + return; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) { + iter->err = err; + goto out; + } + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + err = PTR_ERR(he); + if (err != -EAGAIN) { + iter->err = err; + goto out; + } + + continue; + } + + if (iter->count < iter->skip) + goto cont; + if (nft_set_elem_expired(&he->ext)) + goto cont; + if (!nft_set_elem_active(&he->ext, iter->genmask)) + goto cont; + + elem.priv = he; + + iter->err = iter->fn(ctx, set, iter, &elem); + if (iter->err < 0) + goto out; + +cont: + iter->count++; + } + +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); +} + +static void nft_hash_gc(struct work_struct *work) +{ + struct nft_set *set; + struct nft_hash_elem *he; + struct nft_hash *priv; + struct nft_set_gc_batch *gcb = NULL; + struct rhashtable_iter hti; + int err; + + priv = container_of(work, struct nft_hash, gc_work.work); + set = nft_set_container_of(priv); + + err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); + if (err) + goto schedule; + + err = rhashtable_walk_start(&hti); + if (err && err != -EAGAIN) + goto out; + + while ((he = rhashtable_walk_next(&hti))) { + if (IS_ERR(he)) { + if (PTR_ERR(he) != -EAGAIN) + goto out; + continue; + } + + if (!nft_set_elem_expired(&he->ext)) + continue; + if (nft_set_elem_mark_busy(&he->ext)) + continue; + + gcb = nft_set_gc_batch_check(set, gcb, GFP_ATOMIC); + if (gcb == NULL) + goto out; + rhashtable_remove_fast(&priv->ht, &he->node, nft_hash_params); + atomic_dec(&set->nelems); + nft_set_gc_batch_add(gcb, he); + } +out: + rhashtable_walk_stop(&hti); + rhashtable_walk_exit(&hti); + + nft_set_gc_batch_complete(gcb); +schedule: + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); +} + +static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) +{ + return sizeof(struct nft_hash); +} + +static const struct rhashtable_params nft_hash_params = { + .head_offset = offsetof(struct nft_hash_elem, node), + .hashfn = nft_hash_key, + .obj_hashfn = nft_hash_obj, + .obj_cmpfn = nft_hash_cmp, + .automatic_shrinking = true, +}; + +static int nft_hash_init(const struct nft_set *set, + const struct nft_set_desc *desc, + const struct nlattr * const tb[]) +{ + struct nft_hash *priv = nft_set_priv(set); + struct rhashtable_params params = nft_hash_params; + int err; + + params.nelem_hint = desc->size ?: NFT_HASH_ELEMENT_HINT; + params.key_len = set->klen; + + err = rhashtable_init(&priv->ht, ¶ms); + if (err < 0) + return err; + + INIT_DEFERRABLE_WORK(&priv->gc_work, nft_hash_gc); + if (set->flags & NFT_SET_TIMEOUT) + queue_delayed_work(system_power_efficient_wq, &priv->gc_work, + nft_set_gc_interval(set)); + return 0; +} + +static void nft_hash_elem_destroy(void *ptr, void *arg) +{ + nft_set_elem_destroy((const struct nft_set *)arg, ptr); +} + +static void nft_hash_destroy(const struct nft_set *set) +{ + struct nft_hash *priv = nft_set_priv(set); + + cancel_delayed_work_sync(&priv->gc_work); + rhashtable_free_and_destroy(&priv->ht, nft_hash_elem_destroy, + (void *)set); +} + +static bool nft_hash_estimate(const struct nft_set_desc *desc, u32 features, + struct nft_set_estimate *est) +{ + unsigned int esize; + + esize = sizeof(struct nft_hash_elem); + if (desc->size) { + est->size = sizeof(struct nft_hash) + + roundup_pow_of_two(desc->size * 4 / 3) * + sizeof(struct nft_hash_elem *) + + desc->size * esize; + } else { + /* Resizing happens when the load drops below 30% or goes + * above 75%. The average of 52.5% load (approximated by 50%) + * is used for the size estimation of the hash buckets, + * meaning we calculate two buckets per element. + */ + est->size = esize + 2 * sizeof(struct nft_hash_elem *); + } + + est->class = NFT_SET_CLASS_O_1; + + return true; +} + +static struct nft_set_ops nft_hash_ops __read_mostly = { + .privsize = nft_hash_privsize, + .elemsize = offsetof(struct nft_hash_elem, ext), + .estimate = nft_hash_estimate, + .init = nft_hash_init, + .destroy = nft_hash_destroy, + .insert = nft_hash_insert, + .activate = nft_hash_activate, + .deactivate = nft_hash_deactivate, + .remove = nft_hash_remove, + .lookup = nft_hash_lookup, + .update = nft_hash_update, + .walk = nft_hash_walk, + .features = NFT_SET_MAP | NFT_SET_TIMEOUT, + .owner = THIS_MODULE, +}; + +static int __init nft_hash_module_init(void) +{ + return nft_register_set(&nft_hash_ops); +} + +static void __exit nft_hash_module_exit(void) +{ + nft_unregister_set(&nft_hash_ops); +} + +module_init(nft_hash_module_init); +module_exit(nft_hash_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS_NFT_SET(); diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_set_rbtree.c similarity index 97% rename from net/netfilter/nft_rbtree.c rename to net/netfilter/nft_set_rbtree.c index ffe9ae062d23..38b5bda242f8 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -96,7 +96,8 @@ out: } static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, - struct nft_rbtree_elem *new) + struct nft_rbtree_elem *new, + struct nft_set_ext **ext) { struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); @@ -124,8 +125,10 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, else if (!nft_rbtree_interval_end(rbe) && nft_rbtree_interval_end(new)) p = &parent->rb_right; - else + else { + *ext = &rbe->ext; return -EEXIST; + } } } } @@ -135,13 +138,14 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, - const struct nft_set_elem *elem) + const struct nft_set_elem *elem, + struct nft_set_ext **ext) { struct nft_rbtree_elem *rbe = elem->priv; int err; spin_lock_bh(&nft_rbtree_lock); - err = __nft_rbtree_insert(net, set, rbe); + err = __nft_rbtree_insert(net, set, rbe, ext); spin_unlock_bh(&nft_rbtree_lock); return err; diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 515131f9e021..dbd6c4a12b97 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -24,7 +24,6 @@ static DEFINE_MUTEX(xt_rateest_mutex); #define RATEEST_HSIZE 16 static struct hlist_head rateest_hash[RATEEST_HSIZE] __read_mostly; static unsigned int jhash_rnd __read_mostly; -static bool rnd_inited __read_mostly; static unsigned int xt_rateest_hash(const char *name) { @@ -99,10 +98,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) } cfg; int ret; - if (unlikely(!rnd_inited)) { - get_random_bytes(&jhash_rnd, sizeof(jhash_rnd)); - rnd_inited = true; - } + net_get_random_once(&jhash_rnd, sizeof(jhash_rnd)); est = xt_rateest_lookup(info->name); if (est) { diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index e118397254af..872db2d0e2a9 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -110,18 +110,14 @@ tcpmss_mangle_packet(struct sk_buff *skb, if (info->mss == XT_TCPMSS_CLAMP_PMTU) { struct net *net = par->net; unsigned int in_mtu = tcpmss_reverse_mtu(net, skb, family); + unsigned int min_mtu = min(dst_mtu(skb_dst(skb)), in_mtu); - if (dst_mtu(skb_dst(skb)) <= minlen) { + if (min_mtu <= minlen) { net_err_ratelimited("unknown or invalid path-MTU (%u)\n", - dst_mtu(skb_dst(skb))); + min_mtu); return -1; } - if (in_mtu <= minlen) { - net_err_ratelimited("unknown or invalid path-MTU (%u)\n", - in_mtu); - return -1; - } - newmss = min(dst_mtu(skb_dst(skb)), in_mtu) - minlen; + newmss = min_mtu - minlen; } else newmss = info->mss; diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 6e57a3966dc5..0471db4032c5 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -89,6 +89,8 @@ static int tee_tg_check(const struct xt_tgchk_param *par) return -EINVAL; if (info->oif[0]) { + int ret; + if (info->oif[sizeof(info->oif)-1] != '\0') return -EINVAL; @@ -101,7 +103,11 @@ static int tee_tg_check(const struct xt_tgchk_param *par) priv->notifier.notifier_call = tee_netdev_event; info->priv = priv; - register_netdevice_notifier(&priv->notifier); + ret = register_netdevice_notifier(&priv->notifier); + if (ret) { + kfree(priv); + return ret; + } } else info->priv = NULL; diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 99bbc829868d..b6dc322593a3 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -366,14 +366,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) unsigned int i; int ret; - if (unlikely(!connlimit_rnd)) { - u_int32_t rand; + net_get_random_once(&connlimit_rnd, sizeof(connlimit_rnd)); - do { - get_random_bytes(&rand, sizeof(rand)); - } while (!rand); - cmpxchg(&connlimit_rnd, 0, rand); - } ret = nf_ct_l3proto_try_module_get(par->family); if (ret < 0) { pr_info("cannot load conntrack support for " diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 188404b9b002..a3b8f697cfc5 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -233,10 +233,8 @@ conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par, return false; if (info->match_flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires = 0; + unsigned long expires = nf_ct_expires(ct) / HZ; - if (timer_pending(&ct->timeout)) - expires = (ct->timeout.expires - jiffies) / HZ; if ((expires >= info->expires_min && expires <= info->expires_max) ^ !(info->invert_flags & XT_CONNTRACK_EXPIRES)) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 178696852bde..44a095ecc7b7 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -56,6 +56,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) } /* need to declare this at the top */ +static const struct file_operations dl_file_ops_v1; static const struct file_operations dl_file_ops; /* hash table crap */ @@ -86,8 +87,8 @@ struct dsthash_ent { unsigned long expires; /* precalculated expiry time */ struct { unsigned long prev; /* last modification */ - u_int32_t credit; - u_int32_t credit_cap, cost; + u_int64_t credit; + u_int64_t credit_cap, cost; } rateinfo; struct rcu_head rcu; }; @@ -98,7 +99,7 @@ struct xt_hashlimit_htable { u_int8_t family; bool rnd_initialized; - struct hashlimit_cfg1 cfg; /* config */ + struct hashlimit_cfg2 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ @@ -114,6 +115,30 @@ struct xt_hashlimit_htable { struct hlist_head hash[0]; /* hashtable itself */ }; +static int +cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) +{ + if (revision == 1) { + struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from; + + to->mode = cfg->mode; + to->avg = cfg->avg; + to->burst = cfg->burst; + to->size = cfg->size; + to->max = cfg->max; + to->gc_interval = cfg->gc_interval; + to->expire = cfg->expire; + to->srcmask = cfg->srcmask; + to->dstmask = cfg->dstmask; + } else if (revision == 2) { + memcpy(to, from, sizeof(struct hashlimit_cfg2)); + } else { + return -EINVAL; + } + + return 0; +} + static DEFINE_MUTEX(hashlimit_mutex); /* protects htables list */ static struct kmem_cache *hashlimit_cachep __read_mostly; @@ -215,16 +240,18 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(struct work_struct *work); -static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, - u_int8_t family) +static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, + const char *name, u_int8_t family, + struct xt_hashlimit_htable **out_hinfo, + int revision) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; - unsigned int size; - unsigned int i; + unsigned int size, i; + int ret; - if (minfo->cfg.size) { - size = minfo->cfg.size; + if (cfg->size) { + size = cfg->size; } else { size = (totalram_pages << PAGE_SHIFT) / 16384 / sizeof(struct list_head); @@ -238,10 +265,14 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, sizeof(struct list_head) * size); if (hinfo == NULL) return -ENOMEM; - minfo->hinfo = hinfo; + *out_hinfo = hinfo; /* copy match config into hashtable config */ - memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); + ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2); + + if (ret) + return ret; + hinfo->cfg.size = size; if (hinfo->cfg.max == 0) hinfo->cfg.max = 8 * hinfo->cfg.size; @@ -255,17 +286,18 @@ static int htable_create(struct net *net, struct xt_hashlimit_mtinfo1 *minfo, hinfo->count = 0; hinfo->family = family; hinfo->rnd_initialized = false; - hinfo->name = kstrdup(minfo->name, GFP_KERNEL); + hinfo->name = kstrdup(name, GFP_KERNEL); if (!hinfo->name) { vfree(hinfo); return -ENOMEM; } spin_lock_init(&hinfo->lock); - hinfo->pde = proc_create_data(minfo->name, 0, + hinfo->pde = proc_create_data(name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - &dl_file_ops, hinfo); + (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops, + hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -398,7 +430,8 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) (slowest userspace tool allows), which means CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. */ -#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ_v1 (0xFFFFFFFF / (HZ*60*60*24)) +#define MAX_CPJ (0xFFFFFFFFFFFFFFFF / (HZ*60*60*24)) /* Repeated shift and or gives us all 1s, final shift and add 1 gives * us the power of 2 below the theoretical max, so GCC simply does a @@ -408,9 +441,12 @@ static void htable_put(struct xt_hashlimit_htable *hinfo) #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4)) #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8)) #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) +#define _POW2_BELOW64(x) (_POW2_BELOW32(x)|_POW2_BELOW32((x)>>32)) #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) +#define POW2_BELOW64(x) ((_POW2_BELOW64(x)>>1) + 1) -#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) +#define CREDITS_PER_JIFFY POW2_BELOW64(MAX_CPJ) +#define CREDITS_PER_JIFFY_v1 POW2_BELOW32(MAX_CPJ_v1) /* in byte mode, the lowest possible rate is one packet/second. * credit_cap is used as a counter that tells us how many times we can @@ -425,14 +461,24 @@ static u32 xt_hashlimit_len_to_chunks(u32 len) } /* Precision saver. */ -static u32 user2credits(u32 user) +static u64 user2credits(u64 user, int revision) { - /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) - /* Divide first. */ - return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; + if (revision == 1) { + /* If multiplying would overflow... */ + if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY_v1)) + /* Divide first. */ + return (user / XT_HASHLIMIT_SCALE) *\ + HZ * CREDITS_PER_JIFFY_v1; - return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE; + return (user * HZ * CREDITS_PER_JIFFY_v1) \ + / XT_HASHLIMIT_SCALE; + } else { + if (user > 0xFFFFFFFFFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) + return (user / XT_HASHLIMIT_SCALE_v2) *\ + HZ * CREDITS_PER_JIFFY; + + return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE_v2; + } } static u32 user2credits_byte(u32 user) @@ -442,10 +488,11 @@ static u32 user2credits_byte(u32 user) return (u32) (us >> 32); } -static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) +static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, + u32 mode, int revision) { unsigned long delta = now - dh->rateinfo.prev; - u32 cap; + u64 cap, cpj; if (delta == 0) return; @@ -453,7 +500,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) dh->rateinfo.prev = now; if (mode & XT_HASHLIMIT_BYTES) { - u32 tmp = dh->rateinfo.credit; + u64 tmp = dh->rateinfo.credit; dh->rateinfo.credit += CREDITS_PER_JIFFY_BYTES * delta; cap = CREDITS_PER_JIFFY_BYTES * HZ; if (tmp >= dh->rateinfo.credit) {/* overflow */ @@ -461,7 +508,9 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) return; } } else { - dh->rateinfo.credit += delta * CREDITS_PER_JIFFY; + cpj = (revision == 1) ? + CREDITS_PER_JIFFY_v1 : CREDITS_PER_JIFFY; + dh->rateinfo.credit += delta * cpj; cap = dh->rateinfo.credit_cap; } if (dh->rateinfo.credit > cap) @@ -469,7 +518,7 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode) } static void rateinfo_init(struct dsthash_ent *dh, - struct xt_hashlimit_htable *hinfo) + struct xt_hashlimit_htable *hinfo, int revision) { dh->rateinfo.prev = jiffies; if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { @@ -478,8 +527,8 @@ static void rateinfo_init(struct dsthash_ent *dh, dh->rateinfo.credit_cap = hinfo->cfg.burst; } else { dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); + hinfo->cfg.burst, revision); + dh->rateinfo.cost = user2credits(hinfo->cfg.avg, revision); dh->rateinfo.credit_cap = dh->rateinfo.credit; } } @@ -603,15 +652,15 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) } static bool -hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, + struct xt_hashlimit_htable *hinfo, + const struct hashlimit_cfg2 *cfg, int revision) { - const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; - struct xt_hashlimit_htable *hinfo = info->hinfo; unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; bool race = false; - u32 cost; + u64 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; @@ -626,18 +675,18 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) } else if (race) { /* Already got an entry, update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } else { dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_init(dh, hinfo); + rateinfo_init(dh, hinfo, revision); } } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_recalc(dh, now, hinfo->cfg.mode); + rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } - if (info->cfg.mode & XT_HASHLIMIT_BYTES) + if (cfg->mode & XT_HASHLIMIT_BYTES) cost = hashlimit_byte_cost(skb->len, dh); else cost = dh->rateinfo.cost; @@ -647,73 +696,136 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); rcu_read_unlock_bh(); - return !(info->cfg.mode & XT_HASHLIMIT_INVERT); + return !(cfg->mode & XT_HASHLIMIT_INVERT); } spin_unlock(&dh->lock); rcu_read_unlock_bh(); /* default match is underlimit - so over the limit, we need to invert */ - return info->cfg.mode & XT_HASHLIMIT_INVERT; + return cfg->mode & XT_HASHLIMIT_INVERT; hotdrop: par->hotdrop = true; return false; } -static int hashlimit_mt_check(const struct xt_mtchk_param *par) +static bool +hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { - struct net *net = par->net; - struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + struct hashlimit_cfg2 cfg = {}; int ret; - if (info->cfg.gc_interval == 0 || info->cfg.expire == 0) - return -EINVAL; - if (info->name[sizeof(info->name)-1] != '\0') + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_common(skb, par, hinfo, &cfg, 1); +} + +static bool +hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; + + return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2); +} + +static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, + struct xt_hashlimit_htable **hinfo, + struct hashlimit_cfg2 *cfg, + const char *name, int revision) +{ + struct net *net = par->net; + int ret; + + if (cfg->gc_interval == 0 || cfg->expire == 0) return -EINVAL; if (par->family == NFPROTO_IPV4) { - if (info->cfg.srcmask > 32 || info->cfg.dstmask > 32) + if (cfg->srcmask > 32 || cfg->dstmask > 32) return -EINVAL; } else { - if (info->cfg.srcmask > 128 || info->cfg.dstmask > 128) + if (cfg->srcmask > 128 || cfg->dstmask > 128) return -EINVAL; } - if (info->cfg.mode & ~XT_HASHLIMIT_ALL) { + if (cfg->mode & ~XT_HASHLIMIT_ALL) { pr_info("Unknown mode mask %X, kernel too old?\n", - info->cfg.mode); + cfg->mode); return -EINVAL; } /* Check for overflow. */ - if (info->cfg.mode & XT_HASHLIMIT_BYTES) { - if (user2credits_byte(info->cfg.avg) == 0) { - pr_info("overflow, rate too high: %u\n", info->cfg.avg); + if (cfg->mode & XT_HASHLIMIT_BYTES) { + if (user2credits_byte(cfg->avg) == 0) { + pr_info("overflow, rate too high: %llu\n", cfg->avg); return -EINVAL; } - } else if (info->cfg.burst == 0 || - user2credits(info->cfg.avg * info->cfg.burst) < - user2credits(info->cfg.avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->cfg.avg, info->cfg.burst); + } else if (cfg->burst == 0 || + user2credits(cfg->avg * cfg->burst, revision) < + user2credits(cfg->avg, revision)) { + pr_info("overflow, try lower: %llu/%llu\n", + cfg->avg, cfg->burst); return -ERANGE; } mutex_lock(&hashlimit_mutex); - info->hinfo = htable_find_get(net, info->name, par->family); - if (info->hinfo == NULL) { - ret = htable_create(net, info, par->family); + *hinfo = htable_find_get(net, name, par->family); + if (*hinfo == NULL) { + ret = htable_create(net, cfg, name, par->family, + hinfo, revision); if (ret < 0) { mutex_unlock(&hashlimit_mutex); return ret; } } mutex_unlock(&hashlimit_mutex); + return 0; } +static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + struct hashlimit_cfg2 cfg = {}; + int ret; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + ret = cfg_copy(&cfg, (void *)&info->cfg, 1); + + if (ret) + return ret; + + return hashlimit_mt_check_common(par, &info->hinfo, + &cfg, info->name, 1); +} + +static int hashlimit_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, + info->name, 2); +} + +static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) +{ + const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + + htable_put(info->hinfo); +} + static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) { - const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; htable_put(info->hinfo); } @@ -723,8 +835,18 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 1, .family = NFPROTO_IPV4, - .match = hashlimit_mt, + .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .checkentry = hashlimit_mt_check_v1, + .destroy = hashlimit_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV4, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo2), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, @@ -734,8 +856,18 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 1, .family = NFPROTO_IPV6, - .match = hashlimit_mt, + .match = hashlimit_mt_v1, .matchsize = sizeof(struct xt_hashlimit_mtinfo1), + .checkentry = hashlimit_mt_check_v1, + .destroy = hashlimit_mt_destroy_v1, + .me = THIS_MODULE, + }, + { + .name = "hashlimit", + .revision = 2, + .family = NFPROTO_IPV6, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo2), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, @@ -786,18 +918,12 @@ static void dl_seq_stop(struct seq_file *s, void *v) spin_unlock_bh(&htable->lock); } -static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, - struct seq_file *s) +static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) { - const struct xt_hashlimit_htable *ht = s->private; - - spin_lock(&ent->lock); - /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies, ht->cfg.mode); - switch (family) { case NFPROTO_IPV4: - seq_printf(s, "%ld %pI4:%u->%pI4:%u %u %u %u\n", + seq_printf(s, "%ld %pI4:%u->%pI4:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip.src, ntohs(ent->dst.src_port), @@ -808,7 +934,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, break; #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) case NFPROTO_IPV6: - seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n", + seq_printf(s, "%ld %pI6:%u->%pI6:%u %llu %llu %llu\n", (long)(ent->expires - jiffies)/HZ, &ent->dst.ip6.src, ntohs(ent->dst.src_port), @@ -821,10 +947,52 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, default: BUG(); } +} + +static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 1); + + dl_seq_print(ent, family, s); + spin_unlock(&ent->lock); return seq_has_overflowed(s); } +static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2); + + dl_seq_print(ent, family, s); + + spin_unlock(&ent->lock); + return seq_has_overflowed(s); +} + +static int dl_seq_show_v1(struct seq_file *s, void *v) +{ + struct xt_hashlimit_htable *htable = s->private; + unsigned int *bucket = (unsigned int *)v; + struct dsthash_ent *ent; + + if (!hlist_empty(&htable->hash[*bucket])) { + hlist_for_each_entry(ent, &htable->hash[*bucket], node) + if (dl_seq_real_show_v1(ent, htable->family, s)) + return -1; + } + return 0; +} + static int dl_seq_show(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; @@ -839,6 +1007,13 @@ static int dl_seq_show(struct seq_file *s, void *v) return 0; } +static const struct seq_operations dl_seq_ops_v1 = { + .start = dl_seq_start, + .next = dl_seq_next, + .stop = dl_seq_stop, + .show = dl_seq_show_v1 +}; + static const struct seq_operations dl_seq_ops = { .start = dl_seq_start, .next = dl_seq_next, @@ -846,9 +1021,9 @@ static const struct seq_operations dl_seq_ops = { .show = dl_seq_show }; -static int dl_proc_open(struct inode *inode, struct file *file) +static int dl_proc_open_v1(struct inode *inode, struct file *file) { - int ret = seq_open(file, &dl_seq_ops); + int ret = seq_open(file, &dl_seq_ops_v1); if (!ret) { struct seq_file *sf = file->private_data; @@ -857,6 +1032,26 @@ static int dl_proc_open(struct inode *inode, struct file *file) return ret; } +static int dl_proc_open(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &dl_seq_ops); + + if (!ret) { + struct seq_file *sf = file->private_data; + + sf->private = PDE_DATA(inode); + } + return ret; +} + +static const struct file_operations dl_file_ops_v1 = { + .owner = THIS_MODULE, + .open = dl_proc_open_v1, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + static const struct file_operations dl_file_ops = { .owner = THIS_MODULE, .open = dl_proc_open, diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 9f4ab00c8050..f679dd4c272a 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -41,7 +41,7 @@ helper_mt(const struct sk_buff *skb, struct xt_action_param *par) if (!master_help) return ret; - /* rcu_read_lock()ed by nf_hook_slow */ + /* rcu_read_lock()ed by nf_hook_thresh */ helper = rcu_dereference(master_help->helper); if (!helper) return ret; @@ -65,7 +65,7 @@ static int helper_mt_check(const struct xt_mtchk_param *par) par->family); return ret; } - info->name[29] = '\0'; + info->name[sizeof(info->name) - 1] = '\0'; return 0; } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index e5f18988aee0..bb33598e4530 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,8 +107,8 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out and --physdev-is-out are only" - "supported in the FORWARD and POSTROUTING chains with" + pr_info("using --physdev-out and --physdev-is-out are only " + "supported in the FORWARD and POSTROUTING chains with " "bridged traffic.\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index d725a27743a1..e3b7a09b103e 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -110,7 +110,6 @@ static const struct file_operations recent_old_fops, recent_mt_fops; #endif static u_int32_t hash_rnd __read_mostly; -static bool hash_rnd_inited __read_mostly; static inline unsigned int recent_entry_hash4(const union nf_inet_addr *addr) { @@ -340,10 +339,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, int ret = -EINVAL; size_t sz; - if (unlikely(!hash_rnd_inited)) { - get_random_bytes(&hash_rnd, sizeof(hash_rnd)); - hash_rnd_inited = true; - } + net_get_random_once(&hash_rnd, sizeof(hash_rnd)); + if (info->check_set & ~XT_RECENT_VALID_FLAGS) { pr_info("Unsupported user space flags (%08x)\n", info->check_set); diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c index ef36a56a02c6..4dedb96d1a06 100644 --- a/net/netfilter/xt_sctp.c +++ b/net/netfilter/xt_sctp.c @@ -68,7 +68,7 @@ match_packet(const struct sk_buff *skb, ++i, offset, sch->type, htons(sch->length), sch->flags); #endif - offset += WORD_ROUND(ntohs(sch->length)); + offset += SCTP_PAD4(ntohs(sch->length)); pr_debug("skb->len: %d\toffset: %d\n", skb->len, offset); diff --git a/net/netlink/diag.c b/net/netlink/diag.c index 8dd836a8dd60..b2f0e986a6f4 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -63,43 +63,74 @@ out_nlmsg_trim: static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, int protocol, int s_num) { + struct rhashtable_iter *hti = (void *)cb->args[2]; struct netlink_table *tbl = &nl_table[protocol]; - struct rhashtable *ht = &tbl->hash; - const struct bucket_table *htbl = rht_dereference_rcu(ht->tbl, ht); struct net *net = sock_net(skb->sk); struct netlink_diag_req *req; struct netlink_sock *nlsk; struct sock *sk; - int ret = 0, num = 0, i; + int num = 2; + int ret = 0; req = nlmsg_data(cb->nlh); - for (i = 0; i < htbl->size; i++) { - struct rhash_head *pos; + if (s_num > 1) + goto mc_list; - rht_for_each_entry_rcu(nlsk, pos, htbl, i, node) { - sk = (struct sock *)nlsk; + num--; - if (!net_eq(sock_net(sk), net)) - continue; - if (num < s_num) { - num++; + if (!hti) { + hti = kmalloc(sizeof(*hti), GFP_KERNEL); + if (!hti) + return -ENOMEM; + + cb->args[2] = (long)hti; + } + + if (!s_num) + rhashtable_walk_enter(&tbl->hash, hti); + + ret = rhashtable_walk_start(hti); + if (ret == -EAGAIN) + ret = 0; + if (ret) + goto stop; + + while ((nlsk = rhashtable_walk_next(hti))) { + if (IS_ERR(nlsk)) { + ret = PTR_ERR(nlsk); + if (ret == -EAGAIN) { + ret = 0; continue; } + break; + } - if (sk_diag_fill(sk, skb, req, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - NLM_F_MULTI, - sock_i_ino(sk)) < 0) { - ret = 1; - goto done; - } + sk = (struct sock *)nlsk; - num++; + if (!net_eq(sock_net(sk), net)) + continue; + + if (sk_diag_fill(sk, skb, req, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI, + sock_i_ino(sk)) < 0) { + ret = 1; + break; } } +stop: + rhashtable_walk_stop(hti); + if (ret) + goto done; + + rhashtable_walk_exit(hti); + num++; + +mc_list: + read_lock(&nl_table_lock); sk_for_each_bound(sk, &tbl->mc_list) { if (sk_hashed(sk)) continue; @@ -116,13 +147,14 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, NLM_F_MULTI, sock_i_ino(sk)) < 0) { ret = 1; - goto done; + break; } num++; } + read_unlock(&nl_table_lock); + done: cb->args[0] = num; - cb->args[1] = protocol; return ret; } @@ -131,20 +163,20 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) { struct netlink_diag_req *req; int s_num = cb->args[0]; + int err = 0; req = nlmsg_data(cb->nlh); - rcu_read_lock(); - read_lock(&nl_table_lock); - if (req->sdiag_protocol == NDIAG_PROTO_ALL) { int i; for (i = cb->args[1]; i < MAX_LINKS; i++) { - if (__netlink_diag_dump(skb, cb, i, s_num)) + err = __netlink_diag_dump(skb, cb, i, s_num); + if (err) break; s_num = 0; } + cb->args[1] = i; } else { if (req->sdiag_protocol >= MAX_LINKS) { read_unlock(&nl_table_lock); @@ -152,13 +184,22 @@ static int netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) return -ENOENT; } - __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); + err = __netlink_diag_dump(skb, cb, req->sdiag_protocol, s_num); } - read_unlock(&nl_table_lock); - rcu_read_unlock(); + return err < 0 ? err : skb->len; +} - return skb->len; +static int netlink_diag_dump_done(struct netlink_callback *cb) +{ + struct rhashtable_iter *hti = (void *)cb->args[2]; + + if (cb->args[0] == 1) + rhashtable_walk_exit(hti); + + kfree(hti); + + return 0; } static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) @@ -172,6 +213,7 @@ static int netlink_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (h->nlmsg_flags & NLM_F_DUMP) { struct netlink_dump_control c = { .dump = netlink_diag_dump, + .done = netlink_diag_dump_done, }; return netlink_dump_start(net->diag_nlsk, skb, h, &c); } else diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a09132a69869..23cc12639ba7 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -977,7 +977,7 @@ static int genl_ctrl_event(int event, struct genl_family *family, return 0; } -static struct genl_ops genl_ctrl_ops[] = { +static const struct genl_ops genl_ctrl_ops[] = { { .cmd = CTRL_CMD_GETFAMILY, .doit = ctrl_getfamily, @@ -986,7 +986,7 @@ static struct genl_ops genl_ctrl_ops[] = { }, }; -static struct genl_multicast_group genl_ctrl_groups[] = { +static const struct genl_multicast_group genl_ctrl_groups[] = { { .name = "notify", }, }; diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 1ecbd7715f6d..4e03f64709bc 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -71,6 +71,8 @@ struct ovs_frag_data { static DEFINE_PER_CPU(struct ovs_frag_data, ovs_frag_data_storage); #define DEFERRED_ACTION_FIFO_SIZE 10 +#define OVS_RECURSION_LIMIT 5 +#define OVS_DEFERRED_ACTION_THRESHOLD (OVS_RECURSION_LIMIT - 2) struct action_fifo { int head; int tail; @@ -78,7 +80,12 @@ struct action_fifo { struct deferred_action fifo[DEFERRED_ACTION_FIFO_SIZE]; }; +struct recirc_keys { + struct sw_flow_key key[OVS_DEFERRED_ACTION_THRESHOLD]; +}; + static struct action_fifo __percpu *action_fifos; +static struct recirc_keys __percpu *recirc_keys; static DEFINE_PER_CPU(int, exec_actions_level); static void action_fifo_init(struct action_fifo *fifo) @@ -153,7 +160,7 @@ static void update_ethertype(struct sk_buff *skb, struct ethhdr *hdr, static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_mpls *mpls) { - __be32 *new_mpls_lse; + struct mpls_shim_hdr *new_mpls_lse; /* Networking stack do not allow simultaneous Tunnel and MPLS GSO. */ if (skb->encapsulation) @@ -162,19 +169,23 @@ static int push_mpls(struct sk_buff *skb, struct sw_flow_key *key, if (skb_cow_head(skb, MPLS_HLEN) < 0) return -ENOMEM; + if (!skb->inner_protocol) { + skb_set_inner_network_header(skb, skb->mac_len); + skb_set_inner_protocol(skb, skb->protocol); + } + skb_push(skb, MPLS_HLEN); memmove(skb_mac_header(skb) - MPLS_HLEN, skb_mac_header(skb), skb->mac_len); skb_reset_mac_header(skb); + skb_set_network_header(skb, skb->mac_len); - new_mpls_lse = (__be32 *)skb_mpls_header(skb); - *new_mpls_lse = mpls->mpls_lse; + new_mpls_lse = mpls_hdr(skb); + new_mpls_lse->label_stack_entry = mpls->mpls_lse; skb_postpush_rcsum(skb, new_mpls_lse, MPLS_HLEN); update_ethertype(skb, eth_hdr(skb), mpls->mpls_ethertype); - if (!skb->inner_protocol) - skb_set_inner_protocol(skb, skb->protocol); skb->protocol = mpls->mpls_ethertype; invalidate_flow_key(key); @@ -191,18 +202,19 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, if (unlikely(err)) return err; - skb_postpull_rcsum(skb, skb_mpls_header(skb), MPLS_HLEN); + skb_postpull_rcsum(skb, mpls_hdr(skb), MPLS_HLEN); memmove(skb_mac_header(skb) + MPLS_HLEN, skb_mac_header(skb), skb->mac_len); __skb_pull(skb, MPLS_HLEN); skb_reset_mac_header(skb); + skb_set_network_header(skb, skb->mac_len); - /* skb_mpls_header() is used to locate the ethertype - * field correctly in the presence of VLAN tags. + /* mpls_hdr() is used to locate the ethertype field correctly in the + * presence of VLAN tags. */ - hdr = (struct ethhdr *)(skb_mpls_header(skb) - ETH_HLEN); + hdr = (struct ethhdr *)((void *)mpls_hdr(skb) - ETH_HLEN); update_ethertype(skb, hdr, ethertype); if (eth_p_mpls(skb->protocol)) skb->protocol = ethertype; @@ -214,7 +226,7 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, const __be32 *mpls_lse, const __be32 *mask) { - __be32 *stack; + struct mpls_shim_hdr *stack; __be32 lse; int err; @@ -222,16 +234,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, if (unlikely(err)) return err; - stack = (__be32 *)skb_mpls_header(skb); - lse = OVS_MASKED(*stack, *mpls_lse, *mask); + stack = mpls_hdr(skb); + lse = OVS_MASKED(stack->label_stack_entry, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), lse }; + __be32 diff[] = { ~(stack->label_stack_entry), lse }; skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = lse; + stack->label_stack_entry = lse; flow_key->mpls.top_lse = lse; return 0; } @@ -241,20 +253,24 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { invalidate_flow_key(key); - else - key->eth.tci = 0; + } else { + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + } return err; } static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (skb_vlan_tag_present(skb)) + if (skb_vlan_tag_present(skb)) { invalidate_flow_key(key); - else - key->eth.tci = vlan->vlan_tci; + } else { + key->eth.vlan.tci = vlan->vlan_tci; + key->eth.vlan.tpid = vlan->vlan_tpid; + } return skb_vlan_push(skb, vlan->vlan_tpid, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); } @@ -1011,6 +1027,7 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, const struct nlattr *a, int rem) { struct deferred_action *da; + int level; if (!is_flow_key_valid(key)) { int err; @@ -1034,6 +1051,18 @@ static int execute_recirc(struct datapath *dp, struct sk_buff *skb, return 0; } + level = this_cpu_read(exec_actions_level); + if (level <= OVS_DEFERRED_ACTION_THRESHOLD) { + struct recirc_keys *rks = this_cpu_ptr(recirc_keys); + struct sw_flow_key *recirc_key = &rks->key[level - 1]; + + *recirc_key = *key; + recirc_key->recirc_id = nla_get_u32(a); + ovs_dp_process_packet(skb, recirc_key); + + return 0; + } + da = add_deferred_actions(skb, key, NULL); if (da) { da->pkt_key.recirc_id = nla_get_u32(a); @@ -1200,11 +1229,10 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_actions *acts, struct sw_flow_key *key) { - static const int ovs_recursion_limit = 5; int err, level; level = __this_cpu_inc_return(exec_actions_level); - if (unlikely(level > ovs_recursion_limit)) { + if (unlikely(level > OVS_RECURSION_LIMIT)) { net_crit_ratelimited("ovs: recursion limit reached on datapath %s, probable configuration error\n", ovs_dp_name(dp)); kfree_skb(skb); @@ -1229,10 +1257,17 @@ int action_fifos_init(void) if (!action_fifos) return -ENOMEM; + recirc_keys = alloc_percpu(struct recirc_keys); + if (!recirc_keys) { + free_percpu(action_fifos); + return -ENOMEM; + } + return 0; } void action_fifos_exit(void) { free_percpu(action_fifos); + free_percpu(recirc_keys); } diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e054a748ff25..31045ef44a82 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1367,7 +1367,7 @@ static void __ovs_ct_free_action(struct ovs_conntrack_info *ct_info) if (ct_info->helper) module_put(ct_info->helper->me); if (ct_info->ct) - nf_ct_put(ct_info->ct); + nf_ct_tmpl_free(ct_info->ct); } void ovs_ct_init(struct net *net) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 524c0fd3078e..4d67ea856067 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -928,7 +928,6 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; - struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); @@ -956,20 +955,24 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &new_flow->key, false, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &key, true, &mask); - /* Extract flow identifier. */ error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], - &key, log); + &new_flow->key, log); if (error) goto err_kfree_flow; + /* unmasked key is needed to match when ufid is not used. */ + if (ovs_identifier_is_key(&new_flow->id)) + match.key = new_flow->id.unmasked_key; + + ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask); + /* Validate actions. */ error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, &acts, log); @@ -996,7 +999,7 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) if (ovs_identifier_is_ufid(&new_flow->id)) flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); if (!flow) - flow = ovs_flow_tbl_lookup(&dp->table, &key); + flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -1121,7 +1124,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, &mask); + ovs_match_init(&match, &key, true, &mask); error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); } else if (!ufid_present) { @@ -1238,7 +1241,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); } else if (!ufid_present) { @@ -1297,7 +1300,7 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); if (a[OVS_FLOW_ATTR_KEY]) { - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (unlikely(err)) @@ -2437,3 +2440,7 @@ module_exit(dp_cleanup); MODULE_DESCRIPTION("Open vSwitch switching datapath"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 0ea128eeeab2..c8c82e109c68 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -72,32 +73,33 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); + int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); - stats = rcu_dereference(flow->stats[node]); + stats = rcu_dereference(flow->stats[cpu]); - /* Check if already have node-specific stats. */ + /* Check if already have CPU-specific stats. */ if (likely(stats)) { spin_lock(&stats->lock); /* Mark if we write on the pre-allocated stats. */ - if (node == 0 && unlikely(flow->stats_last_writer != node)) - flow->stats_last_writer = node; + if (cpu == 0 && unlikely(flow->stats_last_writer != cpu)) + flow->stats_last_writer = cpu; } else { stats = rcu_dereference(flow->stats[0]); /* Pre-allocated. */ spin_lock(&stats->lock); - /* If the current NUMA-node is the only writer on the + /* If the current CPU is the only writer on the * pre-allocated stats keep using them. */ - if (unlikely(flow->stats_last_writer != node)) { + if (unlikely(flow->stats_last_writer != cpu)) { /* A previous locker may have already allocated the - * stats, so we need to check again. If node-specific + * stats, so we need to check again. If CPU-specific * stats were already allocated, we update the pre- * allocated stats as we have already locked them. */ - if (likely(flow->stats_last_writer != NUMA_NO_NODE) - && likely(!rcu_access_pointer(flow->stats[node]))) { - /* Try to allocate node-specific stats. */ + if (likely(flow->stats_last_writer != -1) && + likely(!rcu_access_pointer(flow->stats[cpu]))) { + /* Try to allocate CPU-specific stats. */ struct flow_stats *new_stats; new_stats = @@ -114,12 +116,12 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, new_stats->tcp_flags = tcp_flags; spin_lock_init(&new_stats->lock); - rcu_assign_pointer(flow->stats[node], + rcu_assign_pointer(flow->stats[cpu], new_stats); goto unlock; } } - flow->stats_last_writer = node; + flow->stats_last_writer = cpu; } } @@ -136,14 +138,15 @@ void ovs_flow_stats_get(const struct sw_flow *flow, struct ovs_flow_stats *ovs_stats, unsigned long *used, __be16 *tcp_flags) { - int node; + int cpu; *used = 0; *tcp_flags = 0; memset(ovs_stats, 0, sizeof(*ovs_stats)); - for_each_node(node) { - struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { /* Local CPU may write on non-local stats, so we must @@ -163,10 +166,11 @@ void ovs_flow_stats_get(const struct sw_flow *flow, /* Called with ovs_mutex. */ void ovs_flow_stats_clear(struct sw_flow *flow) { - int node; + int cpu; - for_each_node(node) { - struct flow_stats *stats = ovsl_dereference(flow->stats[node]); + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { spin_lock_bh(&stats->lock); @@ -302,24 +306,57 @@ static bool icmp6hdr_ok(struct sk_buff *skb) sizeof(struct icmp6hdr)); } -static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +/** + * Parse vlan tag from vlan header. + * Returns ERROR on memory error. + * Returns 0 if it encounters a non-vlan or incomplete packet. + * Returns 1 after successfully parsing vlan tag. + */ +static int parse_vlan_tag(struct sk_buff *skb, struct vlan_head *key_vh) { - struct qtag_prefix { - __be16 eth_type; /* ETH_P_8021Q */ - __be16 tci; - }; - struct qtag_prefix *qp; + struct vlan_head *vh = (struct vlan_head *)skb->data; - if (unlikely(skb->len < sizeof(struct qtag_prefix) + sizeof(__be16))) + if (likely(!eth_type_vlan(vh->tpid))) return 0; - if (unlikely(!pskb_may_pull(skb, sizeof(struct qtag_prefix) + - sizeof(__be16)))) + if (unlikely(skb->len < sizeof(struct vlan_head) + sizeof(__be16))) + return 0; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct vlan_head) + + sizeof(__be16)))) return -ENOMEM; - qp = (struct qtag_prefix *) skb->data; - key->eth.tci = qp->tci | htons(VLAN_TAG_PRESENT); - __skb_pull(skb, sizeof(struct qtag_prefix)); + vh = (struct vlan_head *)skb->data; + key_vh->tci = vh->tci | htons(VLAN_TAG_PRESENT); + key_vh->tpid = vh->tpid; + + __skb_pull(skb, sizeof(struct vlan_head)); + return 1; +} + +static int parse_vlan(struct sk_buff *skb, struct sw_flow_key *key) +{ + int res; + + key->eth.vlan.tci = 0; + key->eth.vlan.tpid = 0; + key->eth.cvlan.tci = 0; + key->eth.cvlan.tpid = 0; + + if (likely(skb_vlan_tag_present(skb))) { + key->eth.vlan.tci = htons(skb->vlan_tci); + key->eth.vlan.tpid = skb->vlan_proto; + } else { + /* Parse outer vlan tag in the non-accelerated case. */ + res = parse_vlan_tag(skb, &key->eth.vlan); + if (res <= 0) + return res; + } + + /* Parse inner vlan tag. */ + res = parse_vlan_tag(skb, &key->eth.cvlan); + if (res <= 0) + return res; return 0; } @@ -480,12 +517,8 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) * update skb->csum here. */ - key->eth.tci = 0; - if (skb_vlan_tag_present(skb)) - key->eth.tci = htons(skb->vlan_tci); - else if (eth->h_proto == htons(ETH_P_8021Q)) - if (unlikely(parse_vlan(skb, key))) - return -ENOMEM; + if (unlikely(parse_vlan(skb, key))) + return -ENOMEM; key->eth.type = parse_ethertype(skb); if (unlikely(key->eth.type == htons(0))) @@ -600,12 +633,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) } else if (eth_p_mpls(key->eth.type)) { size_t stack_len = MPLS_HLEN; - /* In the presence of an MPLS label stack the end of the L2 - * header and the beginning of the L3 header differ. - * - * Advance network_header to the beginning of the L3 - * header. mac_len corresponds to the end of the L2 header. - */ + skb_set_inner_network_header(skb, skb->mac_len); while (1) { __be32 lse; @@ -613,12 +641,12 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (unlikely(error)) return 0; - memcpy(&lse, skb_network_header(skb), MPLS_HLEN); + memcpy(&lse, skb_inner_network_header(skb), MPLS_HLEN); if (stack_len == MPLS_HLEN) memcpy(&key->mpls.top_lse, &lse, MPLS_HLEN); - skb_set_network_header(skb, skb->mac_len + stack_len); + skb_set_inner_network_header(skb, skb->mac_len + stack_len); if (lse & htonl(MPLS_LS_S_MASK)) break; @@ -734,8 +762,6 @@ int ovs_flow_key_extract_userspace(struct net *net, const struct nlattr *attr, { int err; - memset(key, 0, OVS_SW_FLOW_KEY_METADATA_SIZE); - /* Extract metadata from netlink attributes. */ err = ovs_nla_get_flow_metadata(net, attr, key, log); if (err) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index 03378e75a67c..ae783f5c6695 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -50,6 +50,11 @@ struct ovs_tunnel_info { struct metadata_dst *tun_dst; }; +struct vlan_head { + __be16 tpid; /* Vlan type. Generally 802.1q or 802.1ad.*/ + __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ +}; + #define OVS_SW_FLOW_KEY_METADATA_SIZE \ (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) @@ -69,7 +74,8 @@ struct sw_flow_key { struct { u8 src[ETH_ALEN]; /* Ethernet source address. */ u8 dst[ETH_ALEN]; /* Ethernet destination address. */ - __be16 tci; /* 0 if no VLAN, VLAN_TAG_PRESENT set otherwise. */ + struct vlan_head vlan; + struct vlan_head cvlan; __be16 type; /* Ethernet frame type. */ } eth; union { @@ -172,14 +178,14 @@ struct sw_flow { struct hlist_node node[2]; u32 hash; } flow_table, ufid_table; - int stats_last_writer; /* NUMA-node id of the last writer on + int stats_last_writer; /* CPU id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; - struct flow_stats __rcu *stats[]; /* One for each NUMA node. First one + struct flow_stats __rcu *stats[]; /* One for each CPU. First one * is allocated at flow creation time, * the rest are allocated on demand * while holding the 'stats[0].lock'. diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index c78a6a1476fb..ae25ded82b3b 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -808,6 +808,167 @@ int ovs_nla_put_tunnel_info(struct sk_buff *skb, ip_tunnel_info_af(tun_info)); } +static int encode_vlan_from_nlattrs(struct sw_flow_match *match, + const struct nlattr *a[], + bool is_mask, bool inner) +{ + __be16 tci = 0; + __be16 tpid = 0; + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (a[OVS_KEY_ATTR_ETHERTYPE]) + tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (likely(!inner)) { + SW_FLOW_KEY_PUT(match, eth.vlan.tpid, tpid, is_mask); + SW_FLOW_KEY_PUT(match, eth.vlan.tci, tci, is_mask); + } else { + SW_FLOW_KEY_PUT(match, eth.cvlan.tpid, tpid, is_mask); + SW_FLOW_KEY_PUT(match, eth.cvlan.tci, tci, is_mask); + } + return 0; +} + +static int validate_vlan_from_nlattrs(const struct sw_flow_match *match, + u64 key_attrs, bool inner, + const struct nlattr **a, bool log) +{ + __be16 tci = 0; + + if (!((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && + (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && + eth_type_vlan(nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE])))) { + /* Not a VLAN. */ + return 0; + } + + if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && + (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { + OVS_NLERR(log, "Invalid %s frame", (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (!(tci & htons(VLAN_TAG_PRESENT))) { + if (tci) { + OVS_NLERR(log, "%s TCI does not have VLAN_TAG_PRESENT bit set.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } else if (nla_len(a[OVS_KEY_ATTR_ENCAP])) { + /* Corner case for truncated VLAN header. */ + OVS_NLERR(log, "Truncated %s header has non-zero encap attribute.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + } + + return 1; +} + +static int validate_vlan_mask_from_nlattrs(const struct sw_flow_match *match, + u64 key_attrs, bool inner, + const struct nlattr **a, bool log) +{ + __be16 tci = 0; + __be16 tpid = 0; + bool encap_valid = !!(match->key->eth.vlan.tci & + htons(VLAN_TAG_PRESENT)); + bool i_encap_valid = !!(match->key->eth.cvlan.tci & + htons(VLAN_TAG_PRESENT)); + + if (!(key_attrs & (1 << OVS_KEY_ATTR_ENCAP))) { + /* Not a VLAN. */ + return 0; + } + + if ((!inner && !encap_valid) || (inner && !i_encap_valid)) { + OVS_NLERR(log, "Encap mask attribute is set for non-%s frame.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + if (a[OVS_KEY_ATTR_VLAN]) + tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); + + if (a[OVS_KEY_ATTR_ETHERTYPE]) + tpid = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); + + if (tpid != htons(0xffff)) { + OVS_NLERR(log, "Must have an exact match on %s TPID (mask=%x).", + (inner) ? "C-VLAN" : "VLAN", ntohs(tpid)); + return -EINVAL; + } + if (!(tci & htons(VLAN_TAG_PRESENT))) { + OVS_NLERR(log, "%s TCI mask does not have exact match for VLAN_TAG_PRESENT bit.", + (inner) ? "C-VLAN" : "VLAN"); + return -EINVAL; + } + + return 1; +} + +static int __parse_vlan_from_nlattrs(struct sw_flow_match *match, + u64 *key_attrs, bool inner, + const struct nlattr **a, bool is_mask, + bool log) +{ + int err; + const struct nlattr *encap; + + if (!is_mask) + err = validate_vlan_from_nlattrs(match, *key_attrs, inner, + a, log); + else + err = validate_vlan_mask_from_nlattrs(match, *key_attrs, inner, + a, log); + if (err <= 0) + return err; + + err = encode_vlan_from_nlattrs(match, a, is_mask, inner); + if (err) + return err; + + *key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); + *key_attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + *key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); + + encap = a[OVS_KEY_ATTR_ENCAP]; + + if (!is_mask) + err = parse_flow_nlattrs(encap, a, key_attrs, log); + else + err = parse_flow_mask_nlattrs(encap, a, key_attrs, log); + + return err; +} + +static int parse_vlan_from_nlattrs(struct sw_flow_match *match, + u64 *key_attrs, const struct nlattr **a, + bool is_mask, bool log) +{ + int err; + bool encap_valid = false; + + err = __parse_vlan_from_nlattrs(match, key_attrs, false, a, + is_mask, log); + if (err) + return err; + + encap_valid = !!(match->key->eth.vlan.tci & htons(VLAN_TAG_PRESENT)); + if (encap_valid) { + err = __parse_vlan_from_nlattrs(match, key_attrs, true, a, + is_mask, log); + if (err) + return err; + } + + return 0; +} + static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 *attrs, const struct nlattr **a, bool is_mask, bool log) @@ -923,20 +1084,11 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, } if (attrs & (1 << OVS_KEY_ATTR_VLAN)) { - __be16 tci; - - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - if (!(tci & htons(VLAN_TAG_PRESENT))) { - if (is_mask) - OVS_NLERR(log, "VLAN TCI mask does not have exact match for VLAN_TAG_PRESENT bit."); - else - OVS_NLERR(log, "VLAN TCI does not have VLAN_TAG_PRESENT bit set."); - - return -EINVAL; - } - - SW_FLOW_KEY_PUT(match, eth.tci, tci, is_mask); - attrs &= ~(1 << OVS_KEY_ATTR_VLAN); + /* VLAN attribute is always parsed before getting here since it + * may occur multiple times. + */ + OVS_NLERR(log, "VLAN attribute unexpected."); + return -EINVAL; } if (attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) { @@ -1182,49 +1334,18 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, bool log) { const struct nlattr *a[OVS_KEY_ATTR_MAX + 1]; - const struct nlattr *encap; struct nlattr *newmask = NULL; u64 key_attrs = 0; u64 mask_attrs = 0; - bool encap_valid = false; int err; err = parse_flow_nlattrs(nla_key, a, &key_attrs, log); if (err) return err; - if ((key_attrs & (1 << OVS_KEY_ATTR_ETHERNET)) && - (key_attrs & (1 << OVS_KEY_ATTR_ETHERTYPE)) && - (nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]) == htons(ETH_P_8021Q))) { - __be16 tci; - - if (!((key_attrs & (1 << OVS_KEY_ATTR_VLAN)) && - (key_attrs & (1 << OVS_KEY_ATTR_ENCAP)))) { - OVS_NLERR(log, "Invalid Vlan frame."); - return -EINVAL; - } - - key_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - encap = a[OVS_KEY_ATTR_ENCAP]; - key_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - encap_valid = true; - - if (tci & htons(VLAN_TAG_PRESENT)) { - err = parse_flow_nlattrs(encap, a, &key_attrs, log); - if (err) - return err; - } else if (!tci) { - /* Corner case for truncated 802.1Q header. */ - if (nla_len(encap)) { - OVS_NLERR(log, "Truncated 802.1Q header has non-zero encap attribute."); - return -EINVAL; - } - } else { - OVS_NLERR(log, "Encap attr is set for non-VLAN frame"); - return -EINVAL; - } - } + err = parse_vlan_from_nlattrs(match, &key_attrs, a, false, log); + if (err) + return err; err = ovs_key_from_nlattrs(net, match, key_attrs, a, false, log); if (err) @@ -1265,46 +1386,12 @@ int ovs_nla_get_match(struct net *net, struct sw_flow_match *match, goto free_newmask; /* Always match on tci. */ - SW_FLOW_KEY_PUT(match, eth.tci, htons(0xffff), true); + SW_FLOW_KEY_PUT(match, eth.vlan.tci, htons(0xffff), true); + SW_FLOW_KEY_PUT(match, eth.cvlan.tci, htons(0xffff), true); - if (mask_attrs & 1 << OVS_KEY_ATTR_ENCAP) { - __be16 eth_type = 0; - __be16 tci = 0; - - if (!encap_valid) { - OVS_NLERR(log, "Encap mask attribute is set for non-VLAN frame."); - err = -EINVAL; - goto free_newmask; - } - - mask_attrs &= ~(1 << OVS_KEY_ATTR_ENCAP); - if (a[OVS_KEY_ATTR_ETHERTYPE]) - eth_type = nla_get_be16(a[OVS_KEY_ATTR_ETHERTYPE]); - - if (eth_type == htons(0xffff)) { - mask_attrs &= ~(1 << OVS_KEY_ATTR_ETHERTYPE); - encap = a[OVS_KEY_ATTR_ENCAP]; - err = parse_flow_mask_nlattrs(encap, a, - &mask_attrs, log); - if (err) - goto free_newmask; - } else { - OVS_NLERR(log, "VLAN frames must have an exact match on the TPID (mask=%x).", - ntohs(eth_type)); - err = -EINVAL; - goto free_newmask; - } - - if (a[OVS_KEY_ATTR_VLAN]) - tci = nla_get_be16(a[OVS_KEY_ATTR_VLAN]); - - if (!(tci & htons(VLAN_TAG_PRESENT))) { - OVS_NLERR(log, "VLAN tag present bit must have an exact match (tci_mask=%x).", - ntohs(tci)); - err = -EINVAL; - goto free_newmask; - } - } + err = parse_vlan_from_nlattrs(match, &mask_attrs, a, true, log); + if (err) + goto free_newmask; err = ovs_key_from_nlattrs(net, match, mask_attrs, a, true, log); @@ -1410,12 +1497,25 @@ int ovs_nla_get_flow_metadata(struct net *net, const struct nlattr *attr, return metadata_from_nlattrs(net, &match, &attrs, a, false, log); } +static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, + bool is_mask) +{ + __be16 eth_type = !is_mask ? vh->tpid : htons(0xffff); + + if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || + nla_put_be16(skb, OVS_KEY_ATTR_VLAN, vh->tci)) + return -EMSGSIZE; + return 0; +} + static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) { struct ovs_key_ethernet *eth_key; - struct nlattr *nla, *encap; + struct nlattr *nla; + struct nlattr *encap = NULL; + struct nlattr *in_encap = NULL; if (nla_put_u32(skb, OVS_KEY_ATTR_RECIRC_ID, output->recirc_id)) goto nla_put_failure; @@ -1464,17 +1564,21 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ether_addr_copy(eth_key->eth_src, output->eth.src); ether_addr_copy(eth_key->eth_dst, output->eth.dst); - if (swkey->eth.tci || swkey->eth.type == htons(ETH_P_8021Q)) { - __be16 eth_type; - eth_type = !is_mask ? htons(ETH_P_8021Q) : htons(0xffff); - if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, eth_type) || - nla_put_be16(skb, OVS_KEY_ATTR_VLAN, output->eth.tci)) + if (swkey->eth.vlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.vlan, is_mask)) goto nla_put_failure; encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); - if (!swkey->eth.tci) + if (!swkey->eth.vlan.tci) goto unencap; - } else - encap = NULL; + + if (swkey->eth.cvlan.tci || eth_type_vlan(swkey->eth.type)) { + if (ovs_nla_put_vlan(skb, &output->eth.cvlan, is_mask)) + goto nla_put_failure; + in_encap = nla_nest_start(skb, OVS_KEY_ATTR_ENCAP); + if (!swkey->eth.cvlan.tci) + goto unencap; + } + } if (swkey->eth.type == htons(ETH_P_802_2)) { /* @@ -1493,6 +1597,14 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, if (nla_put_be16(skb, OVS_KEY_ATTR_ETHERTYPE, output->eth.type)) goto nla_put_failure; + if (eth_type_vlan(swkey->eth.type)) { + /* There are 3 VLAN tags, we don't know anything about the rest + * of the packet, so truncate here. + */ + WARN_ON_ONCE(!(encap && in_encap)); + goto unencap; + } + if (swkey->eth.type == htons(ETH_P_IP)) { struct ovs_key_ipv4 *ipv4_key; @@ -1619,6 +1731,8 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, } unencap: + if (in_encap) + nla_nest_end(skb, in_encap); if (encap) nla_nest_end(skb, encap); @@ -1882,13 +1996,15 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, + bool reset_key, struct sw_flow_mask *mask) { memset(match, 0, sizeof(*match)); match->key = key; match->mask = mask; - memset(key, 0, sizeof(*key)); + if (reset_key) + memset(key, 0, sizeof(*key)); if (mask) { memset(&mask->key, 0, sizeof(mask->key)); @@ -1935,7 +2051,7 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct nlattr *a; int err = 0, start, opts_type; - ovs_match_init(&match, &key, NULL); + ovs_match_init(&match, &key, true, NULL); opts_type = ip_tun_from_nlattr(nla_data(attr), &match, false, log); if (opts_type < 0) return opts_type; @@ -2283,7 +2399,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, case OVS_ACTION_ATTR_PUSH_VLAN: vlan = nla_data(a); - if (vlan->vlan_tpid != htons(ETH_P_8021Q)) + if (!eth_type_vlan(vlan->vlan_tpid)) return -EINVAL; if (!(vlan->vlan_tci & htons(VLAN_TAG_PRESENT))) return -EINVAL; @@ -2388,7 +2504,7 @@ int ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, (*sfa)->orig_len = nla_len(attr); err = __ovs_nla_copy_actions(net, attr, key, 0, sfa, key->eth.type, - key->eth.tci, log); + key->eth.vlan.tci, log); if (err) ovs_nla_free_flow_actions(*sfa); diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 47dd142eca1c..45f9769e5aac 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -41,7 +41,8 @@ size_t ovs_tun_key_attr_size(void); size_t ovs_key_attr_size(void); void ovs_match_init(struct sw_flow_match *match, - struct sw_flow_key *key, struct sw_flow_mask *mask); + struct sw_flow_key *key, bool reset_key, + struct sw_flow_mask *mask); int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int attr, bool is_mask, struct sk_buff *); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index d073fff82fdb..ea7a8073fa02 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -79,17 +80,12 @@ struct sw_flow *ovs_flow_alloc(void) { struct sw_flow *flow; struct flow_stats *stats; - int node; - flow = kmem_cache_alloc(flow_cache, GFP_KERNEL); + flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL); if (!flow) return ERR_PTR(-ENOMEM); - flow->sf_acts = NULL; - flow->mask = NULL; - flow->id.unmasked_key = NULL; - flow->id.ufid_len = 0; - flow->stats_last_writer = NUMA_NO_NODE; + flow->stats_last_writer = -1; /* Initialize the default stat node. */ stats = kmem_cache_alloc_node(flow_stats_cache, @@ -102,10 +98,6 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); - for_each_node(node) - if (node != 0) - RCU_INIT_POINTER(flow->stats[node], NULL); - return flow; err: kmem_cache_free(flow_cache, flow); @@ -142,16 +134,17 @@ static struct flex_array *alloc_buckets(unsigned int n_buckets) static void flow_free(struct sw_flow *flow) { - int node; + int cpu; if (ovs_identifier_is_key(&flow->id)) kfree(flow->id.unmasked_key); if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); - for_each_node(node) - if (flow->stats[node]) + /* We open code this to make sure cpu 0 is always considered */ + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, - (struct flow_stats __force *)flow->stats[node]); + (struct flow_stats __force *)flow->stats[cpu]); kmem_cache_free(flow_cache, flow); } @@ -756,7 +749,7 @@ int ovs_flow_init(void) BUILD_BUG_ON(sizeof(struct sw_flow_key) % sizeof(long)); flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow) - + (nr_node_ids + + (nr_cpu_ids * sizeof(struct flow_stats *)), 0, 0, NULL); if (flow_cache == NULL) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 6b21fd068d87..8f198437c724 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -485,9 +485,14 @@ static unsigned int packet_length(const struct sk_buff *skb) { unsigned int length = skb->len - ETH_HLEN; - if (skb->protocol == htons(ETH_P_8021Q)) + if (skb_vlan_tagged(skb)) length -= VLAN_HLEN; + /* Don't subtract for multiple VLAN tags. Most (all?) drivers allow + * (ETH_LEN + VLAN_HLEN) in addition to the mtu value, but almost none + * account for 802.1ad. e.g. is_skb_forwardable(). + */ + return length; } diff --git a/net/rds/ib.h b/net/rds/ib.h index 046f7508c06b..45ac8e8e58f4 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -333,6 +333,7 @@ void rds_ib_conn_path_shutdown(struct rds_conn_path *cp); void rds_ib_state_change(struct sock *sk); int rds_ib_listen_init(void); void rds_ib_listen_stop(void); +__printf(2, 3) void __rds_ib_conn_error(struct rds_connection *conn, const char *, ...); int rds_ib_cm_handle_connect(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); diff --git a/net/rds/rds.h b/net/rds/rds.h index b2d17f0fafa8..fd0bccb2f9f9 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -688,6 +688,7 @@ void __rds_conn_error(struct rds_connection *conn, const char *, ...); #define rds_conn_error(conn, fmt...) \ __rds_conn_error(conn, KERN_WARNING "RDS: " fmt) +__printf(2, 3) void __rds_conn_path_error(struct rds_conn_path *cp, const char *, ...); #define rds_conn_path_error(cp, fmt...) \ __rds_conn_path_error(cp, KERN_WARNING "RDS: " fmt) diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 784c53163b7b..86f8853a038c 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -19,6 +19,20 @@ config AF_RXRPC See Documentation/networking/rxrpc.txt. +config AF_RXRPC_IPV6 + bool "IPv6 support for RxRPC" + depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) + help + Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as + its network transport. + +config AF_RXRPC_INJECT_LOSS + bool "Inject packet loss into RxRPC packet stream" + depends on AF_RXRPC + help + Say Y here to inject packet loss by discarding some received and some + transmitted packets. + config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 10f3f48a16a8..8fc6ea347182 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -22,6 +22,7 @@ af-rxrpc-y := \ peer_object.o \ recvmsg.o \ security.o \ + sendmsg.o \ skbuff.o \ utils.o diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index 88effadd4b16..44c9c2b0b190 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -16,12 +16,14 @@ #include #include #include +#include #include #include #include #include #include #include +#define CREATE_TRACE_POINTS #include "ar-internal.h" MODULE_DESCRIPTION("RxRPC network protocol"); @@ -43,7 +45,7 @@ u32 rxrpc_epoch; atomic_t rxrpc_debug_id; /* count of skbs currently in use */ -atomic_t rxrpc_n_skbs; +atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; struct workqueue_struct *rxrpc_workqueue; @@ -104,19 +106,25 @@ static int rxrpc_validate_address(struct rxrpc_sock *rx, case AF_INET: if (srx->transport_len < sizeof(struct sockaddr_in)) return -EINVAL; - _debug("INET: %x @ %pI4", - ntohs(srx->transport.sin.sin_port), - &srx->transport.sin.sin_addr); tail = offsetof(struct sockaddr_rxrpc, transport.sin.__pad); break; +#ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: + if (srx->transport_len < sizeof(struct sockaddr_in6)) + return -EINVAL; + tail = offsetof(struct sockaddr_rxrpc, transport) + + sizeof(struct sockaddr_in6); + break; +#endif + default: return -EAFNOSUPPORT; } if (tail < len) memset((void *)srx + tail, 0, len - tail); + _debug("INET: %pISp", &srx->transport); return 0; } @@ -128,7 +136,8 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) struct sockaddr_rxrpc *srx = (struct sockaddr_rxrpc *)saddr; struct sock *sk = sock->sk; struct rxrpc_local *local; - struct rxrpc_sock *rx = rxrpc_sk(sk), *prx; + struct rxrpc_sock *rx = rxrpc_sk(sk); + u16 service_id = srx->srx_service; int ret; _enter("%p,%p,%d", rx, saddr, len); @@ -152,16 +161,13 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) goto error_unlock; } - if (rx->srx.srx_service) { - write_lock_bh(&local->services_lock); - list_for_each_entry(prx, &local->services, listen_link) { - if (prx->srx.srx_service == rx->srx.srx_service) - goto service_in_use; - } - + if (service_id) { + write_lock(&local->services_lock); + if (rcu_access_pointer(local->service)) + goto service_in_use; rx->local = local; - list_add_tail(&rx->listen_link, &local->services); - write_unlock_bh(&local->services_lock); + rcu_assign_pointer(local->service, rx); + write_unlock(&local->services_lock); rx->sk.sk_state = RXRPC_SERVER_BOUND; } else { @@ -174,7 +180,7 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len) return 0; service_in_use: - write_unlock_bh(&local->services_lock); + write_unlock(&local->services_lock); rxrpc_put_local(local); ret = -EADDRINUSE; error_unlock: @@ -191,7 +197,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; struct rxrpc_sock *rx = rxrpc_sk(sk); - unsigned int max; + unsigned int max, old; int ret; _enter("%p,%d", rx, backlog); @@ -210,9 +216,13 @@ static int rxrpc_listen(struct socket *sock, int backlog) backlog = max; else if (backlog < 0 || backlog > max) break; + old = sk->sk_max_ack_backlog; sk->sk_max_ack_backlog = backlog; - rx->sk.sk_state = RXRPC_SERVER_LISTENING; - ret = 0; + ret = rxrpc_service_prealloc(rx, GFP_KERNEL); + if (ret == 0) + rx->sk.sk_state = RXRPC_SERVER_LISTENING; + else + sk->sk_max_ack_backlog = old; break; default: ret = -EBUSY; @@ -230,6 +240,8 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @srx: The address of the peer to contact * @key: The security context to use (defaults to socket setting) * @user_call_ID: The ID to use + * @gfp: The allocation constraints + * @notify_rx: Where to send notifications instead of socket queue * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -242,7 +254,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, struct sockaddr_rxrpc *srx, struct key *key, unsigned long user_call_ID, - gfp_t gfp) + gfp_t gfp, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -269,6 +282,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.exclusive = false; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, gfp); + if (!IS_ERR(call)) + call->notify_rx = notify_rx; release_sock(&rx->sk); _leave(" = %p", call); @@ -278,40 +293,39 @@ EXPORT_SYMBOL(rxrpc_kernel_begin_call); /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using + * @sock: The socket the call is on * @call: The call to end * * Allow a kernel service to end a call it was using. The call must be * complete before this is called (the call should be aborted if necessary). */ -void rxrpc_kernel_end_call(struct rxrpc_call *call) +void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) { _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); - rxrpc_remove_user_ID(call->socket, call); - rxrpc_put_call(call); + rxrpc_release_call(rxrpc_sk(sock->sk), call); + rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); /** - * rxrpc_kernel_intercept_rx_messages - Intercept received RxRPC messages + * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on - * @interceptor: The function to pass the messages to + * @notify_new_call: Function to be called when new calls appear + * @discard_new_call: Function to discard preallocated calls * - * Allow a kernel service to intercept messages heading for the Rx queue on an - * RxRPC socket. They get passed to the specified function instead. - * @interceptor should free the socket buffers it is given. @interceptor is - * called with the socket receive queue spinlock held and softirqs disabled - - * this ensures that the messages will be delivered in the right order. + * Allow a kernel service to be given notifications about new calls. */ -void rxrpc_kernel_intercept_rx_messages(struct socket *sock, - rxrpc_interceptor_t interceptor) +void rxrpc_kernel_new_call_notification( + struct socket *sock, + rxrpc_notify_new_call_t notify_new_call, + rxrpc_discard_new_call_t discard_new_call) { struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - _enter(""); - rx->interceptor = interceptor; + rx->notify_new_call = notify_new_call; + rx->discard_new_call = discard_new_call; } - -EXPORT_SYMBOL(rxrpc_kernel_intercept_rx_messages); +EXPORT_SYMBOL(rxrpc_kernel_new_call_notification); /* * connect an RxRPC socket @@ -391,6 +405,23 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) switch (rx->sk.sk_state) { case RXRPC_UNBOUND: + rx->srx.srx_family = AF_RXRPC; + rx->srx.srx_service = 0; + rx->srx.transport_type = SOCK_DGRAM; + rx->srx.transport.family = rx->family; + switch (rx->family) { + case AF_INET: + rx->srx.transport_len = sizeof(struct sockaddr_in); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + rx->srx.transport_len = sizeof(struct sockaddr_in6); + break; +#endif + default: + ret = -EAFNOSUPPORT; + goto error_unlock; + } local = rxrpc_lookup_local(&rx->srx); if (IS_ERR(local)) { ret = PTR_ERR(local); @@ -505,15 +536,16 @@ error: static unsigned int rxrpc_poll(struct file *file, struct socket *sock, poll_table *wait) { - unsigned int mask; struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + unsigned int mask; sock_poll_wait(file, sk_sleep(sk), wait); mask = 0; /* the socket is readable if there are any messages waiting on the Rx * queue */ - if (!skb_queue_empty(&sk->sk_receive_queue)) + if (!list_empty(&rx->recvmsg_q)) mask |= POLLIN | POLLRDNORM; /* the socket is writable if there is space to add new data to the @@ -540,7 +572,8 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -EAFNOSUPPORT; /* we support transport protocol UDP/UDP6 only */ - if (protocol != PF_INET) + if (protocol != PF_INET && + IS_ENABLED(CONFIG_AF_RXRPC_IPV6) && protocol != PF_INET6) return -EPROTONOSUPPORT; if (sock->type != SOCK_DGRAM) @@ -554,6 +587,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return -ENOMEM; sock_init_data(sock, sk); + sock_set_flag(sk, SOCK_RCU_FREE); sk->sk_state = RXRPC_UNBOUND; sk->sk_write_space = rxrpc_write_space; sk->sk_max_ack_backlog = 0; @@ -563,9 +597,11 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, rx->family = protocol; rx->calls = RB_ROOT; - INIT_LIST_HEAD(&rx->listen_link); - INIT_LIST_HEAD(&rx->secureq); - INIT_LIST_HEAD(&rx->acceptq); + spin_lock_init(&rx->incoming_lock); + INIT_LIST_HEAD(&rx->sock_calls); + INIT_LIST_HEAD(&rx->to_be_accepted); + INIT_LIST_HEAD(&rx->recvmsg_q); + rwlock_init(&rx->recvmsg_lock); rwlock_init(&rx->call_lock); memset(&rx->srx, 0, sizeof(rx->srx)); @@ -573,6 +609,39 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, return 0; } +/* + * Kill all the calls on a socket and shut it down. + */ +static int rxrpc_shutdown(struct socket *sock, int flags) +{ + struct sock *sk = sock->sk; + struct rxrpc_sock *rx = rxrpc_sk(sk); + int ret = 0; + + _enter("%p,%d", sk, flags); + + if (flags != SHUT_RDWR) + return -EOPNOTSUPP; + if (sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + lock_sock(sk); + + spin_lock_bh(&sk->sk_receive_queue.lock); + if (sk->sk_state < RXRPC_CLOSE) { + sk->sk_state = RXRPC_CLOSE; + sk->sk_shutdown = SHUTDOWN_MASK; + } else { + ret = -ESHUTDOWN; + } + spin_unlock_bh(&sk->sk_receive_queue.lock); + + rxrpc_discard_prealloc(rx); + + release_sock(sk); + return ret; +} + /* * RxRPC socket destructor */ @@ -609,15 +678,14 @@ static int rxrpc_release_sock(struct sock *sk) sk->sk_state = RXRPC_CLOSE; spin_unlock_bh(&sk->sk_receive_queue.lock); - ASSERTCMP(rx->listen_link.next, !=, LIST_POISON1); - - if (!list_empty(&rx->listen_link)) { - write_lock_bh(&rx->local->services_lock); - list_del(&rx->listen_link); - write_unlock_bh(&rx->local->services_lock); + if (rx->local && rx->local->service == rx) { + write_lock(&rx->local->services_lock); + rx->local->service = NULL; + write_unlock(&rx->local->services_lock); } /* try to flush out this socket */ + rxrpc_discard_prealloc(rx); rxrpc_release_calls_on_socket(rx); flush_workqueue(rxrpc_workqueue); rxrpc_purge_queue(&sk->sk_receive_queue); @@ -666,7 +734,7 @@ static const struct proto_ops rxrpc_rpc_ops = { .poll = rxrpc_poll, .ioctl = sock_no_ioctl, .listen = rxrpc_listen, - .shutdown = sock_no_shutdown, + .shutdown = rxrpc_shutdown, .setsockopt = rxrpc_setsockopt, .getsockopt = sock_no_getsockopt, .sendmsg = rxrpc_sendmsg, @@ -697,7 +765,13 @@ static int __init af_rxrpc_init(void) BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > FIELD_SIZEOF(struct sk_buff, cb)); - rxrpc_epoch = get_seconds(); + get_random_bytes(&rxrpc_epoch, sizeof(rxrpc_epoch)); + rxrpc_epoch |= RXRPC_RANDOM_EPOCH; + get_random_bytes(&rxrpc_client_conn_ids.cur, + sizeof(rxrpc_client_conn_ids.cur)); + rxrpc_client_conn_ids.cur &= 0x3fffffff; + if (rxrpc_client_conn_ids.cur == 0) + rxrpc_client_conn_ids.cur = 1; ret = -ENOMEM; rxrpc_call_jar = kmem_cache_create( @@ -788,7 +862,8 @@ static void __exit af_rxrpc_exit(void) proto_unregister(&rxrpc_proto); rxrpc_destroy_all_calls(); rxrpc_destroy_all_connections(); - ASSERTCMP(atomic_read(&rxrpc_n_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_tx_skbs), ==, 0); + ASSERTCMP(atomic_read(&rxrpc_n_rx_skbs), ==, 0); rxrpc_destroy_all_locals(); remove_proc_entry("rxrpc_conns", init_net.proc_net); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index ff83fb1ddd47..d38dffd78085 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -35,10 +35,22 @@ struct rxrpc_crypt { #define rxrpc_queue_delayed_work(WS,D) \ queue_delayed_work(rxrpc_workqueue, (WS), (D)) -#define rxrpc_queue_call(CALL) rxrpc_queue_work(&(CALL)->processor) - struct rxrpc_connection; +/* + * Mark applied to socket buffers. + */ +enum rxrpc_skb_mark { + RXRPC_SKB_MARK_DATA, /* data message */ + RXRPC_SKB_MARK_FINAL_ACK, /* final ACK received message */ + RXRPC_SKB_MARK_BUSY, /* server busy message */ + RXRPC_SKB_MARK_REMOTE_ABORT, /* remote abort message */ + RXRPC_SKB_MARK_LOCAL_ABORT, /* local abort message */ + RXRPC_SKB_MARK_NET_ERROR, /* network error message */ + RXRPC_SKB_MARK_LOCAL_ERROR, /* local error message */ + RXRPC_SKB_MARK_NEW_CALL, /* local error message */ +}; + /* * sk_state for RxRPC sockets */ @@ -51,20 +63,45 @@ enum { RXRPC_CLOSE, /* socket is being closed */ }; +/* + * Service backlog preallocation. + * + * This contains circular buffers of preallocated peers, connections and calls + * for incoming service calls and their head and tail pointers. This allows + * calls to be set up in the data_ready handler, thereby avoiding the need to + * shuffle packets around so much. + */ +struct rxrpc_backlog { + unsigned short peer_backlog_head; + unsigned short peer_backlog_tail; + unsigned short conn_backlog_head; + unsigned short conn_backlog_tail; + unsigned short call_backlog_head; + unsigned short call_backlog_tail; +#define RXRPC_BACKLOG_MAX 32 + struct rxrpc_peer *peer_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_connection *conn_backlog[RXRPC_BACKLOG_MAX]; + struct rxrpc_call *call_backlog[RXRPC_BACKLOG_MAX]; +}; + /* * RxRPC socket definition */ struct rxrpc_sock { /* WARNING: sk has to be the first member */ struct sock sk; - rxrpc_interceptor_t interceptor; /* kernel service Rx interceptor function */ + rxrpc_notify_new_call_t notify_new_call; /* Func to notify of new call */ + rxrpc_discard_new_call_t discard_new_call; /* Func to discard a new call */ struct rxrpc_local *local; /* local endpoint */ - struct list_head listen_link; /* link in the local endpoint's listen list */ - struct list_head secureq; /* calls awaiting connection security clearance */ - struct list_head acceptq; /* calls awaiting acceptance */ + struct rxrpc_backlog *backlog; /* Preallocation for services */ + spinlock_t incoming_lock; /* Incoming call vs service shutdown lock */ + struct list_head sock_calls; /* List of calls owned by this socket */ + struct list_head to_be_accepted; /* calls awaiting acceptance */ + struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */ + rwlock_t recvmsg_lock; /* Lock for recvmsg_q */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ - struct rb_root calls; /* outstanding calls on this socket */ + struct rb_root calls; /* User ID -> call mapping */ unsigned long flags; #define RXRPC_SOCK_CONNECTED 0 /* connect_srx is set */ rwlock_t call_lock; /* lock for calls */ @@ -103,13 +140,11 @@ struct rxrpc_host_header { * - max 48 bytes (struct sk_buff::cb) */ struct rxrpc_skb_priv { - struct rxrpc_call *call; /* call with which associated */ - unsigned long resend_at; /* time in jiffies at which to resend */ union { - unsigned int offset; /* offset into buffer of next read */ + u8 nr_jumbo; /* Number of jumbo subpackets */ + }; + union { int remain; /* amount of space remaining for next write */ - u32 error; /* network error code */ - bool need_resend; /* T if needs resending */ }; struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */ @@ -117,13 +152,6 @@ struct rxrpc_skb_priv { #define rxrpc_skb(__skb) ((struct rxrpc_skb_priv *) &(__skb)->cb) -enum rxrpc_command { - RXRPC_CMD_SEND_DATA, /* send data message */ - RXRPC_CMD_SEND_ABORT, /* request abort generation */ - RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ - RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ -}; - /* * RxRPC security module interface */ @@ -150,7 +178,12 @@ struct rxrpc_security { void *); /* verify the security on a received packet */ - int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, u32 *); + int (*verify_packet)(struct rxrpc_call *, struct sk_buff *, + unsigned int, unsigned int, rxrpc_seq_t, u16); + + /* Locate the data in a received packet that has been verified. */ + void (*locate_data)(struct rxrpc_call *, struct sk_buff *, + unsigned int *, unsigned int *); /* issue a challenge */ int (*issue_challenge)(struct rxrpc_connection *); @@ -180,9 +213,8 @@ struct rxrpc_local { struct list_head link; struct socket *socket; /* my UDP socket */ struct work_struct processor; - struct list_head services; /* services listening on this endpoint */ + struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ - struct sk_buff_head accept_queue; /* incoming calls awaiting acceptance */ struct sk_buff_head reject_queue; /* packets awaiting rejection */ struct sk_buff_head event_queue; /* endpoint event packets awaiting processing */ struct rb_root client_conns; /* Client connections by socket params */ @@ -220,10 +252,12 @@ struct rxrpc_peer { /* calculated RTT cache */ #define RXRPC_RTT_CACHE_SIZE 32 - suseconds_t rtt; /* current RTT estimate (in uS) */ - unsigned int rtt_point; /* next entry at which to insert */ - unsigned int rtt_usage; /* amount of cache actually used */ - suseconds_t rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* calculated RTT cache */ + ktime_t rtt_last_req; /* Time of last RTT request */ + u64 rtt; /* Current RTT estimate (in nS) */ + u64 rtt_sum; /* Sum of cache contents */ + u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ + u8 rtt_cursor; /* next entry at which to insert */ + u8 rtt_usage; /* amount of cache actually used */ }; /* @@ -255,6 +289,9 @@ enum rxrpc_conn_flag { RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */ RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */ RXRPC_CONN_IN_CLIENT_CONNS, /* Conn is in local->client_conns */ + RXRPC_CONN_EXPOSED, /* Conn has extra ref for exposure */ + RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */ + RXRPC_CONN_COUNTED, /* Counted by rxrpc_nr_client_conns */ }; /* @@ -264,18 +301,30 @@ enum rxrpc_conn_event { RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */ }; +/* + * The connection cache state. + */ +enum rxrpc_conn_cache_state { + RXRPC_CONN_CLIENT_INACTIVE, /* Conn is not yet listed */ + RXRPC_CONN_CLIENT_WAITING, /* Conn is on wait list, waiting for capacity */ + RXRPC_CONN_CLIENT_ACTIVE, /* Conn is on active list, doing calls */ + RXRPC_CONN_CLIENT_CULLED, /* Conn is culled and delisted, doing calls */ + RXRPC_CONN_CLIENT_IDLE, /* Conn is on idle list, doing mostly nothing */ + RXRPC_CONN__NR_CACHE_STATES +}; + /* * The connection protocol state. */ enum rxrpc_conn_proto_state { RXRPC_CONN_UNUSED, /* Connection not yet attempted */ RXRPC_CONN_CLIENT, /* Client connection */ + RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */ RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */ RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */ RXRPC_CONN_SERVICE, /* Service secured connection */ RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */ RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */ - RXRPC_CONN_NETWORK_ERROR, /* Conn terminated by network error */ RXRPC_CONN__NR_STATES }; @@ -288,23 +337,33 @@ struct rxrpc_connection { struct rxrpc_conn_proto proto; struct rxrpc_conn_parameters params; - spinlock_t channel_lock; + atomic_t usage; + struct rcu_head rcu; + struct list_head cache_link; + spinlock_t channel_lock; + unsigned char active_chans; /* Mask of active channels */ +#define RXRPC_ACTIVE_CHANS_MASK ((1 << RXRPC_MAXCALLS) - 1) + struct list_head waiting_calls; /* Calls waiting for channels */ struct rxrpc_channel { struct rxrpc_call __rcu *call; /* Active call */ u32 call_id; /* ID of current call */ u32 call_counter; /* Call ID counter */ u32 last_call; /* ID of last call */ - u32 last_result; /* Result of last call (0/abort) */ + u8 last_type; /* Type of last packet */ + u16 last_service_id; + union { + u32 last_seq; + u32 last_abort; + }; } channels[RXRPC_MAXCALLS]; - wait_queue_head_t channel_wq; /* queue to wait for channel to become available */ - struct rcu_head rcu; struct work_struct processor; /* connection event processor */ union { struct rb_node client_node; /* Node in local->client_conns */ struct rb_node service_node; /* Node in peer->service_conns */ }; + struct list_head proc_link; /* link in procfs list */ struct list_head link; /* link in master connection list */ struct sk_buff_head rx_queue; /* received conn-level packets */ const struct rxrpc_security *security; /* applied security module */ @@ -313,21 +372,18 @@ struct rxrpc_connection { struct rxrpc_crypt csum_iv; /* packet checksum base */ unsigned long flags; unsigned long events; - unsigned long put_time; /* Time at which last put */ + unsigned long idle_timestamp; /* Time at which last became idle */ spinlock_t state_lock; /* state-change lock */ - atomic_t usage; - enum rxrpc_conn_proto_state state : 8; /* current state of connection */ + enum rxrpc_conn_cache_state cache_state; + enum rxrpc_conn_proto_state state; /* current state of connection */ u32 local_abort; /* local abort code */ u32 remote_abort; /* remote abort code */ - int error; /* local error incurred */ int debug_id; /* debug ID for printks */ atomic_t serial; /* packet serial number counter */ - atomic_t hi_serial; /* highest serial number received */ - atomic_t avail_chans; /* number of channels available */ - u8 size_align; /* data size alignment (for security) */ - u8 header_size; /* rxrpc + security header size */ - u8 security_size; /* security header size */ + unsigned int hi_serial; /* highest serial number received */ u32 security_nonce; /* response re-use preventer */ + u8 size_align; /* data size alignment (for security) */ + u8 security_size; /* security header size */ u8 security_ix; /* security type */ u8 out_clientflag; /* RXRPC_CLIENT_INITIATED if we are client */ }; @@ -337,37 +393,23 @@ struct rxrpc_connection { */ enum rxrpc_call_flag { RXRPC_CALL_RELEASED, /* call has been released - no more message to userspace */ - RXRPC_CALL_TERMINAL_MSG, /* call has given the socket its final message */ - RXRPC_CALL_RCVD_LAST, /* all packets received */ - RXRPC_CALL_RUN_RTIMER, /* Tx resend timer started */ - RXRPC_CALL_TX_SOFT_ACK, /* sent some soft ACKs */ - RXRPC_CALL_PROC_BUSY, /* the processor is busy */ - RXRPC_CALL_INIT_ACCEPT, /* acceptance was initiated */ RXRPC_CALL_HAS_USERID, /* has a user ID attached */ - RXRPC_CALL_EXPECT_OOS, /* expect out of sequence packets */ + RXRPC_CALL_IS_SERVICE, /* Call is service call */ + RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ + RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ + RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_PINGING, /* Ping in process */ + RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ }; /* * Events that can be raised on a call. */ enum rxrpc_call_event { - RXRPC_CALL_EV_RCVD_ACKALL, /* ACKALL or reply received */ - RXRPC_CALL_EV_RCVD_BUSY, /* busy packet received */ - RXRPC_CALL_EV_RCVD_ABORT, /* abort packet received */ - RXRPC_CALL_EV_RCVD_ERROR, /* network error received */ - RXRPC_CALL_EV_ACK_FINAL, /* need to generate final ACK (and release call) */ RXRPC_CALL_EV_ACK, /* need to generate ACK */ - RXRPC_CALL_EV_REJECT_BUSY, /* need to generate busy message */ RXRPC_CALL_EV_ABORT, /* need to generate abort */ - RXRPC_CALL_EV_CONN_ABORT, /* local connection abort generated */ - RXRPC_CALL_EV_RESEND_TIMER, /* Tx resend timer expired */ + RXRPC_CALL_EV_TIMER, /* Timer expired */ RXRPC_CALL_EV_RESEND, /* Tx resend required */ - RXRPC_CALL_EV_DRAIN_RX_OOS, /* drain the Rx out of sequence queue */ - RXRPC_CALL_EV_LIFE_TIMER, /* call's lifetimer ran out */ - RXRPC_CALL_EV_ACCEPTED, /* incoming call accepted by userspace app */ - RXRPC_CALL_EV_SECURED, /* incoming call's connection is now secure */ - RXRPC_CALL_EV_POST_ACCEPT, /* need to post an "accept?" message to the app */ - RXRPC_CALL_EV_RELEASE, /* need to release the call's resources */ }; /* @@ -379,20 +421,38 @@ enum rxrpc_call_state { RXRPC_CALL_CLIENT_SEND_REQUEST, /* - client sending request phase */ RXRPC_CALL_CLIENT_AWAIT_REPLY, /* - client awaiting reply */ RXRPC_CALL_CLIENT_RECV_REPLY, /* - client receiving reply phase */ - RXRPC_CALL_CLIENT_FINAL_ACK, /* - client sending final ACK phase */ + RXRPC_CALL_SERVER_PREALLOC, /* - service preallocation */ RXRPC_CALL_SERVER_SECURING, /* - server securing request connection */ RXRPC_CALL_SERVER_ACCEPTING, /* - server accepting request */ RXRPC_CALL_SERVER_RECV_REQUEST, /* - server receiving request */ RXRPC_CALL_SERVER_ACK_REQUEST, /* - server pending ACK of request */ RXRPC_CALL_SERVER_SEND_REPLY, /* - server sending reply */ RXRPC_CALL_SERVER_AWAIT_ACK, /* - server awaiting final ACK */ - RXRPC_CALL_COMPLETE, /* - call completed */ - RXRPC_CALL_SERVER_BUSY, /* - call rejected by busy server */ + RXRPC_CALL_COMPLETE, /* - call complete */ + NR__RXRPC_CALL_STATES +}; + +/* + * Call completion condition (state == RXRPC_CALL_COMPLETE). + */ +enum rxrpc_call_completion { + RXRPC_CALL_SUCCEEDED, /* - Normal termination */ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ + RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - RXRPC_CALL_DEAD, /* - call is dead */ - NR__RXRPC_CALL_STATES + NR__RXRPC_CALL_COMPLETIONS +}; + +/* + * Call Tx congestion management modes. + */ +enum rxrpc_congest_mode { + RXRPC_CALL_SLOW_START, + RXRPC_CALL_CONGEST_AVOIDANCE, + RXRPC_CALL_PACKET_LOSS, + RXRPC_CALL_FAST_RETRANSMIT, + NR__RXRPC_CONGEST_MODES }; /* @@ -402,92 +462,329 @@ enum rxrpc_call_state { struct rxrpc_call { struct rcu_head rcu; struct rxrpc_connection *conn; /* connection carrying call */ - struct rxrpc_sock *socket; /* socket responsible */ - struct timer_list lifetimer; /* lifetime remaining on call */ - struct timer_list deadspan; /* reap timer for re-ACK'ing, etc */ - struct timer_list ack_timer; /* ACK generation timer */ - struct timer_list resend_timer; /* Tx resend timer */ - struct work_struct destroyer; /* call destroyer */ - struct work_struct processor; /* packet processor and ACK generator */ + struct rxrpc_peer *peer; /* Peer record for remote address */ + struct rxrpc_sock __rcu *socket; /* socket responsible */ + ktime_t ack_at; /* When deferred ACK needs to happen */ + ktime_t resend_at; /* When next resend needs to happen */ + ktime_t expire_at; /* When the call times out */ + struct timer_list timer; /* Combined event timer */ + struct work_struct processor; /* Event processor */ + rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */ struct list_head link; /* link in master call list */ + struct list_head chan_wait_link; /* Link in conn->waiting_calls */ struct hlist_node error_link; /* link in error distribution list */ - struct list_head accept_link; /* calls awaiting acceptance */ - struct rb_node sock_node; /* node in socket call tree */ - struct sk_buff_head rx_queue; /* received packets */ - struct sk_buff_head rx_oos_queue; /* packets received out of sequence */ + struct list_head accept_link; /* Link in rx->acceptq */ + struct list_head recvmsg_link; /* Link in rx->recvmsg_q */ + struct list_head sock_link; /* Link in rx->sock_calls */ + struct rb_node sock_node; /* Node in rx->calls */ struct sk_buff *tx_pending; /* Tx socket buffer being filled */ - wait_queue_head_t tx_waitq; /* wait for Tx window space to become available */ + wait_queue_head_t waitq; /* Wait queue for channel or Tx */ __be32 crypto_buf[2]; /* Temporary packet crypto buffer */ unsigned long user_call_ID; /* user-defined call ID */ - unsigned long creation_jif; /* time of call creation */ unsigned long flags; unsigned long events; spinlock_t lock; rwlock_t state_lock; /* lock for state transition */ - atomic_t usage; - atomic_t skb_count; /* Outstanding packets on this call */ - atomic_t sequence; /* Tx data packet sequence counter */ - u32 local_abort; /* local abort code */ - u32 remote_abort; /* remote abort code */ - int error_report; /* Network error (ICMP/local transport) */ + u32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ - enum rxrpc_call_state state : 8; /* current state of call */ - int debug_id; /* debug ID for printks */ - u8 channel; /* connection channel occupied by this call */ - - /* transmission-phase ACK management */ - u8 acks_head; /* offset into window of first entry */ - u8 acks_tail; /* offset into window of last entry */ - u8 acks_winsz; /* size of un-ACK'd window */ - u8 acks_unacked; /* lowest unacked packet in last ACK received */ - int acks_latest; /* serial number of latest ACK received */ - rxrpc_seq_t acks_hard; /* highest definitively ACK'd msg seq */ - unsigned long *acks_window; /* sent packet window - * - elements are pointers with LSB set if ACK'd - */ - - /* receive-phase ACK management */ - rxrpc_seq_t rx_data_expect; /* next data seq ID expected to be received */ - rxrpc_seq_t rx_data_post; /* next data seq ID expected to be posted */ - rxrpc_seq_t rx_data_recv; /* last data seq ID encountered by recvmsg */ - rxrpc_seq_t rx_data_eaten; /* last data seq ID consumed by recvmsg */ - rxrpc_seq_t rx_first_oos; /* first packet in rx_oos_queue (or 0) */ - rxrpc_seq_t ackr_win_top; /* top of ACK window (rx_data_eaten is bottom) */ - rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ - u8 ackr_reason; /* reason to ACK */ - rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - atomic_t ackr_not_idle; /* number of packets in Rx queue */ - - /* received packet records, 1 bit per record */ -#define RXRPC_ACKR_WINDOW_ASZ DIV_ROUND_UP(RXRPC_MAXACKS, BITS_PER_LONG) - unsigned long ackr_window[RXRPC_ACKR_WINDOW_ASZ + 1]; - - u8 in_clientflag; /* Copy of conn->in_clientflag */ - struct rxrpc_local *local; /* Local endpoint. */ + enum rxrpc_call_state state; /* current state of call */ + enum rxrpc_call_completion completion; /* Call completion condition */ + atomic_t usage; + u16 service_id; /* service ID */ + u8 security_ix; /* Security type */ u32 call_id; /* call ID on connection */ u32 cid; /* connection ID plus channel index */ - u32 epoch; /* epoch of this connection */ - u16 service_id; /* service ID */ + int debug_id; /* debug ID for printks */ + unsigned short rx_pkt_offset; /* Current recvmsg packet offset */ + unsigned short rx_pkt_len; /* Current recvmsg packet len */ + + /* Rx/Tx circular buffer, depending on phase. + * + * In the Rx phase, packets are annotated with 0 or the number of the + * segment of a jumbo packet each buffer refers to. There can be up to + * 47 segments in a maximum-size UDP packet. + * + * In the Tx phase, packets are annotated with which buffers have been + * acked. + */ +#define RXRPC_RXTX_BUFF_SIZE 64 +#define RXRPC_RXTX_BUFF_MASK (RXRPC_RXTX_BUFF_SIZE - 1) +#define RXRPC_INIT_RX_WINDOW_SIZE 32 + struct sk_buff **rxtx_buffer; + u8 *rxtx_annotations; +#define RXRPC_TX_ANNO_ACK 0 +#define RXRPC_TX_ANNO_UNACK 1 +#define RXRPC_TX_ANNO_NAK 2 +#define RXRPC_TX_ANNO_RETRANS 3 +#define RXRPC_TX_ANNO_MASK 0x03 +#define RXRPC_TX_ANNO_LAST 0x04 +#define RXRPC_TX_ANNO_RESENT 0x08 + +#define RXRPC_RX_ANNO_JUMBO 0x3f /* Jumbo subpacket number + 1 if not zero */ +#define RXRPC_RX_ANNO_JLAST 0x40 /* Set if last element of a jumbo packet */ +#define RXRPC_RX_ANNO_VERIFIED 0x80 /* Set if verified and decrypted */ + rxrpc_seq_t tx_hard_ack; /* Dead slot in buffer; the first transmitted but + * not hard-ACK'd packet follows this. + */ + rxrpc_seq_t tx_top; /* Highest Tx slot allocated. */ + + /* TCP-style slow-start congestion control [RFC5681]. Since the SMSS + * is fixed, we keep these numbers in terms of segments (ie. DATA + * packets) rather than bytes. + */ +#define RXRPC_TX_SMSS RXRPC_JUMBO_DATALEN + u8 cong_cwnd; /* Congestion window size */ + u8 cong_extra; /* Extra to send for congestion management */ + u8 cong_ssthresh; /* Slow-start threshold */ + enum rxrpc_congest_mode cong_mode:8; /* Congestion management mode */ + u8 cong_dup_acks; /* Count of ACKs showing missing packets */ + u8 cong_cumul_acks; /* Cumulative ACK count */ + ktime_t cong_tstamp; /* Last time cwnd was changed */ + + rxrpc_seq_t rx_hard_ack; /* Dead slot in buffer; the first received but not + * consumed packet follows this. + */ + rxrpc_seq_t rx_top; /* Highest Rx slot allocated. */ + rxrpc_seq_t rx_expect_next; /* Expected next packet sequence number */ + u8 rx_winsize; /* Size of Rx window */ + u8 tx_winsize; /* Maximum size of Tx window */ + bool tx_phase; /* T if transmission phase, F if receive phase */ + u8 nr_jumbo_bad; /* Number of jumbo dups/exceeds-windows */ + + /* receive-phase ACK management */ + u8 ackr_reason; /* reason to ACK */ + u16 ackr_skew; /* skew on packet being ACK'd */ + rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ + rxrpc_seq_t ackr_prev_seq; /* previous sequence number received */ + rxrpc_seq_t ackr_consumed; /* Highest packet shown consumed */ + rxrpc_seq_t ackr_seen; /* Highest packet shown seen */ + rxrpc_serial_t ackr_ping; /* Last ping sent */ + ktime_t ackr_ping_time; /* Time last ping sent */ + + /* transmission-phase ACK management */ + ktime_t acks_latest_ts; /* Timestamp of latest ACK received */ + rxrpc_serial_t acks_latest; /* serial number of latest ACK received */ + rxrpc_seq_t acks_lowest_nak; /* Lowest NACK in the buffer (or ==tx_hard_ack) */ }; /* - * locally abort an RxRPC call + * Summary of a new ACK and the changes it made to the Tx buffer packet states. */ -static inline void rxrpc_abort_call(struct rxrpc_call *call, u32 abort_code) -{ - write_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - call->local_abort = abort_code; - call->state = RXRPC_CALL_LOCALLY_ABORTED; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - } - write_unlock_bh(&call->state_lock); -} +struct rxrpc_ack_summary { + u8 ack_reason; + u8 nr_acks; /* Number of ACKs in packet */ + u8 nr_nacks; /* Number of NACKs in packet */ + u8 nr_new_acks; /* Number of new ACKs in packet */ + u8 nr_new_nacks; /* Number of new NACKs in packet */ + u8 nr_rot_new_acks; /* Number of rotated new ACKs */ + bool new_low_nack; /* T if new low NACK found */ + bool retrans_timeo; /* T if reTx due to timeout happened */ + u8 flight_size; /* Number of unreceived transmissions */ + /* Place to stash values for tracing */ + enum rxrpc_congest_mode mode:8; + u8 cwnd; + u8 ssthresh; + u8 dup_acks; + u8 cumulative_acks; +}; + +enum rxrpc_skb_trace { + rxrpc_skb_rx_cleaned, + rxrpc_skb_rx_freed, + rxrpc_skb_rx_got, + rxrpc_skb_rx_lost, + rxrpc_skb_rx_received, + rxrpc_skb_rx_rotated, + rxrpc_skb_rx_purged, + rxrpc_skb_rx_seen, + rxrpc_skb_tx_cleaned, + rxrpc_skb_tx_freed, + rxrpc_skb_tx_got, + rxrpc_skb_tx_new, + rxrpc_skb_tx_rotated, + rxrpc_skb_tx_seen, + rxrpc_skb__nr_trace +}; + +extern const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7]; + +enum rxrpc_conn_trace { + rxrpc_conn_new_client, + rxrpc_conn_new_service, + rxrpc_conn_queued, + rxrpc_conn_seen, + rxrpc_conn_got, + rxrpc_conn_put_client, + rxrpc_conn_put_service, + rxrpc_conn__nr_trace +}; + +extern const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4]; + +enum rxrpc_client_trace { + rxrpc_client_activate_chans, + rxrpc_client_alloc, + rxrpc_client_chan_activate, + rxrpc_client_chan_disconnect, + rxrpc_client_chan_pass, + rxrpc_client_chan_unstarted, + rxrpc_client_cleanup, + rxrpc_client_count, + rxrpc_client_discard, + rxrpc_client_duplicate, + rxrpc_client_exposed, + rxrpc_client_replace, + rxrpc_client_to_active, + rxrpc_client_to_culled, + rxrpc_client_to_idle, + rxrpc_client_to_inactive, + rxrpc_client_to_waiting, + rxrpc_client_uncount, + rxrpc_client__nr_trace +}; + +extern const char rxrpc_client_traces[rxrpc_client__nr_trace][7]; +extern const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5]; + +enum rxrpc_call_trace { + rxrpc_call_new_client, + rxrpc_call_new_service, + rxrpc_call_queued, + rxrpc_call_queued_ref, + rxrpc_call_seen, + rxrpc_call_connected, + rxrpc_call_release, + rxrpc_call_got, + rxrpc_call_got_userid, + rxrpc_call_got_kernel, + rxrpc_call_put, + rxrpc_call_put_userid, + rxrpc_call_put_kernel, + rxrpc_call_put_noqueue, + rxrpc_call_error, + rxrpc_call__nr_trace +}; + +extern const char rxrpc_call_traces[rxrpc_call__nr_trace][4]; + +enum rxrpc_transmit_trace { + rxrpc_transmit_wait, + rxrpc_transmit_queue, + rxrpc_transmit_queue_last, + rxrpc_transmit_rotate, + rxrpc_transmit_rotate_last, + rxrpc_transmit_await_reply, + rxrpc_transmit_end, + rxrpc_transmit__nr_trace +}; + +extern const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4]; + +enum rxrpc_receive_trace { + rxrpc_receive_incoming, + rxrpc_receive_queue, + rxrpc_receive_queue_last, + rxrpc_receive_front, + rxrpc_receive_rotate, + rxrpc_receive_end, + rxrpc_receive__nr_trace +}; + +extern const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4]; + +enum rxrpc_recvmsg_trace { + rxrpc_recvmsg_enter, + rxrpc_recvmsg_wait, + rxrpc_recvmsg_dequeue, + rxrpc_recvmsg_hole, + rxrpc_recvmsg_next, + rxrpc_recvmsg_cont, + rxrpc_recvmsg_full, + rxrpc_recvmsg_data_return, + rxrpc_recvmsg_terminal, + rxrpc_recvmsg_to_be_accepted, + rxrpc_recvmsg_return, + rxrpc_recvmsg__nr_trace +}; + +extern const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5]; + +enum rxrpc_rtt_tx_trace { + rxrpc_rtt_tx_ping, + rxrpc_rtt_tx_data, + rxrpc_rtt_tx__nr_trace +}; + +extern const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5]; + +enum rxrpc_rtt_rx_trace { + rxrpc_rtt_rx_ping_response, + rxrpc_rtt_rx_requested_ack, + rxrpc_rtt_rx__nr_trace +}; + +extern const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5]; + +enum rxrpc_timer_trace { + rxrpc_timer_begin, + rxrpc_timer_init_for_reply, + rxrpc_timer_expired, + rxrpc_timer_set_for_ack, + rxrpc_timer_set_for_resend, + rxrpc_timer_set_for_send, + rxrpc_timer__nr_trace +}; + +extern const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8]; + +enum rxrpc_propose_ack_trace { + rxrpc_propose_ack_client_tx_end, + rxrpc_propose_ack_input_data, + rxrpc_propose_ack_ping_for_lost_ack, + rxrpc_propose_ack_ping_for_lost_reply, + rxrpc_propose_ack_ping_for_params, + rxrpc_propose_ack_respond_to_ack, + rxrpc_propose_ack_respond_to_ping, + rxrpc_propose_ack_retry_tx, + rxrpc_propose_ack_rotate_rx, + rxrpc_propose_ack_terminal_ack, + rxrpc_propose_ack__nr_trace +}; + +enum rxrpc_propose_ack_outcome { + rxrpc_propose_ack_use, + rxrpc_propose_ack_update, + rxrpc_propose_ack_subsume, + rxrpc_propose_ack__nr_outcomes +}; + +extern const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8]; +extern const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes]; + +enum rxrpc_congest_change { + rxrpc_cong_begin_retransmission, + rxrpc_cong_cleared_nacks, + rxrpc_cong_new_low_nack, + rxrpc_cong_no_change, + rxrpc_cong_progress, + rxrpc_cong_retransmit_again, + rxrpc_cong_rtt_window_end, + rxrpc_cong_saw_nack, + rxrpc_congest__nr_change +}; + +extern const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10]; +extern const char rxrpc_congest_changes[rxrpc_congest__nr_change][9]; + +extern const char *const rxrpc_pkts[]; +extern const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4]; + +#include /* * af_rxrpc.c */ -extern atomic_t rxrpc_n_skbs; +extern atomic_t rxrpc_n_tx_skbs, rxrpc_n_rx_skbs; extern u32 rxrpc_epoch; extern atomic_t rxrpc_debug_id; extern struct workqueue_struct *rxrpc_workqueue; @@ -495,70 +792,178 @@ extern struct workqueue_struct *rxrpc_workqueue; /* * call_accept.c */ +int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t); +void rxrpc_discard_prealloc(struct rxrpc_sock *); +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *, + struct rxrpc_connection *, + struct sk_buff *); void rxrpc_accept_incoming_calls(struct rxrpc_local *); -struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *, unsigned long, + rxrpc_notify_rx_t); int rxrpc_reject_call(struct rxrpc_sock *); /* * call_event.c */ -void __rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); -void rxrpc_propose_ACK(struct rxrpc_call *, u8, u32, bool); +void rxrpc_set_timer(struct rxrpc_call *, enum rxrpc_timer_trace, ktime_t); +void rxrpc_propose_ACK(struct rxrpc_call *, u8, u16, u32, bool, bool, + enum rxrpc_propose_ack_trace); void rxrpc_process_call(struct work_struct *); /* * call_object.c */ +extern const char *const rxrpc_call_states[]; +extern const char *const rxrpc_call_completions[]; extern unsigned int rxrpc_max_call_lifetime; -extern unsigned int rxrpc_dead_call_expiry; extern struct kmem_cache *rxrpc_call_jar; extern struct list_head rxrpc_calls; extern rwlock_t rxrpc_call_lock; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); +struct rxrpc_call *rxrpc_alloc_call(gfp_t); struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, unsigned long, gfp_t); -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *, - struct rxrpc_connection *, - struct sk_buff *); -void rxrpc_release_call(struct rxrpc_call *); +void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, + struct sk_buff *); +void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); -void __rxrpc_put_call(struct rxrpc_call *); +bool __rxrpc_queue_call(struct rxrpc_call *); +bool rxrpc_queue_call(struct rxrpc_call *); +void rxrpc_see_call(struct rxrpc_call *); +void rxrpc_get_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_put_call(struct rxrpc_call *, enum rxrpc_call_trace); +void rxrpc_cleanup_call(struct rxrpc_call *); void __exit rxrpc_destroy_all_calls(void); +static inline bool rxrpc_is_service_call(const struct rxrpc_call *call) +{ + return test_bit(RXRPC_CALL_IS_SERVICE, &call->flags); +} + +static inline bool rxrpc_is_client_call(const struct rxrpc_call *call) +{ + return !rxrpc_is_service_call(call); +} + +/* + * Transition a call to the complete state. + */ +static inline bool __rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + if (call->state < RXRPC_CALL_COMPLETE) { + call->abort_code = abort_code; + call->error = error; + call->completion = compl, + call->state = RXRPC_CALL_COMPLETE; + wake_up(&call->waitq); + return true; + } + return false; +} + +static inline bool rxrpc_set_call_completion(struct rxrpc_call *call, + enum rxrpc_call_completion compl, + u32 abort_code, + int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_set_call_completion(call, compl, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* + * Record that a call successfully completed. + */ +static inline bool __rxrpc_call_completed(struct rxrpc_call *call) +{ + return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0); +} + +static inline bool rxrpc_call_completed(struct rxrpc_call *call) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_call_completed(call); + write_unlock_bh(&call->state_lock); + return ret; +} + +/* + * Record that a call is locally aborted. + */ +static inline bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, + u32 abort_code, int error) +{ + trace_rxrpc_abort(why, call->cid, call->call_id, seq, + abort_code, error); + return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED, + abort_code, error); +} + +static inline bool rxrpc_abort_call(const char *why, struct rxrpc_call *call, + rxrpc_seq_t seq, u32 abort_code, int error) +{ + bool ret; + + write_lock_bh(&call->state_lock); + ret = __rxrpc_abort_call(why, call, seq, abort_code, error); + write_unlock_bh(&call->state_lock); + return ret; +} + /* * conn_client.c */ +extern unsigned int rxrpc_max_client_connections; +extern unsigned int rxrpc_reap_client_connections; +extern unsigned int rxrpc_conn_idle_client_expiry; +extern unsigned int rxrpc_conn_idle_client_fast_expiry; extern struct idr rxrpc_client_conn_ids; void rxrpc_destroy_client_conn_ids(void); int rxrpc_connect_call(struct rxrpc_call *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, gfp_t); -void rxrpc_unpublish_client_conn(struct rxrpc_connection *); +void rxrpc_expose_client_call(struct rxrpc_call *); +void rxrpc_disconnect_client_call(struct rxrpc_call *); +void rxrpc_put_client_conn(struct rxrpc_connection *); +void __exit rxrpc_destroy_all_client_connections(void); /* * conn_event.c */ void rxrpc_process_connection(struct work_struct *); -void rxrpc_reject_packet(struct rxrpc_local *, struct sk_buff *); -void rxrpc_reject_packets(struct rxrpc_local *); /* * conn_object.c */ extern unsigned int rxrpc_connection_expiry; extern struct list_head rxrpc_connections; +extern struct list_head rxrpc_connection_proc_list; extern rwlock_t rxrpc_connection_lock; int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, struct sk_buff *); -void __rxrpc_disconnect_call(struct rxrpc_call *); +void __rxrpc_disconnect_call(struct rxrpc_connection *, struct rxrpc_call *); void rxrpc_disconnect_call(struct rxrpc_call *); -void rxrpc_put_connection(struct rxrpc_connection *); +void rxrpc_kill_connection(struct rxrpc_connection *); +bool rxrpc_queue_conn(struct rxrpc_connection *); +void rxrpc_see_connection(struct rxrpc_connection *); +void rxrpc_get_connection(struct rxrpc_connection *); +struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *); +void rxrpc_put_service_conn(struct rxrpc_connection *); void __exit rxrpc_destroy_all_connections(void); static inline bool rxrpc_conn_is_client(const struct rxrpc_connection *conn) @@ -571,24 +976,15 @@ static inline bool rxrpc_conn_is_service(const struct rxrpc_connection *conn) return !rxrpc_conn_is_client(conn); } -static inline void rxrpc_get_connection(struct rxrpc_connection *conn) +static inline void rxrpc_put_connection(struct rxrpc_connection *conn) { - atomic_inc(&conn->usage); -} + if (!conn) + return; -static inline -struct rxrpc_connection *rxrpc_get_connection_maybe(struct rxrpc_connection *conn) -{ - return atomic_inc_not_zero(&conn->usage) ? conn : NULL; -} - -static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) -{ - if (!rxrpc_get_connection_maybe(conn)) - return false; - if (!rxrpc_queue_work(&conn->processor)) - rxrpc_put_connection(conn); - return true; + if (rxrpc_conn_is_client(conn)) + rxrpc_put_client_conn(conn); + else + rxrpc_put_service_conn(conn); } /* @@ -596,17 +992,14 @@ static inline bool rxrpc_queue_conn(struct rxrpc_connection *conn) */ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *, struct sk_buff *); -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *, - struct sockaddr_rxrpc *, - struct sk_buff *); +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t); +void rxrpc_new_incoming_connection(struct rxrpc_connection *, struct sk_buff *); void rxrpc_unpublish_service_conn(struct rxrpc_connection *); /* * input.c */ void rxrpc_data_ready(struct sock *); -int rxrpc_queue_rcv_skb(struct rxrpc_call *, struct sk_buff *, bool, bool); -void rxrpc_fast_process_packet(struct rxrpc_call *, struct sk_buff *); /* * insecure.c @@ -668,25 +1061,24 @@ extern unsigned int rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; +extern unsigned int rxrpc_resend_timeout; -extern const char *const rxrpc_pkts[]; extern const s8 rxrpc_ack_priority[]; -extern const char *rxrpc_acks(u8 reason); - /* * output.c */ -extern unsigned int rxrpc_resend_timeout; - -int rxrpc_send_data_packet(struct rxrpc_connection *, struct sk_buff *); -int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); +int rxrpc_send_call_packet(struct rxrpc_call *, u8); +int rxrpc_send_data_packet(struct rxrpc_call *, struct sk_buff *, bool); +void rxrpc_reject_packets(struct rxrpc_local *); /* * peer_event.c */ void rxrpc_error_report(struct sock *); void rxrpc_peer_error_distributor(struct work_struct *); +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); /* * peer_object.c @@ -696,10 +1088,13 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *, struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *, struct sockaddr_rxrpc *, gfp_t); struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t); +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *, + struct rxrpc_peer *); -static inline void rxrpc_get_peer(struct rxrpc_peer *peer) +static inline struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *peer) { atomic_inc(&peer->usage); + return peer; } static inline @@ -718,14 +1113,13 @@ static inline void rxrpc_put_peer(struct rxrpc_peer *peer) /* * proc.c */ -extern const char *const rxrpc_call_states[]; extern const struct file_operations rxrpc_call_seq_fops; extern const struct file_operations rxrpc_connection_seq_fops; /* * recvmsg.c */ -void rxrpc_remove_user_ID(struct rxrpc_sock *, struct rxrpc_call *); +void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* @@ -743,10 +1137,22 @@ void rxrpc_exit_security(void); int rxrpc_init_client_conn_security(struct rxrpc_connection *); int rxrpc_init_server_conn_security(struct rxrpc_connection *); +/* + * sendmsg.c + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t); + /* * skbuff.c */ +void rxrpc_kernel_data_consumed(struct rxrpc_call *, struct sk_buff *); void rxrpc_packet_destructor(struct sk_buff *); +void rxrpc_new_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_see_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_get_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_free_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_lose_skb(struct sk_buff *, enum rxrpc_skb_trace); +void rxrpc_purge_queue(struct sk_buff_head *); /* * sysctl.c @@ -764,6 +1170,23 @@ static inline void rxrpc_sysctl_exit(void) {} */ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); +static inline bool before(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) < 0; +} +static inline bool before_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) <= 0; +} +static inline bool after(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) > 0; +} +static inline bool after_eq(u32 seq1, u32 seq2) +{ + return (s32)(seq1 - seq2) >= 0; +} + /* * debug tracing */ @@ -846,11 +1269,12 @@ do { \ #define ASSERTCMP(X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely(!(_x OP _y))) { \ - pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) @@ -865,11 +1289,12 @@ do { \ #define ASSERTIFCMP(C, X, OP, Y) \ do { \ - unsigned long _x = (unsigned long)(X); \ - unsigned long _y = (unsigned long)(Y); \ + __typeof__(X) _x = (X); \ + __typeof__(Y) _y = (__typeof__(X))(Y); \ if (unlikely((C) && !(_x OP _y))) { \ pr_err("Assertion failed - %lu(0x%lx) %s %lu(0x%lx) is false\n", \ - _x, _x, #OP, _y, _y); \ + (unsigned long)_x, (unsigned long)_x, #OP, \ + (unsigned long)_y, (unsigned long)_y); \ BUG(); \ } \ } while (0) @@ -893,54 +1318,3 @@ do { \ } while (0) #endif /* __KDEBUGALL */ - -/* - * socket buffer accounting / leak finding - */ -static inline void __rxrpc_new_skb(struct sk_buff *skb, const char *fn) -{ - //_net("new skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_inc(&rxrpc_n_skbs); -} - -#define rxrpc_new_skb(skb) __rxrpc_new_skb((skb), __func__) - -static inline void __rxrpc_kill_skb(struct sk_buff *skb, const char *fn) -{ - //_net("kill skb %p %s [%d]", skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); -} - -#define rxrpc_kill_skb(skb) __rxrpc_kill_skb((skb), __func__) - -static inline void __rxrpc_free_skb(struct sk_buff *skb, const char *fn) -{ - if (skb) { - CHECK_SLAB_OKAY(&skb->users); - //_net("free skb %p %s [%d]", - // skb, fn, atomic_read(&rxrpc_n_skbs)); - //atomic_dec(&rxrpc_n_skbs); - kfree_skb(skb); - } -} - -#define rxrpc_free_skb(skb) __rxrpc_free_skb((skb), __func__) - -static inline void rxrpc_purge_queue(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue((list))) != NULL) - rxrpc_free_skb(skb); -} - -#define rxrpc_get_call(CALL) \ -do { \ - CHECK_SLAB_OKAY(&(CALL)->usage); \ - if (atomic_inc_return(&(CALL)->usage) == 1) \ - BUG(); \ -} while (0) - -#define rxrpc_put_call(CALL) \ -do { \ - __rxrpc_put_call(CALL); \ -} while (0) diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 9bae21e66d65..3cac231d8405 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -20,265 +20,409 @@ #include #include #include +#include #include #include #include #include "ar-internal.h" /* - * generate a connection-level abort + * Preallocate a single service call, connection and peer and, if possible, + * give them a user ID and attach the user's side of the ID to them. */ -static int rxrpc_busy(struct rxrpc_local *local, struct sockaddr_rxrpc *srx, - struct rxrpc_wire_header *whdr) +static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx, + struct rxrpc_backlog *b, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) { - struct msghdr msg; - struct kvec iov[1]; - size_t len; - int ret; + const void *here = __builtin_return_address(0); + struct rxrpc_call *call; + int max, tmp; + unsigned int size = RXRPC_BACKLOG_MAX; + unsigned int head, tail, call_head, call_tail; - _enter("%d,,", local->debug_id); + max = rx->sk.sk_max_ack_backlog; + tmp = rx->sk.sk_ack_backlog; + if (tmp >= max) { + _leave(" = -ENOBUFS [full %u]", max); + return -ENOBUFS; + } + max -= tmp; - whdr->type = RXRPC_PACKET_TYPE_BUSY; - whdr->serial = htonl(1); + /* We don't need more conns and peers than we have calls, but on the + * other hand, we shouldn't ever use more peers than conns or conns + * than calls. + */ + call_head = b->call_backlog_head; + call_tail = READ_ONCE(b->call_backlog_tail); + tmp = CIRC_CNT(call_head, call_tail, size); + if (tmp >= max) { + _leave(" = -ENOBUFS [enough %u]", tmp); + return -ENOBUFS; + } + max = tmp + 1; - msg.msg_name = &srx->transport.sin; - msg.msg_namelen = sizeof(srx->transport.sin); - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - iov[0].iov_base = whdr; - iov[0].iov_len = sizeof(*whdr); - - len = iov[0].iov_len; - - _proto("Tx BUSY %%1"); - - ret = kernel_sendmsg(local->socket, &msg, iov, 1, len); - if (ret < 0) { - _leave(" = -EAGAIN [sendmsg failed: %d]", ret); - return -EAGAIN; + head = b->peer_backlog_head; + tail = READ_ONCE(b->peer_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_peer *peer = rxrpc_alloc_peer(rx->local, gfp); + if (!peer) + return -ENOMEM; + b->peer_backlog[head] = peer; + smp_store_release(&b->peer_backlog_head, + (head + 1) & (size - 1)); } - _leave(" = 0"); + head = b->conn_backlog_head; + tail = READ_ONCE(b->conn_backlog_tail); + if (CIRC_CNT(head, tail, size) < max) { + struct rxrpc_connection *conn; + + conn = rxrpc_prealloc_service_connection(gfp); + if (!conn) + return -ENOMEM; + b->conn_backlog[head] = conn; + smp_store_release(&b->conn_backlog_head, + (head + 1) & (size - 1)); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), here); + } + + /* Now it gets complicated, because calls get registered with the + * socket here, particularly if a user ID is preassigned by the user. + */ + call = rxrpc_alloc_call(gfp); + if (!call) + return -ENOMEM; + call->flags |= (1 << RXRPC_CALL_IS_SERVICE); + call->state = RXRPC_CALL_SERVER_PREALLOC; + + trace_rxrpc_call(call, rxrpc_call_new_service, + atomic_read(&call->usage), + here, (const void *)user_call_ID); + + write_lock(&rx->call_lock); + if (user_attach_call) { + struct rxrpc_call *xcall; + struct rb_node *parent, **pp; + + /* Check the user ID isn't already in use */ + pp = &rx->calls.rb_node; + parent = NULL; + while (*pp) { + parent = *pp; + xcall = rb_entry(parent, struct rxrpc_call, sock_node); + if (user_call_ID < call->user_call_ID) + pp = &(*pp)->rb_left; + else if (user_call_ID > call->user_call_ID) + pp = &(*pp)->rb_right; + else + goto id_in_use; + } + + call->user_call_ID = user_call_ID; + call->notify_rx = notify_rx; + rxrpc_get_call(call, rxrpc_call_got_kernel); + user_attach_call(call, user_call_ID); + rxrpc_get_call(call, rxrpc_call_got_userid); + rb_link_node(&call->sock_node, parent, pp); + rb_insert_color(&call->sock_node, &rx->calls); + set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + } + + list_add(&call->sock_link, &rx->sock_calls); + + write_unlock(&rx->call_lock); + + write_lock(&rxrpc_call_lock); + list_add_tail(&call->link, &rxrpc_calls); + write_unlock(&rxrpc_call_lock); + + b->call_backlog[call_head] = call; + smp_store_release(&b->call_backlog_head, (call_head + 1) & (size - 1)); + _leave(" = 0 [%d -> %lx]", call->debug_id, user_call_ID); + return 0; + +id_in_use: + write_unlock(&rx->call_lock); + rxrpc_cleanup_call(call); + _leave(" = -EBADSLT"); + return -EBADSLT; +} + +/* + * Preallocate sufficient service connections, calls and peers to cover the + * entire backlog of a socket. When a new call comes in, if we don't have + * sufficient of each available, the call gets rejected as busy or ignored. + * + * The backlog is replenished when a connection is accepted or rejected. + */ +int rxrpc_service_prealloc(struct rxrpc_sock *rx, gfp_t gfp) +{ + struct rxrpc_backlog *b = rx->backlog; + + if (!b) { + b = kzalloc(sizeof(struct rxrpc_backlog), gfp); + if (!b) + return -ENOMEM; + rx->backlog = b; + } + + if (rx->discard_new_call) + return 0; + + while (rxrpc_service_prealloc_one(rx, b, NULL, NULL, 0, gfp) == 0) + ; + return 0; } /* - * accept an incoming call that needs peer, transport and/or connection setting - * up + * Discard the preallocation on a service. */ -static int rxrpc_accept_incoming_call(struct rxrpc_local *local, - struct rxrpc_sock *rx, - struct sk_buff *skb, - struct sockaddr_rxrpc *srx) +void rxrpc_discard_prealloc(struct rxrpc_sock *rx) { - struct rxrpc_connection *conn; - struct rxrpc_skb_priv *sp, *nsp; + struct rxrpc_backlog *b = rx->backlog; + unsigned int size = RXRPC_BACKLOG_MAX, head, tail; + + if (!b) + return; + rx->backlog = NULL; + + /* Make sure that there aren't any incoming calls in progress before we + * clear the preallocation buffers. + */ + spin_lock_bh(&rx->incoming_lock); + spin_unlock_bh(&rx->incoming_lock); + + head = b->peer_backlog_head; + tail = b->peer_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_peer *peer = b->peer_backlog[tail]; + kfree(peer); + tail = (tail + 1) & (size - 1); + } + + head = b->conn_backlog_head; + tail = b->conn_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_connection *conn = b->conn_backlog[tail]; + write_lock(&rxrpc_connection_lock); + list_del(&conn->link); + list_del(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); + kfree(conn); + tail = (tail + 1) & (size - 1); + } + + head = b->call_backlog_head; + tail = b->call_backlog_tail; + while (CIRC_CNT(head, tail, size) > 0) { + struct rxrpc_call *call = b->call_backlog[tail]; + if (rx->discard_new_call) { + _debug("discard %lx", call->user_call_ID); + rx->discard_new_call(call, call->user_call_ID); + rxrpc_put_call(call, rxrpc_call_put_kernel); + } + rxrpc_call_completed(call); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + tail = (tail + 1) & (size - 1); + } + + kfree(b); +} + +/* + * Allocate a new incoming call from the prealloc pool, along with a connection + * and a peer as necessary. + */ +static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_backlog *b = rx->backlog; + struct rxrpc_peer *peer, *xpeer; struct rxrpc_call *call; - struct sk_buff *notification; - int ret; + unsigned short call_head, conn_head, peer_head; + unsigned short call_tail, conn_tail, peer_tail; + unsigned short call_count, conn_count; + + /* #calls >= #conns >= #peers must hold true. */ + call_head = smp_load_acquire(&b->call_backlog_head); + call_tail = b->call_backlog_tail; + call_count = CIRC_CNT(call_head, call_tail, RXRPC_BACKLOG_MAX); + conn_head = smp_load_acquire(&b->conn_backlog_head); + conn_tail = b->conn_backlog_tail; + conn_count = CIRC_CNT(conn_head, conn_tail, RXRPC_BACKLOG_MAX); + ASSERTCMP(conn_count, >=, call_count); + peer_head = smp_load_acquire(&b->peer_backlog_head); + peer_tail = b->peer_backlog_tail; + ASSERTCMP(CIRC_CNT(peer_head, peer_tail, RXRPC_BACKLOG_MAX), >=, + conn_count); + + if (call_count == 0) + return NULL; + + if (!conn) { + /* No connection. We're going to need a peer to start off + * with. If one doesn't yet exist, use a spare from the + * preallocation set. We dump the address into the spare in + * anticipation - and to save on stack space. + */ + xpeer = b->peer_backlog[peer_tail]; + if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0) + return NULL; + + peer = rxrpc_lookup_incoming_peer(local, xpeer); + if (peer == xpeer) { + b->peer_backlog[peer_tail] = NULL; + smp_store_release(&b->peer_backlog_tail, + (peer_tail + 1) & + (RXRPC_BACKLOG_MAX - 1)); + } + + /* Now allocate and set up the connection */ + conn = b->conn_backlog[conn_tail]; + b->conn_backlog[conn_tail] = NULL; + smp_store_release(&b->conn_backlog_tail, + (conn_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + rxrpc_get_local(local); + conn->params.local = local; + conn->params.peer = peer; + rxrpc_see_connection(conn); + rxrpc_new_incoming_connection(conn, skb); + } else { + rxrpc_get_connection(conn); + } + + /* And now we can allocate and set up a new call */ + call = b->call_backlog[call_tail]; + b->call_backlog[call_tail] = NULL; + smp_store_release(&b->call_backlog_tail, + (call_tail + 1) & (RXRPC_BACKLOG_MAX - 1)); + + rxrpc_see_call(call); + call->conn = conn; + call->peer = rxrpc_get_peer(conn->params.peer); + return call; +} + +/* + * Set up a new incoming call. Called in BH context with the RCU read lock + * held. + * + * If this is for a kernel service, when we allocate the call, it will have + * three refs on it: (1) the kernel service, (2) the user_call_ID tree, (3) the + * retainer ref obtained from the backlog buffer. Prealloc calls for userspace + * services only have the ref from the backlog buffer. We want to pass this + * ref to non-BH context to dispose of. + * + * If we want to report an error, we mark the skb with the packet type and + * abort code and return NULL. + */ +struct rxrpc_call *rxrpc_new_incoming_call(struct rxrpc_local *local, + struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_sock *rx; + struct rxrpc_call *call; + u16 service_id = sp->hdr.serviceId; _enter(""); - sp = rxrpc_skb(skb); + /* Get the socket providing the service */ + rx = rcu_dereference(local->service); + if (service_id == rx->srx.srx_service) + goto found_service; - /* get a notification message to send to the server app */ - notification = alloc_skb(0, GFP_NOFS); - if (!notification) { - _debug("no memory"); - ret = -ENOMEM; - goto error_nofree; - } - rxrpc_new_skb(notification); - notification->mark = RXRPC_SKB_MARK_NEW_CALL; - - conn = rxrpc_incoming_connection(local, srx, skb); - if (IS_ERR(conn)) { - _debug("no conn"); - ret = PTR_ERR(conn); - goto error; - } - - call = rxrpc_incoming_call(rx, conn, skb); - rxrpc_put_connection(conn); - if (IS_ERR(call)) { - _debug("no call"); - ret = PTR_ERR(call); - goto error; - } - - /* attach the call to the socket */ - read_lock_bh(&local->services_lock); - if (rx->sk.sk_state == RXRPC_CLOSE) - goto invalid_service; - - write_lock(&rx->call_lock); - if (!test_and_set_bit(RXRPC_CALL_INIT_ACCEPT, &call->flags)) { - rxrpc_get_call(call); - - spin_lock(&call->conn->state_lock); - if (sp->hdr.securityIndex > 0 && - call->conn->state == RXRPC_CONN_SERVICE_UNSECURED) { - _debug("await conn sec"); - list_add_tail(&call->accept_link, &rx->secureq); - call->conn->state = RXRPC_CONN_SERVICE_CHALLENGING; - set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); - rxrpc_queue_conn(call->conn); - } else { - _debug("conn ready"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_add_tail(&call->accept_link, &rx->acceptq); - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - nsp = rxrpc_skb(notification); - nsp->call = call; - - ASSERTCMP(atomic_read(&call->usage), >=, 3); - - _debug("notify"); - spin_lock(&call->lock); - ret = rxrpc_queue_rcv_skb(call, notification, true, - false); - spin_unlock(&call->lock); - notification = NULL; - BUG_ON(ret < 0); - } - spin_unlock(&call->conn->state_lock); - - _debug("queued"); - } - write_unlock(&rx->call_lock); - - _debug("process"); - rxrpc_fast_process_packet(call, skb); - - _debug("done"); - read_unlock_bh(&local->services_lock); - rxrpc_free_skb(notification); - rxrpc_put_call(call); - _leave(" = 0"); - return 0; - -invalid_service: - _debug("invalid"); - read_unlock_bh(&local->services_lock); - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_get_call(call); - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); - rxrpc_put_call(call); - ret = -ECONNREFUSED; -error: - rxrpc_free_skb(notification); -error_nofree: - _leave(" = %d", ret); - return ret; -} - -/* - * accept incoming calls that need peer, transport and/or connection setting up - * - the packets we get are all incoming client DATA packets that have seq == 1 - */ -void rxrpc_accept_incoming_calls(struct rxrpc_local *local) -{ - struct rxrpc_skb_priv *sp; - struct sockaddr_rxrpc srx; - struct rxrpc_sock *rx; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - int ret; - - _enter("%d", local->debug_id); - - skb = skb_dequeue(&local->accept_queue); - if (!skb) { - _leave("\n"); - return; - } - - _net("incoming call skb %p", skb); - - sp = rxrpc_skb(skb); - - /* Set up a response packet header in case we need it */ - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = 0; - whdr.flags = 0; - whdr.type = 0; - whdr.userStatus = 0; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = 0; - whdr.serviceId = htons(sp->hdr.serviceId); - - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) - goto drop; - - /* get the socket providing the service */ - read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == sp->hdr.serviceId && - rx->sk.sk_state != RXRPC_CLOSE) - goto found_service; - } - read_unlock_bh(&local->services_lock); - goto invalid_service; + trace_rxrpc_abort("INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_INVALID_OPERATION, EOPNOTSUPP); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [service]"); + return NULL; found_service: - _debug("found service %hd", rx->srx.srx_service); - if (sk_acceptq_is_full(&rx->sk)) - goto backlog_full; - sk_acceptq_added(&rx->sk); - sock_hold(&rx->sk); - read_unlock_bh(&local->services_lock); + spin_lock(&rx->incoming_lock); + if (rx->sk.sk_state == RXRPC_CLOSE) { + trace_rxrpc_abort("CLS", sp->hdr.cid, sp->hdr.callNumber, + sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN); + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; + skb->priority = RX_INVALID_OPERATION; + _leave(" = NULL [close]"); + call = NULL; + goto out; + } - ret = rxrpc_accept_incoming_call(local, rx, skb, &srx); - if (ret < 0) - sk_acceptq_removed(&rx->sk); - sock_put(&rx->sk); - switch (ret) { - case -ECONNRESET: /* old calls are ignored */ - case -ECONNABORTED: /* aborted calls are reaborted or ignored */ - case 0: - return; - case -ECONNREFUSED: - goto invalid_service; - case -EBUSY: - goto busy; - case -EKEYREJECTED: - goto security_mismatch; + call = rxrpc_alloc_incoming_call(rx, local, conn, skb); + if (!call) { + skb->mark = RXRPC_SKB_MARK_BUSY; + _leave(" = NULL [busy]"); + call = NULL; + goto out; + } + + trace_rxrpc_receive(call, rxrpc_receive_incoming, + sp->hdr.serial, sp->hdr.seq); + + /* Make the call live. */ + rxrpc_incoming_call(rx, call, skb); + conn = call->conn; + + if (rx->notify_new_call) + rx->notify_new_call(&rx->sk, call, call->user_call_ID); + else + sk_acceptq_added(&rx->sk); + + spin_lock(&conn->state_lock); + switch (conn->state) { + case RXRPC_CONN_SERVICE_UNSECURED: + conn->state = RXRPC_CONN_SERVICE_CHALLENGING; + set_bit(RXRPC_CONN_EV_CHALLENGE, &call->conn->events); + rxrpc_queue_conn(call->conn); + break; + + case RXRPC_CONN_SERVICE: + write_lock(&call->state_lock); + if (rx->discard_new_call) + call->state = RXRPC_CALL_SERVER_RECV_REQUEST; + else + call->state = RXRPC_CALL_SERVER_ACCEPTING; + write_unlock(&call->state_lock); + break; + + case RXRPC_CONN_REMOTELY_ABORTED: + rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + conn->remote_abort, ECONNABORTED); + break; + case RXRPC_CONN_LOCALLY_ABORTED: + rxrpc_abort_call("CON", call, sp->hdr.seq, + conn->local_abort, ECONNABORTED); + break; default: BUG(); } + spin_unlock(&conn->state_lock); -backlog_full: - read_unlock_bh(&local->services_lock); -busy: - rxrpc_busy(local, &srx, &whdr); - rxrpc_free_skb(skb); - return; + if (call->state == RXRPC_CALL_SERVER_ACCEPTING) + rxrpc_notify_socket(call); -drop: - rxrpc_free_skb(skb); - return; + /* We have to discard the prealloc queue's ref here and rely on a + * combination of the RCU read lock and refs held either by the socket + * (recvmsg queue, to-be-accepted queue or user ID tree) or the kernel + * service to prevent the call from being deallocated too early. + */ + rxrpc_put_call(call, rxrpc_call_put); -invalid_service: - skb->priority = RX_INVALID_OPERATION; - rxrpc_reject_packet(local, skb); - return; - - /* can't change connection security type mid-flow */ -security_mismatch: - skb->priority = RX_PROTOCOL_ERROR; - rxrpc_reject_packet(local, skb); - return; + _leave(" = %p{%d}", call, call->debug_id); +out: + spin_unlock(&rx->incoming_lock); + return call; } /* @@ -286,7 +430,8 @@ security_mismatch: * - assign the user call ID to the call at the front of the queue */ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, - unsigned long user_call_ID) + unsigned long user_call_ID, + rxrpc_notify_rx_t notify_rx) { struct rxrpc_call *call; struct rb_node *parent, **pp; @@ -298,12 +443,13 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->acceptq)) - goto out; + if (list_empty(&rx->to_be_accepted)) { + write_unlock(&rx->call_lock); + kleave(" = -ENODATA [empty]"); + return ERR_PTR(-ENODATA); + } /* check the user ID isn't already in use */ - ret = -EBADSLT; pp = &rx->calls.rb_node; parent = NULL; while (*pp) { @@ -315,62 +461,59 @@ struct rxrpc_call *rxrpc_accept_call(struct rxrpc_sock *rx, else if (user_call_ID > call->user_call_ID) pp = &(*pp)->rb_right; else - goto out; + goto id_in_use; } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); + rxrpc_see_call(call); write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: call->state = RXRPC_CALL_SERVER_RECV_REQUEST; break; - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_LOCALLY_ABORTED: - ret = -ECONNABORTED; + case RXRPC_CALL_COMPLETE: + ret = call->error; goto out_release; - case RXRPC_CALL_NETWORK_ERROR: - ret = call->conn->error; - goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; - goto out_discard; default: BUG(); } /* formalise the acceptance */ + call->notify_rx = notify_rx; call->user_call_ID = user_call_ID; + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); if (test_and_set_bit(RXRPC_CALL_HAS_USERID, &call->flags)) BUG(); - if (test_and_set_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) - BUG(); - rxrpc_queue_call(call); - rxrpc_get_call(call); write_unlock_bh(&call->state_lock); write_unlock(&rx->call_lock); + rxrpc_notify_socket(call); + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %p{%d}", call, call->debug_id); return call; - /* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ out_release: _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); -out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); -out: write_unlock(&rx->call_lock); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + goto out; + +id_in_use: + ret = -EBADSLT; + write_unlock(&rx->call_lock); +out: + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ERR_PTR(ret); } @@ -382,6 +525,7 @@ out: int rxrpc_reject_call(struct rxrpc_sock *rx) { struct rxrpc_call *call; + bool abort = false; int ret; _enter(""); @@ -390,88 +534,73 @@ int rxrpc_reject_call(struct rxrpc_sock *rx) write_lock(&rx->call_lock); - ret = -ENODATA; - if (list_empty(&rx->acceptq)) - goto out; + if (list_empty(&rx->to_be_accepted)) { + write_unlock(&rx->call_lock); + return -ENODATA; + } - /* dequeue the first call and check it's still valid */ - call = list_entry(rx->acceptq.next, struct rxrpc_call, accept_link); + /* Dequeue the first call and check it's still valid. We gain + * responsibility for the queue's reference. + */ + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); list_del_init(&call->accept_link); sk_acceptq_removed(&rx->sk); + rxrpc_see_call(call); write_lock_bh(&call->state_lock); switch (call->state) { case RXRPC_CALL_SERVER_ACCEPTING: - call->state = RXRPC_CALL_SERVER_BUSY; - if (test_and_set_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) - rxrpc_queue_call(call); - ret = 0; - goto out_release; - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_LOCALLY_ABORTED: - ret = -ECONNABORTED; - goto out_release; - case RXRPC_CALL_NETWORK_ERROR: - ret = call->conn->error; - goto out_release; - case RXRPC_CALL_DEAD: - ret = -ETIME; + __rxrpc_abort_call("REJ", call, 1, RX_USER_ABORT, ECONNABORTED); + abort = true; + /* fall through */ + case RXRPC_CALL_COMPLETE: + ret = call->error; goto out_discard; default: BUG(); } - /* if the call is already dying or dead, then we leave the socket's ref - * on it to be released by rxrpc_dead_call_expired() as induced by - * rxrpc_release_call() */ -out_release: - _debug("release %p", call); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); out_discard: write_unlock_bh(&call->state_lock); - _debug("discard %p", call); -out: write_unlock(&rx->call_lock); + if (abort) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + } + rxrpc_service_prealloc(rx, GFP_KERNEL); _leave(" = %d", ret); return ret; } -/** - * rxrpc_kernel_accept_call - Allow a kernel service to accept an incoming call - * @sock: The socket on which the impending call is waiting - * @user_call_ID: The tag to attach to the call +/* + * rxrpc_kernel_charge_accept - Charge up socket with preallocated calls + * @sock: The socket on which to preallocate + * @notify_rx: Event notification function for the call + * @user_attach_call: Func to attach call to user_call_ID + * @user_call_ID: The tag to attach to the preallocated call + * @gfp: The allocation conditions. * - * Allow a kernel service to accept an incoming call, assuming the incoming - * call is still valid. - */ -struct rxrpc_call *rxrpc_kernel_accept_call(struct socket *sock, - unsigned long user_call_ID) -{ - struct rxrpc_call *call; - - _enter(",%lx", user_call_ID); - call = rxrpc_accept_call(rxrpc_sk(sock->sk), user_call_ID); - _leave(" = %p", call); - return call; -} -EXPORT_SYMBOL(rxrpc_kernel_accept_call); - -/** - * rxrpc_kernel_reject_call - Allow a kernel service to reject an incoming call - * @sock: The socket on which the impending call is waiting + * Charge up the socket with preallocated calls, each with a user ID. A + * function should be provided to effect the attachment from the user's side. + * The user is given a ref to hold on the call. * - * Allow a kernel service to reject an incoming call with a BUSY message, - * assuming the incoming call is still valid. + * Note that the call may be come connected before this function returns. */ -int rxrpc_kernel_reject_call(struct socket *sock) +int rxrpc_kernel_charge_accept(struct socket *sock, + rxrpc_notify_rx_t notify_rx, + rxrpc_user_attach_call_t user_attach_call, + unsigned long user_call_ID, gfp_t gfp) { - int ret; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + struct rxrpc_backlog *b = rx->backlog; - _enter(""); - ret = rxrpc_reject_call(rxrpc_sk(sock->sk)); - _leave(" = %d", ret); - return ret; + if (sock->sk->sk_state == RXRPC_CLOSE) + return -ESHUTDOWN; + + return rxrpc_service_prealloc_one(rx, b, notify_rx, + user_attach_call, user_call_ID, + gfp); } -EXPORT_SYMBOL(rxrpc_kernel_reject_call); +EXPORT_SYMBOL(rxrpc_kernel_charge_accept); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index e60cf65c2232..4f00476630b9 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -21,1282 +21,352 @@ #include #include "ar-internal.h" +/* + * Set the timer + */ +void rxrpc_set_timer(struct rxrpc_call *call, enum rxrpc_timer_trace why, + ktime_t now) +{ + unsigned long t_j, now_j = jiffies; + ktime_t t; + bool queue = false; + + read_lock_bh(&call->state_lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + t = call->expire_at; + if (!ktime_after(t, now)) + goto out; + + if (!ktime_after(call->resend_at, now)) { + call->resend_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + queue = true; + } else if (ktime_before(call->resend_at, t)) { + t = call->resend_at; + } + + if (!ktime_after(call->ack_at, now)) { + call->ack_at = call->expire_at; + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) + queue = true; + } else if (ktime_before(call->ack_at, t)) { + t = call->ack_at; + } + + t_j = nsecs_to_jiffies(ktime_to_ns(ktime_sub(t, now))); + t_j += jiffies; + + /* We have to make sure that the calculated jiffies value falls + * at or after the nsec value, or we may loop ceaselessly + * because the timer times out, but we haven't reached the nsec + * timeout yet. + */ + t_j++; + + if (call->timer.expires != t_j || !timer_pending(&call->timer)) { + mod_timer(&call->timer, t_j); + trace_rxrpc_timer(call, why, now, now_j); + } + + if (queue) + rxrpc_queue_call(call); + } + +out: + read_unlock_bh(&call->state_lock); +} + /* * propose an ACK be sent */ -void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate) +static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, + u16 skew, u32 serial, bool immediate, + bool background, + enum rxrpc_propose_ack_trace why) { - unsigned long expiry; + enum rxrpc_propose_ack_outcome outcome = rxrpc_propose_ack_use; + unsigned int expiry = rxrpc_soft_ack_delay; + ktime_t now, ack_at; s8 prior = rxrpc_ack_priority[ack_reason]; - ASSERTCMP(prior, >, 0); - - _enter("{%d},%s,%%%x,%u", - call->debug_id, rxrpc_acks(ack_reason), serial, immediate); - - if (prior < rxrpc_ack_priority[call->ackr_reason]) { - if (immediate) - goto cancel_timer; - return; - } - - /* update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial - * numbers */ - if (prior == rxrpc_ack_priority[call->ackr_reason]) { - if (prior <= 4) + /* Update DELAY, IDLE, REQUESTED and PING_RESPONSE ACK serial + * numbers, but we don't alter the timeout. + */ + _debug("prior %u %u vs %u %u", + ack_reason, prior, + call->ackr_reason, rxrpc_ack_priority[call->ackr_reason]); + if (ack_reason == call->ackr_reason) { + if (RXRPC_ACK_UPDATEABLE & (1 << ack_reason)) { + outcome = rxrpc_propose_ack_update; call->ackr_serial = serial; - if (immediate) - goto cancel_timer; - return; + call->ackr_skew = skew; + } + if (!immediate) + goto trace; + } else if (prior > rxrpc_ack_priority[call->ackr_reason]) { + call->ackr_reason = ack_reason; + call->ackr_serial = serial; + call->ackr_skew = skew; + } else { + outcome = rxrpc_propose_ack_subsume; } - call->ackr_reason = ack_reason; - call->ackr_serial = serial; - switch (ack_reason) { - case RXRPC_ACK_DELAY: - _debug("run delay timer"); - expiry = rxrpc_soft_ack_delay; - goto run_timer; - - case RXRPC_ACK_IDLE: - if (!immediate) { - _debug("run defer timer"); - expiry = rxrpc_idle_ack_delay; - goto run_timer; - } - goto cancel_timer; - case RXRPC_ACK_REQUESTED: - expiry = rxrpc_requested_ack_delay; - if (!expiry) - goto cancel_timer; - if (!immediate || serial == 1) { - _debug("run defer timer"); - goto run_timer; - } + if (rxrpc_requested_ack_delay < expiry) + expiry = rxrpc_requested_ack_delay; + if (serial == 1) + immediate = false; + break; + + case RXRPC_ACK_DELAY: + if (rxrpc_soft_ack_delay < expiry) + expiry = rxrpc_soft_ack_delay; + break; + + case RXRPC_ACK_PING: + case RXRPC_ACK_IDLE: + if (rxrpc_idle_ack_delay < expiry) + expiry = rxrpc_idle_ack_delay; + break; default: - _debug("immediate ACK"); - goto cancel_timer; + immediate = true; + break; } -run_timer: - expiry += jiffies; - if (!timer_pending(&call->ack_timer) || - time_after(call->ack_timer.expires, expiry)) - mod_timer(&call->ack_timer, expiry); - return; + if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { + _debug("already scheduled"); + } else if (immediate || expiry == 0) { + _debug("immediate ACK %lx", call->events); + if (!test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events) && + background) + rxrpc_queue_call(call); + } else { + now = ktime_get_real(); + ack_at = ktime_add_ms(now, expiry); + if (ktime_before(ack_at, call->ack_at)) { + call->ack_at = ack_at; + rxrpc_set_timer(call, rxrpc_timer_set_for_ack, now); + } + } -cancel_timer: - _debug("cancel timer %%%u", serial); - try_to_del_timer_sync(&call->ack_timer); - read_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); +trace: + trace_rxrpc_propose_ack(call, why, ack_reason, serial, immediate, + background, outcome); } /* * propose an ACK be sent, locking the call structure */ void rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, - u32 serial, bool immediate) + u16 skew, u32 serial, bool immediate, bool background, + enum rxrpc_propose_ack_trace why) { - s8 prior = rxrpc_ack_priority[ack_reason]; - - if (prior > rxrpc_ack_priority[call->ackr_reason]) { - spin_lock_bh(&call->lock); - __rxrpc_propose_ACK(call, ack_reason, serial, immediate); - spin_unlock_bh(&call->lock); - } + spin_lock_bh(&call->lock); + __rxrpc_propose_ACK(call, ack_reason, skew, serial, + immediate, background, why); + spin_unlock_bh(&call->lock); } /* - * set the resend timer + * Handle congestion being detected by the retransmit timeout. */ -static void rxrpc_set_resend(struct rxrpc_call *call, u8 resend, - unsigned long resend_at) +static void rxrpc_congestion_timeout(struct rxrpc_call *call) { - read_lock_bh(&call->state_lock); - if (call->state >= RXRPC_CALL_COMPLETE) - resend = 0; - - if (resend & 1) { - _debug("SET RESEND"); - set_bit(RXRPC_CALL_EV_RESEND, &call->events); - } - - if (resend & 2) { - _debug("MODIFY RESEND TIMER"); - set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - mod_timer(&call->resend_timer, resend_at); - } else { - _debug("KILL RESEND TIMER"); - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - read_unlock_bh(&call->state_lock); + set_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags); } /* - * resend packets + * Perform retransmission of NAK'd and unack'd packets. */ -static void rxrpc_resend(struct rxrpc_call *call) +static void rxrpc_resend(struct rxrpc_call *call, ktime_t now) { - struct rxrpc_wire_header *whdr; struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - bool stop; - int loop; - u8 resend; + struct sk_buff *skb; + rxrpc_seq_t cursor, seq, top; + ktime_t max_age, oldest, ack_ts; + int ix; + u8 annotation, anno_type, retrans = 0, unacked = 0; - _enter("{%d,%d,%d,%d},", - call->acks_hard, call->acks_unacked, - atomic_read(&call->sequence), - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); + _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - stop = false; - resend = 0; - resend_at = 0; + max_age = ktime_sub_ms(now, rxrpc_resend_timeout); - for (loop = call->acks_tail; - loop != call->acks_head || stop; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - if (*p_txb & 1) + spin_lock_bh(&call->lock); + + cursor = call->tx_hard_ack; + top = call->tx_top; + ASSERT(before_eq(cursor, top)); + if (cursor == top) + goto out_unlock; + + /* Scan the packet list without dropping the lock and decide which of + * the packets in the Tx buffer we're going to resend and what the new + * resend timeout will be. + */ + oldest = now; + for (seq = cursor + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_ACK) continue; - txb = (struct sk_buff *) *p_txb; - sp = rxrpc_skb(txb); + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + sp = rxrpc_skb(skb); - if (sp->need_resend) { - sp->need_resend = false; - - /* each Tx packet has a new serial number */ - sp->hdr.serial = atomic_inc_return(&call->conn->serial); - - whdr = (struct rxrpc_wire_header *)txb->head; - whdr->serial = htonl(sp->hdr.serial); - - _proto("Tx DATA %%%u { #%d }", - sp->hdr.serial, sp->hdr.seq); - if (rxrpc_send_data_packet(call->conn, txb) < 0) { - stop = true; - sp->resend_at = jiffies + 3; - } else { - sp->resend_at = - jiffies + rxrpc_resend_timeout; + if (anno_type == RXRPC_TX_ANNO_UNACK) { + if (ktime_after(skb->tstamp, max_age)) { + if (ktime_before(skb->tstamp, oldest)) + oldest = skb->tstamp; + continue; } + if (!(annotation & RXRPC_TX_ANNO_RESENT)) + unacked++; } - if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } + /* Okay, we need to retransmit a packet. */ + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS | annotation; + retrans++; + trace_rxrpc_retransmit(call, seq, annotation | anno_type, + ktime_to_ns(ktime_sub(skb->tstamp, max_age))); } - rxrpc_set_resend(call, resend, resend_at); + call->resend_at = ktime_add_ms(oldest, rxrpc_resend_timeout); + + if (unacked) + rxrpc_congestion_timeout(call); + + /* If there was nothing that needed retransmission then it's likely + * that an ACK got lost somewhere. Send a ping to find out instead of + * retransmitting data. + */ + if (!retrans) { + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); + spin_unlock_bh(&call->lock); + ack_ts = ktime_sub(now, call->acks_latest_ts); + if (ktime_to_ns(ack_ts) < call->peer->rtt) + goto out; + rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, 0, true, false, + rxrpc_propose_ack_ping_for_lost_ack); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto out; + } + + /* Now go through the Tx window and perform the retransmissions. We + * have to drop the lock for each send. If an ACK comes in whilst the + * lock is dropped, it may clear some of the retransmission markers for + * packets that it soft-ACKs. + */ + for (seq = cursor + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type != RXRPC_TX_ANNO_RETRANS) + continue; + + skb = call->rxtx_buffer[ix]; + rxrpc_get_skb(skb, rxrpc_skb_tx_got); + spin_unlock_bh(&call->lock); + + if (rxrpc_send_data_packet(call, skb, true) < 0) { + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + return; + } + + if (rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + spin_lock_bh(&call->lock); + + /* We need to clear the retransmit state, but there are two + * things we need to be aware of: A new ACK/NAK might have been + * received and the packet might have been hard-ACK'd (in which + * case it will no longer be in the buffer). + */ + if (after(seq, call->tx_hard_ack)) { + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + if (anno_type == RXRPC_TX_ANNO_RETRANS || + anno_type == RXRPC_TX_ANNO_NAK) { + annotation &= ~RXRPC_TX_ANNO_MASK; + annotation |= RXRPC_TX_ANNO_UNACK; + } + annotation |= RXRPC_TX_ANNO_RESENT; + call->rxtx_annotations[ix] = annotation; + } + + if (after(call->tx_hard_ack, seq)) + seq = call->tx_hard_ack; + } + +out_unlock: + spin_unlock_bh(&call->lock); +out: _leave(""); } /* - * handle resend timer expiry - */ -static void rxrpc_resend_timer(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 resend; - - _enter("%d,%d,%d", - call->acks_tail, call->acks_unacked, call->acks_head); - - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - resend = 0; - resend_at = 0; - - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - ASSERT(!(*p_txb & 1)); - - if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(""); -} - -/* - * process soft ACKs of our transmitted packets - * - these indicate packets the peer has or has not received, but hasn't yet - * given to the consumer, and so can still be discarded and re-requested - */ -static int rxrpc_process_soft_ACKs(struct rxrpc_call *call, - struct rxrpc_ackpacket *ack, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *txb; - unsigned long *p_txb, resend_at; - int loop; - u8 sacks[RXRPC_MAXACKS], resend; - - _enter("{%d,%d},{%d},", - call->acks_hard, - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz), - ack->nAcks); - - if (skb_copy_bits(skb, 0, sacks, ack->nAcks) < 0) - goto protocol_error; - - resend = 0; - resend_at = 0; - for (loop = 0; loop < ack->nAcks; loop++) { - p_txb = call->acks_window; - p_txb += (call->acks_tail + loop) & (call->acks_winsz - 1); - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - switch (sacks[loop]) { - case RXRPC_ACK_TYPE_ACK: - sp->need_resend = false; - *p_txb |= 1; - break; - case RXRPC_ACK_TYPE_NACK: - sp->need_resend = true; - *p_txb &= ~1; - resend = 1; - break; - default: - _debug("Unsupported ACK type %d", sacks[loop]); - goto protocol_error; - } - } - - smp_mb(); - call->acks_unacked = (call->acks_tail + loop) & (call->acks_winsz - 1); - - /* anything not explicitly ACK'd is implicitly NACK'd, but may just not - * have been received or processed yet by the far end */ - for (loop = call->acks_unacked; - loop != call->acks_head; - loop = (loop + 1) & (call->acks_winsz - 1) - ) { - p_txb = call->acks_window + loop; - smp_read_barrier_depends(); - txb = (struct sk_buff *) (*p_txb & ~1); - sp = rxrpc_skb(txb); - - if (*p_txb & 1) { - /* packet must have been discarded */ - sp->need_resend = true; - *p_txb &= ~1; - resend |= 1; - } else if (sp->need_resend) { - ; - } else if (time_after_eq(jiffies + 1, sp->resend_at)) { - sp->need_resend = true; - resend |= 1; - } else if (resend & 2) { - if (time_before(sp->resend_at, resend_at)) - resend_at = sp->resend_at; - } else { - resend_at = sp->resend_at; - resend |= 2; - } - } - - rxrpc_set_resend(call, resend, resend_at); - _leave(" = 0"); - return 0; - -protocol_error: - _leave(" = -EPROTO"); - return -EPROTO; -} - -/* - * discard hard-ACK'd packets from the Tx window - */ -static void rxrpc_rotate_tx_window(struct rxrpc_call *call, u32 hard) -{ - unsigned long _skb; - int tail = call->acks_tail, old_tail; - int win = CIRC_CNT(call->acks_head, tail, call->acks_winsz); - - _enter("{%u,%u},%u", call->acks_hard, win, hard); - - ASSERTCMP(hard - call->acks_hard, <=, win); - - while (call->acks_hard < hard) { - smp_read_barrier_depends(); - _skb = call->acks_window[tail] & ~1; - rxrpc_free_skb((struct sk_buff *) _skb); - old_tail = tail; - tail = (tail + 1) & (call->acks_winsz - 1); - call->acks_tail = tail; - if (call->acks_unacked == old_tail) - call->acks_unacked = tail; - call->acks_hard++; - } - - wake_up(&call->tx_waitq); -} - -/* - * clear the Tx window in the event of a failure - */ -static void rxrpc_clear_tx_window(struct rxrpc_call *call) -{ - rxrpc_rotate_tx_window(call, atomic_read(&call->sequence)); -} - -/* - * drain the out of sequence received packet queue into the packet Rx queue - */ -static int rxrpc_drain_rx_oos_queue(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - bool terminal; - int ret; - - _enter("{%d,%d}", call->rx_data_post, call->rx_first_oos); - - spin_lock_bh(&call->lock); - - ret = -ECONNRESET; - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) - goto socket_unavailable; - - skb = skb_dequeue(&call->rx_oos_queue); - if (skb) { - sp = rxrpc_skb(skb); - - _debug("drain OOS packet %d [%d]", - sp->hdr.seq, call->rx_first_oos); - - if (sp->hdr.seq != call->rx_first_oos) { - skb_queue_head(&call->rx_oos_queue, skb); - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - _debug("requeue %p {%u}", skb, call->rx_first_oos); - } else { - skb->mark = RXRPC_SKB_MARK_DATA; - terminal = ((sp->hdr.flags & RXRPC_LAST_PACKET) && - !(sp->hdr.flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, true, terminal); - BUG_ON(ret < 0); - _debug("drain #%u", call->rx_data_post); - call->rx_data_post++; - - /* find out what the next packet is */ - skb = skb_peek(&call->rx_oos_queue); - if (skb) - call->rx_first_oos = rxrpc_skb(skb)->hdr.seq; - else - call->rx_first_oos = 0; - _debug("peek %p {%u}", skb, call->rx_first_oos); - } - } - - ret = 0; -socket_unavailable: - spin_unlock_bh(&call->lock); - _leave(" = %d", ret); - return ret; -} - -/* - * insert an out of sequence packet into the buffer - */ -static void rxrpc_insert_oos_packet(struct rxrpc_call *call, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp, *psp; - struct sk_buff *p; - u32 seq; - - sp = rxrpc_skb(skb); - seq = sp->hdr.seq; - _enter(",,{%u}", seq); - - skb->destructor = rxrpc_packet_destructor; - ASSERTCMP(sp->call, ==, NULL); - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - - /* insert into the buffer in sequence order */ - spin_lock_bh(&call->lock); - - skb_queue_walk(&call->rx_oos_queue, p) { - psp = rxrpc_skb(p); - if (psp->hdr.seq > seq) { - _debug("insert oos #%u before #%u", seq, psp->hdr.seq); - skb_insert(p, skb, &call->rx_oos_queue); - goto inserted; - } - } - - _debug("append oos #%u", seq); - skb_queue_tail(&call->rx_oos_queue, skb); -inserted: - - /* we might now have a new front to the queue */ - if (call->rx_first_oos == 0 || seq < call->rx_first_oos) - call->rx_first_oos = seq; - - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - } - read_unlock(&call->state_lock); - - spin_unlock_bh(&call->lock); - _leave(" [stored #%u]", call->rx_first_oos); -} - -/* - * clear the Tx window on final ACK reception - */ -static void rxrpc_zap_tx_window(struct rxrpc_call *call) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - unsigned long _skb, *acks_window; - u8 winsz = call->acks_winsz; - int tail; - - acks_window = call->acks_window; - call->acks_window = NULL; - - while (CIRC_CNT(call->acks_head, call->acks_tail, winsz) > 0) { - tail = call->acks_tail; - smp_read_barrier_depends(); - _skb = acks_window[tail] & ~1; - smp_mb(); - call->acks_tail = (call->acks_tail + 1) & (winsz - 1); - - skb = (struct sk_buff *) _skb; - sp = rxrpc_skb(skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb(skb); - } - - kfree(acks_window); -} - -/* - * process the extra information that may be appended to an ACK packet - */ -static void rxrpc_extract_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, - unsigned int latest, int nAcks) -{ - struct rxrpc_ackinfo ackinfo; - struct rxrpc_peer *peer; - unsigned int mtu; - - if (skb_copy_bits(skb, nAcks + 3, &ackinfo, sizeof(ackinfo)) < 0) { - _leave(" [no ackinfo]"); - return; - } - - _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", - latest, - ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU), - ntohl(ackinfo.rwind), ntohl(ackinfo.jumbo_max)); - - mtu = min(ntohl(ackinfo.rxMTU), ntohl(ackinfo.maxMTU)); - - peer = call->conn->params.peer; - if (mtu < peer->maxdata) { - spin_lock_bh(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + peer->hdrsize; - spin_unlock_bh(&peer->lock); - _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); - } -} - -/* - * process packets in the reception queue - */ -static int rxrpc_process_rx_queue(struct rxrpc_call *call, - u32 *_abort_code) -{ - struct rxrpc_ackpacket ack; - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - bool post_ACK; - int latest; - u32 hard, tx; - - _enter(""); - -process_further: - skb = skb_dequeue(&call->rx_queue); - if (!skb) - return -EAGAIN; - - _net("deferred skb %p", skb); - - sp = rxrpc_skb(skb); - - _debug("process %s [st %d]", rxrpc_pkts[sp->hdr.type], call->state); - - post_ACK = false; - - switch (sp->hdr.type) { - /* data packets that wind up here have been received out of - * order, need security processing or are jumbo packets */ - case RXRPC_PACKET_TYPE_DATA: - _proto("OOSQ DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - /* secured packets must be verified and possibly decrypted */ - if (call->conn->security->verify_packet(call, skb, - _abort_code) < 0) - goto protocol_error; - - rxrpc_insert_oos_packet(call, skb); - goto process_further; - - /* partial ACK to process */ - case RXRPC_PACKET_TYPE_ACK: - if (skb_copy_bits(skb, 0, &ack, sizeof(ack)) < 0) { - _debug("extraction failure"); - goto protocol_error; - } - if (!skb_pull(skb, sizeof(ack))) - BUG(); - - latest = sp->hdr.serial; - hard = ntohl(ack.firstPacket); - tx = atomic_read(&call->sequence); - - _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - latest, - ntohs(ack.maxSkew), - hard, - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - rxrpc_extract_ackinfo(call, skb, latest, ack.nAcks); - - if (ack.reason == RXRPC_ACK_PING) { - _proto("Rx ACK %%%u PING Request", latest); - rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, - sp->hdr.serial, true); - } - - /* discard any out-of-order or duplicate ACKs */ - if (latest - call->acks_latest <= 0) { - _debug("discard ACK %d <= %d", - latest, call->acks_latest); - goto discard; - } - call->acks_latest = latest; - - if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_CLIENT_AWAIT_REPLY && - call->state != RXRPC_CALL_SERVER_SEND_REPLY && - call->state != RXRPC_CALL_SERVER_AWAIT_ACK) - goto discard; - - _debug("Tx=%d H=%u S=%d", tx, call->acks_hard, call->state); - - if (hard > 0) { - if (hard - 1 > tx) { - _debug("hard-ACK'd packet %d not transmitted" - " (%d top)", - hard - 1, tx); - goto protocol_error; - } - - if ((call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY || - call->state == RXRPC_CALL_SERVER_AWAIT_ACK) && - hard > tx) { - call->acks_hard = tx; - goto all_acked; - } - - smp_rmb(); - rxrpc_rotate_tx_window(call, hard - 1); - } - - if (ack.nAcks > 0) { - if (hard - 1 + ack.nAcks > tx) { - _debug("soft-ACK'd packet %d+%d not" - " transmitted (%d top)", - hard - 1, ack.nAcks, tx); - goto protocol_error; - } - - if (rxrpc_process_soft_ACKs(call, &ack, skb) < 0) - goto protocol_error; - } - goto discard; - - /* complete ACK to process */ - case RXRPC_PACKET_TYPE_ACKALL: - goto all_acked; - - /* abort and busy are handled elsewhere */ - case RXRPC_PACKET_TYPE_BUSY: - case RXRPC_PACKET_TYPE_ABORT: - BUG(); - - /* connection level events - also handled elsewhere */ - case RXRPC_PACKET_TYPE_CHALLENGE: - case RXRPC_PACKET_TYPE_RESPONSE: - case RXRPC_PACKET_TYPE_DEBUG: - BUG(); - } - - /* if we've had a hard ACK that covers all the packets we've sent, then - * that ends that phase of the operation */ -all_acked: - write_lock_bh(&call->state_lock); - _debug("ack all %d", call->state); - - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - break; - case RXRPC_CALL_SERVER_AWAIT_ACK: - _debug("srv complete"); - call->state = RXRPC_CALL_COMPLETE; - post_ACK = true; - break; - case RXRPC_CALL_CLIENT_SEND_REQUEST: - case RXRPC_CALL_SERVER_RECV_REQUEST: - goto protocol_error_unlock; /* can't occur yet */ - default: - write_unlock_bh(&call->state_lock); - goto discard; /* assume packet left over from earlier phase */ - } - - write_unlock_bh(&call->state_lock); - - /* if all the packets we sent are hard-ACK'd, then we can discard - * whatever we've got left */ - _debug("clear Tx %d", - CIRC_CNT(call->acks_head, call->acks_tail, call->acks_winsz)); - - del_timer_sync(&call->resend_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - - if (call->acks_window) - rxrpc_zap_tx_window(call); - - if (post_ACK) { - /* post the final ACK message for userspace to pick up */ - _debug("post ACK"); - skb->mark = RXRPC_SKB_MARK_FINAL_ACK; - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - spin_lock_bh(&call->lock); - if (rxrpc_queue_rcv_skb(call, skb, true, true) < 0) - BUG(); - spin_unlock_bh(&call->lock); - goto process_further; - } - -discard: - rxrpc_free_skb(skb); - goto process_further; - -protocol_error_unlock: - write_unlock_bh(&call->state_lock); -protocol_error: - rxrpc_free_skb(skb); - _leave(" = -EPROTO"); - return -EPROTO; -} - -/* - * post a message to the socket Rx queue for recvmsg() to pick up - */ -static int rxrpc_post_message(struct rxrpc_call *call, u32 mark, u32 error, - bool fatal) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - int ret; - - _enter("{%d,%lx},%u,%u,%d", - call->debug_id, call->flags, mark, error, fatal); - - /* remove timers and things for fatal messages */ - if (fatal) { - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } - - if (mark != RXRPC_SKB_MARK_NEW_CALL && - !test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - _leave("[no userid]"); - return 0; - } - - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - skb = alloc_skb(0, GFP_NOFS); - if (!skb) - return -ENOMEM; - - rxrpc_new_skb(skb); - - skb->mark = mark; - - sp = rxrpc_skb(skb); - memset(sp, 0, sizeof(*sp)); - sp->error = error; - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - - spin_lock_bh(&call->lock); - ret = rxrpc_queue_rcv_skb(call, skb, true, fatal); - spin_unlock_bh(&call->lock); - BUG_ON(ret < 0); - } - - return 0; -} - -/* - * handle background processing of incoming call packets and ACK / abort - * generation + * Handle retransmission and deferred ACK/abort generation. */ void rxrpc_process_call(struct work_struct *work) { struct rxrpc_call *call = container_of(work, struct rxrpc_call, processor); - struct rxrpc_wire_header whdr; - struct rxrpc_ackpacket ack; - struct rxrpc_ackinfo ackinfo; - struct msghdr msg; - struct kvec iov[5]; - enum rxrpc_call_event genbit; - unsigned long bits; - __be32 data, pad; - size_t len; - int loop, nbit, ioc, ret, mtu; - u32 serial, abort_code = RX_PROTOCOL_ERROR; - u8 *acks = NULL; + ktime_t now; + + rxrpc_see_call(call); //printk("\n--------------------\n"); - _enter("{%d,%s,%lx} [%lu]", - call->debug_id, rxrpc_call_states[call->state], call->events, - (jiffies - call->creation_jif) / (HZ / 10)); + _enter("{%d,%s,%lx}", + call->debug_id, rxrpc_call_states[call->state], call->events); - if (test_and_set_bit(RXRPC_CALL_PROC_BUSY, &call->flags)) { - _debug("XXXXXXXXXXXXX RUNNING ON MULTIPLE CPUS XXXXXXXXXXXXX"); - return; +recheck_state: + if (test_and_clear_bit(RXRPC_CALL_EV_ABORT, &call->events)) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + goto recheck_state; } - if (!call->conn) - goto skip_msg_init; - - /* there's a good chance we're going to have to send a message, so set - * one up in advance */ - msg.msg_name = &call->conn->params.peer->srx.transport; - msg.msg_namelen = call->conn->params.peer->srx.transport_len; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - whdr.epoch = htonl(call->conn->proto.epoch); - whdr.cid = htonl(call->cid); - whdr.callNumber = htonl(call->call_id); - whdr.seq = 0; - whdr.type = RXRPC_PACKET_TYPE_ACK; - whdr.flags = call->conn->out_clientflag; - whdr.userStatus = 0; - whdr.securityIndex = call->conn->security_ix; - whdr._rsvd = 0; - whdr.serviceId = htons(call->service_id); - - memset(iov, 0, sizeof(iov)); - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); -skip_msg_init: - - /* deal with events of a final nature */ - if (test_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events)) { - enum rxrpc_skb_mark mark; - int error; - - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - error = call->error_report; - if (error < RXRPC_LOCAL_ERROR_OFFSET) { - mark = RXRPC_SKB_MARK_NET_ERROR; - _debug("post net error %d", error); - } else { - mark = RXRPC_SKB_MARK_LOCAL_ERROR; - error -= RXRPC_LOCAL_ERROR_OFFSET; - _debug("post net local error %d", error); - } - - if (rxrpc_post_message(call, mark, error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - goto kill_ACKs; + if (call->state == RXRPC_CALL_COMPLETE) { + del_timer_sync(&call->timer); + goto out_put; } - if (test_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events)) { - ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); - - clear_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_ABORT, &call->events); - - _debug("post conn abort"); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - call->conn->error, true) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - goto kill_ACKs; + now = ktime_get_real(); + if (ktime_before(call->expire_at, now)) { + rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, ETIME); + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + goto recheck_state; } - if (test_bit(RXRPC_CALL_EV_REJECT_BUSY, &call->events)) { - whdr.type = RXRPC_PACKET_TYPE_BUSY; - genbit = RXRPC_CALL_EV_REJECT_BUSY; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - ASSERTCMP(call->state, >, RXRPC_CALL_COMPLETE); - - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ECONNABORTED, true) < 0) - goto no_mem; - whdr.type = RXRPC_PACKET_TYPE_ABORT; - data = htonl(call->local_abort); - iov[1].iov_base = &data; - iov[1].iov_len = sizeof(data); - genbit = RXRPC_CALL_EV_ABORT; - goto send_message; - } - - if (test_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) { - genbit = RXRPC_CALL_EV_ACK_FINAL; - - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - ack.serial = 0; - ack.reason = RXRPC_ACK_IDLE; - ack.nAcks = 0; - call->ackr_reason = 0; - - spin_lock_bh(&call->lock); - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = &pad; - iov[2].iov_len = 3; - iov[3].iov_base = &ackinfo; - iov[3].iov_len = sizeof(ackinfo); - goto send_ACK; - } - - if (call->events & ((1 << RXRPC_CALL_EV_RCVD_BUSY) | - (1 << RXRPC_CALL_EV_RCVD_ABORT)) - ) { - u32 mark; - - if (test_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events)) - mark = RXRPC_SKB_MARK_REMOTE_ABORT; - else - mark = RXRPC_SKB_MARK_BUSY; - - _debug("post abort/busy"); - rxrpc_clear_tx_window(call); - if (rxrpc_post_message(call, mark, ECONNABORTED, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; - } - - if (test_and_clear_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events)) { - _debug("do implicit ackall"); - rxrpc_clear_tx_window(call); - } - - if (test_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events)) { - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_TIMEOUT; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - } - write_unlock_bh(&call->state_lock); - - _debug("post timeout"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_LOCAL_ERROR, - ETIME, true) < 0) - goto no_mem; - - clear_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - goto kill_ACKs; - } - - /* deal with assorted inbound messages */ - if (!skb_queue_empty(&call->rx_queue)) { - switch (rxrpc_process_rx_queue(call, &abort_code)) { - case 0: - case -EAGAIN: - break; - case -ENOMEM: - goto no_mem; - case -EKEYEXPIRED: - case -EKEYREJECTED: - case -EPROTO: - rxrpc_abort_call(call, abort_code); - goto kill_ACKs; + if (test_and_clear_bit(RXRPC_CALL_EV_ACK, &call->events)) { + call->ack_at = call->expire_at; + if (call->ackr_reason) { + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + goto recheck_state; } } - /* handle resending */ - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_resend_timer(call); - if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) - rxrpc_resend(call); - - /* consider sending an ordinary ACK */ - if (test_bit(RXRPC_CALL_EV_ACK, &call->events)) { - _debug("send ACK: window: %d - %d { %lx }", - call->rx_data_eaten, call->ackr_win_top, - call->ackr_window[0]); - - if (call->state > RXRPC_CALL_SERVER_ACK_REQUEST && - call->ackr_reason != RXRPC_ACK_PING_RESPONSE) { - /* ACK by sending reply DATA packet in this state */ - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - goto maybe_reschedule; - } - - genbit = RXRPC_CALL_EV_ACK; - - acks = kzalloc(call->ackr_win_top - call->rx_data_eaten, - GFP_NOFS); - if (!acks) - goto no_mem; - - //hdr.flags = RXRPC_SLOW_START_OK; - ack.bufferSpace = htons(8); - ack.maxSkew = 0; - - spin_lock_bh(&call->lock); - ack.reason = call->ackr_reason; - ack.serial = htonl(call->ackr_serial); - ack.previousPacket = htonl(call->ackr_prev_seq); - ack.firstPacket = htonl(call->rx_data_eaten + 1); - - ack.nAcks = 0; - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - nbit = loop * BITS_PER_LONG; - for (bits = call->ackr_window[loop]; bits; bits >>= 1 - ) { - _debug("- l=%d n=%d b=%lx", loop, nbit, bits); - if (bits & 1) { - acks[nbit] = RXRPC_ACK_TYPE_ACK; - ack.nAcks = nbit + 1; - } - nbit++; - } - } - call->ackr_reason = 0; - spin_unlock_bh(&call->lock); - - pad = 0; - - iov[1].iov_base = &ack; - iov[1].iov_len = sizeof(ack); - iov[2].iov_base = acks; - iov[2].iov_len = ack.nAcks; - iov[3].iov_base = &pad; - iov[3].iov_len = 3; - iov[4].iov_base = &ackinfo; - iov[4].iov_len = sizeof(ackinfo); - - switch (ack.reason) { - case RXRPC_ACK_REQUESTED: - case RXRPC_ACK_DUPLICATE: - case RXRPC_ACK_OUT_OF_SEQUENCE: - case RXRPC_ACK_EXCEEDS_WINDOW: - case RXRPC_ACK_NOSPACE: - case RXRPC_ACK_PING: - case RXRPC_ACK_PING_RESPONSE: - goto send_ACK_with_skew; - case RXRPC_ACK_DELAY: - case RXRPC_ACK_IDLE: - goto send_ACK; - } + if (test_and_clear_bit(RXRPC_CALL_EV_RESEND, &call->events)) { + rxrpc_resend(call, now); + goto recheck_state; } - /* handle completion of security negotiations on an incoming - * connection */ - if (test_and_clear_bit(RXRPC_CALL_EV_SECURED, &call->events)) { - _debug("secured"); - spin_lock_bh(&call->lock); - - if (call->state == RXRPC_CALL_SERVER_SECURING) { - _debug("securing"); - write_lock(&call->socket->call_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - _debug("not released"); - call->state = RXRPC_CALL_SERVER_ACCEPTING; - list_move_tail(&call->accept_link, - &call->socket->acceptq); - } - write_unlock(&call->socket->call_lock); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - read_unlock(&call->state_lock); - } - - spin_unlock_bh(&call->lock); - if (!test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) - goto maybe_reschedule; - } - - /* post a notification of an acceptable connection to the app */ - if (test_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events)) { - _debug("post accept"); - if (rxrpc_post_message(call, RXRPC_SKB_MARK_NEW_CALL, - 0, false) < 0) - goto no_mem; - clear_bit(RXRPC_CALL_EV_POST_ACCEPT, &call->events); - goto maybe_reschedule; - } - - /* handle incoming call acceptance */ - if (test_and_clear_bit(RXRPC_CALL_EV_ACCEPTED, &call->events)) { - _debug("accepted"); - ASSERTCMP(call->rx_data_post, ==, 0); - call->rx_data_post = 1; - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) - set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events); - read_unlock_bh(&call->state_lock); - } - - /* drain the out of sequence received packet queue into the packet Rx - * queue */ - if (test_and_clear_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) { - while (call->rx_data_post == call->rx_first_oos) - if (rxrpc_drain_rx_oos_queue(call) < 0) - break; - goto maybe_reschedule; - } - - if (test_bit(RXRPC_CALL_EV_RELEASE, &call->events)) { - rxrpc_release_call(call); - clear_bit(RXRPC_CALL_EV_RELEASE, &call->events); - } + rxrpc_set_timer(call, rxrpc_timer_set_for_resend, now); /* other events may have been raised since we started checking */ - goto maybe_reschedule; - -send_ACK_with_skew: - ack.maxSkew = htons(atomic_read(&call->conn->hi_serial) - - ntohl(ack.serial)); -send_ACK: - mtu = call->conn->params.peer->if_mtu; - mtu -= call->conn->params.peer->hdrsize; - ackinfo.maxMTU = htonl(mtu); - ackinfo.rwind = htonl(rxrpc_rx_window_size); - - /* permit the peer to send us jumbo packets if it wants to */ - ackinfo.rxMTU = htonl(rxrpc_rx_mtu); - ackinfo.jumbo_max = htonl(rxrpc_rx_jumbo_max); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", - serial, - ntohs(ack.maxSkew), - ntohl(ack.firstPacket), - ntohl(ack.previousPacket), - ntohl(ack.serial), - rxrpc_acks(ack.reason), - ack.nAcks); - - del_timer_sync(&call->ack_timer); - if (ack.nAcks > 0) - set_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags); - goto send_message_2; - -send_message: - _debug("send message"); - - serial = atomic_inc_return(&call->conn->serial); - whdr.serial = htonl(serial); - _proto("Tx %s %%%u", rxrpc_pkts[whdr.type], serial); -send_message_2: - - len = iov[0].iov_len; - ioc = 1; - if (iov[4].iov_len) { - ioc = 5; - len += iov[4].iov_len; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[3].iov_len) { - ioc = 4; - len += iov[3].iov_len; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[2].iov_len) { - ioc = 3; - len += iov[2].iov_len; - len += iov[1].iov_len; - } else if (iov[1].iov_len) { - ioc = 2; - len += iov[1].iov_len; - } - - ret = kernel_sendmsg(call->conn->params.local->socket, - &msg, iov, ioc, len); - if (ret < 0) { - _debug("sendmsg failed: %d", ret); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - goto error; - } - - switch (genbit) { - case RXRPC_CALL_EV_ABORT: - clear_bit(genbit, &call->events); - clear_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - goto kill_ACKs; - - case RXRPC_CALL_EV_ACK_FINAL: - write_lock_bh(&call->state_lock); - if (call->state == RXRPC_CALL_CLIENT_FINAL_ACK) - call->state = RXRPC_CALL_COMPLETE; - write_unlock_bh(&call->state_lock); - goto kill_ACKs; - - default: - clear_bit(genbit, &call->events); - switch (call->state) { - case RXRPC_CALL_CLIENT_AWAIT_REPLY: - case RXRPC_CALL_CLIENT_RECV_REPLY: - case RXRPC_CALL_SERVER_RECV_REQUEST: - case RXRPC_CALL_SERVER_ACK_REQUEST: - _debug("start ACK timer"); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, - call->ackr_serial, false); - default: - break; - } - goto maybe_reschedule; - } - -kill_ACKs: - del_timer_sync(&call->ack_timer); - if (test_and_clear_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events)) - rxrpc_put_call(call); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - -maybe_reschedule: - if (call->events || !skb_queue_empty(&call->rx_queue)) { - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - } - - /* don't leave aborted connections on the accept queue */ - if (call->state >= RXRPC_CALL_COMPLETE && - !list_empty(&call->accept_link)) { - _debug("X unlinking once-pending call %p { e=%lx f=%lx c=%x }", - call, call->events, call->flags, call->conn->proto.cid); - - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); - } - -error: - clear_bit(RXRPC_CALL_PROC_BUSY, &call->flags); - kfree(acks); - - /* because we don't want two CPUs both processing the work item for one - * call at the same time, we use a flag to note when it's busy; however - * this means there's a race between clearing the flag and setting the - * work pending bit and the work item being processed again */ - if (call->events && !work_pending(&call->processor)) { - _debug("jumpstart %x", call->conn->proto.cid); - rxrpc_queue_call(call); + if (call->events && call->state < RXRPC_CALL_COMPLETE) { + __rxrpc_queue_call(call); + goto out; } +out_put: + rxrpc_put_call(call, rxrpc_call_put); +out: _leave(""); - return; - -no_mem: - _debug("out of memory"); - goto maybe_reschedule; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index ae057e0740f3..364b42dc3dce 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -19,23 +19,13 @@ #include #include "ar-internal.h" -/* - * Maximum lifetime of a call (in jiffies). - */ -unsigned int rxrpc_max_call_lifetime = 60 * HZ; - -/* - * Time till dead call expires after last use (in jiffies). - */ -unsigned int rxrpc_dead_call_expiry = 2 * HZ; - const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { - [RXRPC_CALL_UNINITIALISED] = "Uninit", + [RXRPC_CALL_UNINITIALISED] = "Uninit ", [RXRPC_CALL_CLIENT_AWAIT_CONN] = "ClWtConn", [RXRPC_CALL_CLIENT_SEND_REQUEST] = "ClSndReq", [RXRPC_CALL_CLIENT_AWAIT_REPLY] = "ClAwtRpl", [RXRPC_CALL_CLIENT_RECV_REPLY] = "ClRcvRpl", - [RXRPC_CALL_CLIENT_FINAL_ACK] = "ClFnlACK", + [RXRPC_CALL_SERVER_PREALLOC] = "SvPrealc", [RXRPC_CALL_SERVER_SECURING] = "SvSecure", [RXRPC_CALL_SERVER_ACCEPTING] = "SvAccept", [RXRPC_CALL_SERVER_RECV_REQUEST] = "SvRcvReq", @@ -43,22 +33,47 @@ const char *const rxrpc_call_states[NR__RXRPC_CALL_STATES] = { [RXRPC_CALL_SERVER_SEND_REPLY] = "SvSndRpl", [RXRPC_CALL_SERVER_AWAIT_ACK] = "SvAwtACK", [RXRPC_CALL_COMPLETE] = "Complete", - [RXRPC_CALL_SERVER_BUSY] = "SvBusy ", +}; + +const char *const rxrpc_call_completions[NR__RXRPC_CALL_COMPLETIONS] = { + [RXRPC_CALL_SUCCEEDED] = "Complete", [RXRPC_CALL_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CALL_LOCALLY_ABORTED] = "LocAbort", + [RXRPC_CALL_LOCAL_ERROR] = "LocError", [RXRPC_CALL_NETWORK_ERROR] = "NetError", - [RXRPC_CALL_DEAD] = "Dead ", +}; + +const char rxrpc_call_traces[rxrpc_call__nr_trace][4] = { + [rxrpc_call_new_client] = "NWc", + [rxrpc_call_new_service] = "NWs", + [rxrpc_call_queued] = "QUE", + [rxrpc_call_queued_ref] = "QUR", + [rxrpc_call_connected] = "CON", + [rxrpc_call_release] = "RLS", + [rxrpc_call_seen] = "SEE", + [rxrpc_call_got] = "GOT", + [rxrpc_call_got_userid] = "Gus", + [rxrpc_call_got_kernel] = "Gke", + [rxrpc_call_put] = "PUT", + [rxrpc_call_put_userid] = "Pus", + [rxrpc_call_put_kernel] = "Pke", + [rxrpc_call_put_noqueue] = "PNQ", + [rxrpc_call_error] = "*E*", }; struct kmem_cache *rxrpc_call_jar; LIST_HEAD(rxrpc_calls); DEFINE_RWLOCK(rxrpc_call_lock); -static void rxrpc_destroy_call(struct work_struct *work); -static void rxrpc_call_life_expired(unsigned long _call); -static void rxrpc_dead_call_expired(unsigned long _call); -static void rxrpc_ack_time_expired(unsigned long _call); -static void rxrpc_resend_time_expired(unsigned long _call); +static void rxrpc_call_timer_expired(unsigned long _call) +{ + struct rxrpc_call *call = (struct rxrpc_call *)_call; + + _enter("%d", call->debug_id); + + if (call->state < RXRPC_CALL_COMPLETE) + rxrpc_set_timer(call, rxrpc_timer_expired, ktime_get_real()); +} /* * find an extant server call @@ -91,7 +106,7 @@ struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *rx, return NULL; found_extant_call: - rxrpc_get_call(call); + rxrpc_get_call(call, rxrpc_call_got); read_unlock(&rx->call_lock); _leave(" = %p [%d]", call, atomic_read(&call->usage)); return call; @@ -100,7 +115,7 @@ found_extant_call: /* * allocate a new call */ -static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) +struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) { struct rxrpc_call *call; @@ -108,29 +123,25 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) if (!call) return NULL; - call->acks_winsz = 16; - call->acks_window = kmalloc(call->acks_winsz * sizeof(unsigned long), + call->rxtx_buffer = kcalloc(RXRPC_RXTX_BUFF_SIZE, + sizeof(struct sk_buff *), gfp); - if (!call->acks_window) { - kmem_cache_free(rxrpc_call_jar, call); - return NULL; - } + if (!call->rxtx_buffer) + goto nomem; - setup_timer(&call->lifetimer, &rxrpc_call_life_expired, - (unsigned long) call); - setup_timer(&call->deadspan, &rxrpc_dead_call_expired, - (unsigned long) call); - setup_timer(&call->ack_timer, &rxrpc_ack_time_expired, - (unsigned long) call); - setup_timer(&call->resend_timer, &rxrpc_resend_time_expired, - (unsigned long) call); - INIT_WORK(&call->destroyer, &rxrpc_destroy_call); + call->rxtx_annotations = kcalloc(RXRPC_RXTX_BUFF_SIZE, sizeof(u8), gfp); + if (!call->rxtx_annotations) + goto nomem_2; + + setup_timer(&call->timer, rxrpc_call_timer_expired, + (unsigned long)call); INIT_WORK(&call->processor, &rxrpc_process_call); INIT_LIST_HEAD(&call->link); + INIT_LIST_HEAD(&call->chan_wait_link); INIT_LIST_HEAD(&call->accept_link); - skb_queue_head_init(&call->rx_queue); - skb_queue_head_init(&call->rx_oos_queue); - init_waitqueue_head(&call->tx_waitq); + INIT_LIST_HEAD(&call->recvmsg_link); + INIT_LIST_HEAD(&call->sock_link); + init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); @@ -138,70 +149,65 @@ static struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) memset(&call->sock_node, 0xed, sizeof(call->sock_node)); - call->rx_data_expect = 1; - call->rx_data_eaten = 0; - call->rx_first_oos = 0; - call->ackr_win_top = call->rx_data_eaten + 1 + rxrpc_rx_window_size; - call->creation_jif = jiffies; + /* Leave space in the ring to handle a maxed-out jumbo packet */ + call->rx_winsize = rxrpc_rx_window_size; + call->tx_winsize = 16; + call->rx_expect_next = 1; + + if (RXRPC_TX_SMSS > 2190) + call->cong_cwnd = 2; + else if (RXRPC_TX_SMSS > 1095) + call->cong_cwnd = 3; + else + call->cong_cwnd = 4; + call->cong_ssthresh = RXRPC_RXTX_BUFF_SIZE - 1; return call; + +nomem_2: + kfree(call->rxtx_buffer); +nomem: + kmem_cache_free(rxrpc_call_jar, call); + return NULL; } /* * Allocate a new client call. */ -static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx, - struct sockaddr_rxrpc *srx, +static struct rxrpc_call *rxrpc_alloc_client_call(struct sockaddr_rxrpc *srx, gfp_t gfp) { struct rxrpc_call *call; + ktime_t now; _enter(""); - ASSERT(rx->local != NULL); - call = rxrpc_alloc_call(gfp); if (!call) return ERR_PTR(-ENOMEM); call->state = RXRPC_CALL_CLIENT_AWAIT_CONN; - - sock_hold(&rx->sk); - call->socket = rx; - call->rx_data_post = 1; - - call->local = rx->local; call->service_id = srx->srx_service; - call->in_clientflag = 0; + call->tx_phase = true; + now = ktime_get_real(); + call->acks_latest_ts = now; + call->cong_tstamp = now; _leave(" = %p", call); return call; } /* - * Begin client call. + * Initiate the call ack/resend/expiry timer. */ -static int rxrpc_begin_client_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static void rxrpc_start_call_timer(struct rxrpc_call *call) { - int ret; + ktime_t now = ktime_get_real(), expire_at; - /* Set up or get a connection record and set the protocol parameters, - * including channel number and call ID. - */ - ret = rxrpc_connect_call(call, cp, srx, gfp); - if (ret < 0) - return ret; - - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; - - spin_lock(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, &call->conn->params.peer->error_targets); - spin_unlock(&call->conn->params.peer->lock); - - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - return 0; + expire_at = ktime_add_ms(now, rxrpc_max_call_lifetime); + call->expire_at = expire_at; + call->ack_at = expire_at; + call->resend_at = expire_at; + call->timer.expires = jiffies + LONG_MAX / 2; + rxrpc_set_timer(call, rxrpc_timer_begin, now); } /* @@ -216,20 +222,21 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, { struct rxrpc_call *call, *xcall; struct rb_node *parent, **pp; + const void *here = __builtin_return_address(0); int ret; _enter("%p,%lx", rx, user_call_ID); - call = rxrpc_alloc_client_call(rx, srx, gfp); + call = rxrpc_alloc_client_call(srx, gfp); if (IS_ERR(call)) { _leave(" = %ld", PTR_ERR(call)); return call; } - /* Publish the call, even though it is incompletely set up as yet */ - call->user_call_ID = user_call_ID; - __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + trace_rxrpc_call(call, rxrpc_call_new_client, atomic_read(&call->usage), + here, (const void *)user_call_ID); + /* Publish the call, even though it is incompletely set up as yet */ write_lock(&rx->call_lock); pp = &rx->calls.rb_node; @@ -243,369 +250,285 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, else if (user_call_ID > xcall->user_call_ID) pp = &(*pp)->rb_right; else - goto found_user_ID_now_present; + goto error_dup_user_ID; } - rxrpc_get_call(call); - + rcu_assign_pointer(call->socket, rx); + call->user_call_ID = user_call_ID; + __set_bit(RXRPC_CALL_HAS_USERID, &call->flags); + rxrpc_get_call(call, rxrpc_call_got_userid); rb_link_node(&call->sock_node, parent, pp); rb_insert_color(&call->sock_node, &rx->calls); + list_add(&call->sock_link, &rx->sock_calls); + write_unlock(&rx->call_lock); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); - ret = rxrpc_begin_client_call(call, cp, srx, gfp); + /* Set up or get a connection record and set the protocol parameters, + * including channel number and call ID. + */ + ret = rxrpc_connect_call(call, cp, srx, gfp); if (ret < 0) goto error; + trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), + here, ERR_PTR(ret)); + + spin_lock_bh(&call->conn->params.peer->lock); + hlist_add_head(&call->error_link, + &call->conn->params.peer->error_targets); + spin_unlock_bh(&call->conn->params.peer->lock); + + rxrpc_start_call_timer(call); + _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); _leave(" = %p [new]", call); return call; -error: - write_lock(&rx->call_lock); - rb_erase(&call->sock_node, &rx->calls); - write_unlock(&rx->call_lock); - rxrpc_put_call(call); - - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); - - set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call); - _leave(" = %d", ret); - return ERR_PTR(ret); - /* We unexpectedly found the user ID in the list after taking * the call_lock. This shouldn't happen unless the user races * with itself and tries to add the same user ID twice at the * same time in different threads. */ -found_user_ID_now_present: +error_dup_user_ID: write_unlock(&rx->call_lock); - set_bit(RXRPC_CALL_RELEASED, &call->flags); - call->state = RXRPC_CALL_DEAD; - rxrpc_put_call(call); - _leave(" = -EEXIST [%p]", call); - return ERR_PTR(-EEXIST); + ret = -EEXIST; + +error: + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), + here, ERR_PTR(ret)); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ERR_PTR(ret); } /* - * set up an incoming call - * - called in process context with IRQs enabled + * Set up an incoming call. call->conn points to the connection. + * This is called in BH context and isn't allowed to fail. */ -struct rxrpc_call *rxrpc_incoming_call(struct rxrpc_sock *rx, - struct rxrpc_connection *conn, - struct sk_buff *skb) +void rxrpc_incoming_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct sk_buff *skb) { + struct rxrpc_connection *conn = call->conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call, *candidate; - u32 call_id, chan; + u32 chan; - _enter(",%d", conn->debug_id); + _enter(",%d", call->conn->debug_id); - ASSERT(rx != NULL); + rcu_assign_pointer(call->socket, rx); + call->call_id = sp->hdr.callNumber; + call->service_id = sp->hdr.serviceId; + call->cid = sp->hdr.cid; + call->state = RXRPC_CALL_SERVER_ACCEPTING; + if (sp->hdr.securityIndex > 0) + call->state = RXRPC_CALL_SERVER_SECURING; + call->cong_tstamp = skb->tstamp; - candidate = rxrpc_alloc_call(GFP_NOIO); - if (!candidate) - return ERR_PTR(-EBUSY); - - chan = sp->hdr.cid & RXRPC_CHANNELMASK; - candidate->socket = rx; - candidate->conn = conn; - candidate->cid = sp->hdr.cid; - candidate->call_id = sp->hdr.callNumber; - candidate->channel = chan; - candidate->rx_data_post = 0; - candidate->state = RXRPC_CALL_SERVER_ACCEPTING; - if (conn->security_ix > 0) - candidate->state = RXRPC_CALL_SERVER_SECURING; - - spin_lock(&conn->channel_lock); - - /* set the channel for this call */ - call = rcu_dereference_protected(conn->channels[chan].call, - lockdep_is_held(&conn->channel_lock)); - - _debug("channel[%u] is %p", candidate->channel, call); - if (call && call->call_id == sp->hdr.callNumber) { - /* already set; must've been a duplicate packet */ - _debug("extant call [%d]", call->state); - ASSERTCMP(call->conn, ==, conn); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - rxrpc_queue_call(call); - case RXRPC_CALL_REMOTELY_ABORTED: - read_unlock(&call->state_lock); - goto aborted_call; - default: - rxrpc_get_call(call); - read_unlock(&call->state_lock); - goto extant_call; - } - } - - if (call) { - /* it seems the channel is still in use from the previous call - * - ditch the old binding if its call is now complete */ - _debug("CALL: %u { %s }", - call->debug_id, rxrpc_call_states[call->state]); - - if (call->state >= RXRPC_CALL_COMPLETE) { - __rxrpc_disconnect_call(call); - } else { - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -EBUSY"); - return ERR_PTR(-EBUSY); - } - } - - /* check the call number isn't duplicate */ - _debug("check dup"); - call_id = sp->hdr.callNumber; - - /* We just ignore calls prior to the current call ID. Terminated calls - * are handled via the connection. + /* Set the channel for this call. We don't get channel_lock as we're + * only defending against the data_ready handler (which we're called + * from) and the RESPONSE packet parser (which is only really + * interested in call_counter and can cope with a disagreement with the + * call pointer). */ - if (call_id <= conn->channels[chan].call_counter) - goto old_call; /* TODO: Just drop packet */ - - /* make the call available */ - _debug("new call"); - call = candidate; - candidate = NULL; - conn->channels[chan].call_counter = call_id; + chan = sp->hdr.cid & RXRPC_CHANNELMASK; + conn->channels[chan].call_counter = call->call_id; + conn->channels[chan].call_id = call->call_id; rcu_assign_pointer(conn->channels[chan].call, call); - sock_hold(&rx->sk); - rxrpc_get_connection(conn); - spin_unlock(&conn->channel_lock); spin_lock(&conn->params.peer->lock); hlist_add_head(&call->error_link, &conn->params.peer->error_targets); spin_unlock(&conn->params.peer->lock); - write_lock_bh(&rxrpc_call_lock); - list_add_tail(&call->link, &rxrpc_calls); - write_unlock_bh(&rxrpc_call_lock); - - call->local = conn->params.local; - call->epoch = conn->proto.epoch; - call->service_id = conn->params.service_id; - call->in_clientflag = RXRPC_CLIENT_INITIATED; - _net("CALL incoming %d on CONN %d", call->debug_id, call->conn->debug_id); - call->lifetimer.expires = jiffies + rxrpc_max_call_lifetime; - add_timer(&call->lifetimer); - _leave(" = %p {%d} [new]", call, call->debug_id); - return call; - -extant_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = %p {%d} [extant]", call, call ? call->debug_id : -1); - return call; - -aborted_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNABORTED"); - return ERR_PTR(-ECONNABORTED); - -old_call: - spin_unlock(&conn->channel_lock); - kmem_cache_free(rxrpc_call_jar, candidate); - _leave(" = -ECONNRESET [old]"); - return ERR_PTR(-ECONNRESET); + rxrpc_start_call_timer(call); + _leave(""); } /* - * detach a call from a socket and set up for release + * Queue a call's work processor, getting a ref to pass to the work queue. */ -void rxrpc_release_call(struct rxrpc_call *call) +bool rxrpc_queue_call(struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; - struct rxrpc_sock *rx = call->socket; + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&call->usage, 1, 0); + if (n == 0) + return false; + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued, n + 1, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} - _enter("{%d,%d,%d,%d}", - call->debug_id, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - call->rx_first_oos); +/* + * Queue a call's work processor, passing the callers ref to the work queue. + */ +bool __rxrpc_queue_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int n = atomic_read(&call->usage); + ASSERTCMP(n, >=, 1); + if (rxrpc_queue_work(&call->processor)) + trace_rxrpc_call(call, rxrpc_call_queued_ref, n, here, NULL); + else + rxrpc_put_call(call, rxrpc_call_put_noqueue); + return true; +} + +/* + * Note the re-emergence of a call. + */ +void rxrpc_see_call(struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + if (call) { + int n = atomic_read(&call->usage); + + trace_rxrpc_call(call, rxrpc_call_seen, n, here, NULL); + } +} + +/* + * Note the addition of a ref on a call. + */ +void rxrpc_get_call(struct rxrpc_call *call, enum rxrpc_call_trace op) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&call->usage); + + trace_rxrpc_call(call, op, n, here, NULL); +} + +/* + * Detach a call from its owning socket. + */ +void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + struct rxrpc_connection *conn = call->conn; + bool put = false; + int i; + + _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); + + trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + here, (const void *)call->flags); + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); spin_lock_bh(&call->lock); if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); spin_unlock_bh(&call->lock); - /* dissociate from the socket - * - the socket's ref on the call is passed to the death timer - */ - _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + del_timer_sync(&call->timer); - spin_lock(&conn->params.peer->lock); - hlist_del_init(&call->error_link); - spin_unlock(&conn->params.peer->lock); + /* Make sure we don't get any more notifications */ + write_lock_bh(&rx->recvmsg_lock); - write_lock_bh(&rx->call_lock); - if (!list_empty(&call->accept_link)) { + if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", call, call->events, call->flags); - ASSERT(!test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - list_del_init(&call->accept_link); - sk_acceptq_removed(&rx->sk); - } else if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + list_del(&call->recvmsg_link); + put = true; + } + + /* list_empty() must return false in rxrpc_notify_socket() */ + call->recvmsg_link.next = NULL; + call->recvmsg_link.prev = NULL; + + write_unlock_bh(&rx->recvmsg_lock); + if (put) + rxrpc_put_call(call, rxrpc_call_put); + + write_lock(&rx->call_lock); + + if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { rb_erase(&call->sock_node, &rx->calls); memset(&call->sock_node, 0xdd, sizeof(call->sock_node)); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); - } - write_unlock_bh(&rx->call_lock); - - /* free up the channel for reuse */ - write_lock_bh(&call->state_lock); - - if (call->state < RXRPC_CALL_COMPLETE && - call->state != RXRPC_CALL_CLIENT_FINAL_ACK) { - _debug("+++ ABORTING STATE %d +++\n", call->state); - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_DEAD; - } - write_unlock_bh(&call->state_lock); - - rxrpc_disconnect_call(call); - - /* clean up the Rx queue */ - if (!skb_queue_empty(&call->rx_queue) || - !skb_queue_empty(&call->rx_oos_queue)) { - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - - _debug("purge Rx queues"); - - spin_lock_bh(&call->lock); - while ((skb = skb_dequeue(&call->rx_queue)) || - (skb = skb_dequeue(&call->rx_oos_queue))) { - spin_unlock_bh(&call->lock); - - sp = rxrpc_skb(skb); - _debug("- zap %s %%%u #%u", - rxrpc_pkts[sp->hdr.type], - sp->hdr.serial, sp->hdr.seq); - rxrpc_free_skb(skb); - spin_lock_bh(&call->lock); - } - spin_unlock_bh(&call->lock); - - ASSERTCMP(call->state, !=, RXRPC_CALL_COMPLETE); + rxrpc_put_call(call, rxrpc_call_put_userid); } - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->lifetimer); - call->deadspan.expires = jiffies + rxrpc_dead_call_expiry; - add_timer(&call->deadspan); + list_del(&call->sock_link); + write_unlock(&rx->call_lock); + + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn); + + if (conn) + rxrpc_disconnect_call(call); + + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); + call->rxtx_buffer[i] = NULL; + } _leave(""); } -/* - * handle a dead call being ready for reaping - */ -static void rxrpc_dead_call_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - write_lock_bh(&call->state_lock); - call->state = RXRPC_CALL_DEAD; - write_unlock_bh(&call->state_lock); - rxrpc_put_call(call); -} - -/* - * mark a call as to be released, aborting it if it's still in progress - * - called with softirqs disabled - */ -static void rxrpc_mark_call_released(struct rxrpc_call *call) -{ - bool sched; - - write_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { - sched = false; - if (call->state < RXRPC_CALL_COMPLETE) { - _debug("abort call %p", call); - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_CALL_DEAD; - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) - sched = true; - } - if (!test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - sched = true; - if (sched) - rxrpc_queue_call(call); - } - write_unlock(&call->state_lock); -} - /* * release all the calls associated with a socket */ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) { struct rxrpc_call *call; - struct rb_node *p; _enter("%p", rx); - read_lock_bh(&rx->call_lock); - - /* mark all the calls as no longer wanting incoming packets */ - for (p = rb_first(&rx->calls); p; p = rb_next(p)) { - call = rb_entry(p, struct rxrpc_call, sock_node); - rxrpc_mark_call_released(call); + while (!list_empty(&rx->to_be_accepted)) { + call = list_entry(rx->to_be_accepted.next, + struct rxrpc_call, accept_link); + list_del(&call->accept_link); + rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_put_call(call, rxrpc_call_put); } - /* kill the not-yet-accepted incoming calls */ - list_for_each_entry(call, &rx->secureq, accept_link) { - rxrpc_mark_call_released(call); + while (!list_empty(&rx->sock_calls)) { + call = list_entry(rx->sock_calls.next, + struct rxrpc_call, sock_link); + rxrpc_get_call(call, rxrpc_call_got); + rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, ECONNRESET); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + rxrpc_release_call(rx, call); + rxrpc_put_call(call, rxrpc_call_put); } - list_for_each_entry(call, &rx->acceptq, accept_link) { - rxrpc_mark_call_released(call); - } - - read_unlock_bh(&rx->call_lock); _leave(""); } /* * release a call */ -void __rxrpc_put_call(struct rxrpc_call *call) +void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace op) { + const void *here = __builtin_return_address(0); + int n; + ASSERT(call != NULL); - _enter("%p{u=%d}", call, atomic_read(&call->usage)); - - ASSERTCMP(atomic_read(&call->usage), >, 0); - - if (atomic_dec_and_test(&call->usage)) { + n = atomic_dec_return(&call->usage); + trace_rxrpc_call(call, op, n, here, NULL); + ASSERTCMP(n, >=, 0); + if (n == 0) { _debug("call %d dead", call->debug_id); - WARN_ON(atomic_read(&call->skb_count) != 0); - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - rxrpc_queue_work(&call->destroyer); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + + write_lock(&rxrpc_call_lock); + list_del_init(&call->link); + write_unlock(&rxrpc_call_lock); + + rxrpc_cleanup_call(call); } - _leave(""); } /* @@ -615,187 +538,70 @@ static void rxrpc_rcu_destroy_call(struct rcu_head *rcu) { struct rxrpc_call *call = container_of(rcu, struct rxrpc_call, rcu); - rxrpc_purge_queue(&call->rx_queue); + rxrpc_put_peer(call->peer); + kfree(call->rxtx_buffer); + kfree(call->rxtx_annotations); kmem_cache_free(rxrpc_call_jar, call); } /* * clean up a call */ -static void rxrpc_cleanup_call(struct rxrpc_call *call) +void rxrpc_cleanup_call(struct rxrpc_call *call) { - _net("DESTROY CALL %d", call->debug_id); + int i; - ASSERT(call->socket); + _net("DESTROY CALL %d", call->debug_id); memset(&call->sock_node, 0xcd, sizeof(call->sock_node)); - del_timer_sync(&call->lifetimer); - del_timer_sync(&call->deadspan); - del_timer_sync(&call->ack_timer); - del_timer_sync(&call->resend_timer); + del_timer_sync(&call->timer); + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags)); - ASSERTCMP(call->events, ==, 0); - if (work_pending(&call->processor)) { - _debug("defer destroy"); - rxrpc_queue_work(&call->destroyer); - return; - } - ASSERTCMP(call->conn, ==, NULL); - if (call->acks_window) { - _debug("kill Tx window %d", - CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz)); - smp_mb(); - while (CIRC_CNT(call->acks_head, call->acks_tail, - call->acks_winsz) > 0) { - struct rxrpc_skb_priv *sp; - unsigned long _skb; + /* Clean up the Rx/Tx buffer */ + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) + rxrpc_free_skb(call->rxtx_buffer[i], + (call->tx_phase ? rxrpc_skb_tx_cleaned : + rxrpc_skb_rx_cleaned)); - _skb = call->acks_window[call->acks_tail] & ~1; - sp = rxrpc_skb((struct sk_buff *)_skb); - _debug("+++ clear Tx %u", sp->hdr.seq); - rxrpc_free_skb((struct sk_buff *)_skb); - call->acks_tail = - (call->acks_tail + 1) & (call->acks_winsz - 1); - } + rxrpc_free_skb(call->tx_pending, rxrpc_skb_tx_cleaned); - kfree(call->acks_window); - } - - rxrpc_free_skb(call->tx_pending); - - rxrpc_purge_queue(&call->rx_queue); - ASSERT(skb_queue_empty(&call->rx_oos_queue)); - sock_put(&call->socket->sk); call_rcu(&call->rcu, rxrpc_rcu_destroy_call); } /* - * destroy a call - */ -static void rxrpc_destroy_call(struct work_struct *work) -{ - struct rxrpc_call *call = - container_of(work, struct rxrpc_call, destroyer); - - _enter("%p{%d,%d,%p}", - call, atomic_read(&call->usage), call->channel, call->conn); - - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - - write_lock_bh(&rxrpc_call_lock); - list_del_init(&call->link); - write_unlock_bh(&rxrpc_call_lock); - - rxrpc_cleanup_call(call); - _leave(""); -} - -/* - * preemptively destroy all the call records from a transport endpoint rather - * than waiting for them to time out + * Make sure that all calls are gone. */ void __exit rxrpc_destroy_all_calls(void) { struct rxrpc_call *call; _enter(""); - write_lock_bh(&rxrpc_call_lock); + + if (list_empty(&rxrpc_calls)) + return; + + write_lock(&rxrpc_call_lock); while (!list_empty(&rxrpc_calls)) { call = list_entry(rxrpc_calls.next, struct rxrpc_call, link); _debug("Zapping call %p", call); + rxrpc_see_call(call); list_del_init(&call->link); - switch (atomic_read(&call->usage)) { - case 0: - ASSERTCMP(call->state, ==, RXRPC_CALL_DEAD); - break; - case 1: - if (del_timer_sync(&call->deadspan) != 0 && - call->state != RXRPC_CALL_DEAD) - rxrpc_dead_call_expired((unsigned long) call); - if (call->state != RXRPC_CALL_DEAD) - break; - default: - pr_err("Call %p still in use (%d,%d,%s,%lx,%lx)!\n", - call, atomic_read(&call->usage), - atomic_read(&call->ackr_not_idle), - rxrpc_call_states[call->state], - call->flags, call->events); - if (!skb_queue_empty(&call->rx_queue)) - pr_err("Rx queue occupied\n"); - if (!skb_queue_empty(&call->rx_oos_queue)) - pr_err("OOS queue occupied\n"); - break; - } + pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n", + call, atomic_read(&call->usage), + rxrpc_call_states[call->state], + call->flags, call->events); - write_unlock_bh(&rxrpc_call_lock); + write_unlock(&rxrpc_call_lock); cond_resched(); - write_lock_bh(&rxrpc_call_lock); + write_lock(&rxrpc_call_lock); } - write_unlock_bh(&rxrpc_call_lock); - _leave(""); -} - -/* - * handle call lifetime being exceeded - */ -static void rxrpc_call_life_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - _enter("{%d}", call->debug_id); - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - set_bit(RXRPC_CALL_EV_LIFE_TIMER, &call->events); - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); -} - -/* - * handle resend timer expiry - * - may not take call->state_lock as this can deadlock against del_timer_sync() - */ -static void rxrpc_resend_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (!test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); -} - -/* - * handle ACK timer expiry - */ -static void rxrpc_ack_time_expired(unsigned long _call) -{ - struct rxrpc_call *call = (struct rxrpc_call *) _call; - - _enter("{%d}", call->debug_id); - - if (call->state >= RXRPC_CALL_COMPLETE) - return; - - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_ACK, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + write_unlock(&rxrpc_call_lock); } diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 9e91f27b0d0f..60ef9605167e 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -7,6 +7,68 @@ * modify it under the terms of the GNU General Public Licence * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. + * + * + * Client connections need to be cached for a little while after they've made a + * call so as to handle retransmitted DATA packets in case the server didn't + * receive the final ACK or terminating ABORT we sent it. + * + * Client connections can be in one of a number of cache states: + * + * (1) INACTIVE - The connection is not held in any list and may not have been + * exposed to the world. If it has been previously exposed, it was + * discarded from the idle list after expiring. + * + * (2) WAITING - The connection is waiting for the number of client conns to + * drop below the maximum capacity. Calls may be in progress upon it from + * when it was active and got culled. + * + * The connection is on the rxrpc_waiting_client_conns list which is kept + * in to-be-granted order. Culled conns with waiters go to the back of + * the queue just like new conns. + * + * (3) ACTIVE - The connection has at least one call in progress upon it, it + * may freely grant available channels to new calls and calls may be + * waiting on it for channels to become available. + * + * The connection is on the rxrpc_active_client_conns list which is kept + * in activation order for culling purposes. + * + * rxrpc_nr_active_client_conns is held incremented also. + * + * (4) CULLED - The connection got summarily culled to try and free up + * capacity. Calls currently in progress on the connection are allowed to + * continue, but new calls will have to wait. There can be no waiters in + * this state - the conn would have to go to the WAITING state instead. + * + * (5) IDLE - The connection has no calls in progress upon it and must have + * been exposed to the world (ie. the EXPOSED flag must be set). When it + * expires, the EXPOSED flag is cleared and the connection transitions to + * the INACTIVE state. + * + * The connection is on the rxrpc_idle_client_conns list which is kept in + * order of how soon they'll expire. + * + * There are flags of relevance to the cache: + * + * (1) EXPOSED - The connection ID got exposed to the world. If this flag is + * set, an extra ref is added to the connection preventing it from being + * reaped when it has no calls outstanding. This flag is cleared and the + * ref dropped when a conn is discarded from the idle list. + * + * This allows us to move terminal call state retransmission to the + * connection and to discard the call immediately we think it is done + * with. It also give us a chance to reuse the connection. + * + * (2) DONT_REUSE - The connection should be discarded as soon as possible and + * should not be reused. This is set when an exclusive connection is used + * or a call ID counter overflows. + * + * The caching state may only be changed if the cache lock is held. + * + * There are two idle client connection expiry durations. If the total number + * of connections is below the reap threshold, we use the normal duration; if + * it's above, we use the fast duration. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -16,27 +78,50 @@ #include #include "ar-internal.h" +__read_mostly unsigned int rxrpc_max_client_connections = 1000; +__read_mostly unsigned int rxrpc_reap_client_connections = 900; +__read_mostly unsigned int rxrpc_conn_idle_client_expiry = 2 * 60 * HZ; +__read_mostly unsigned int rxrpc_conn_idle_client_fast_expiry = 2 * HZ; + +static unsigned int rxrpc_nr_client_conns; +static unsigned int rxrpc_nr_active_client_conns; +static __read_mostly bool rxrpc_kill_all_client_conns; + +static DEFINE_SPINLOCK(rxrpc_client_conn_cache_lock); +static DEFINE_SPINLOCK(rxrpc_client_conn_discard_mutex); +static LIST_HEAD(rxrpc_waiting_client_conns); +static LIST_HEAD(rxrpc_active_client_conns); +static LIST_HEAD(rxrpc_idle_client_conns); + /* * We use machine-unique IDs for our client connections. */ DEFINE_IDR(rxrpc_client_conn_ids); static DEFINE_SPINLOCK(rxrpc_conn_id_lock); +static void rxrpc_cull_active_client_conns(void); +static void rxrpc_discard_expired_client_conns(struct work_struct *); + +static DECLARE_DELAYED_WORK(rxrpc_client_conn_reap, + rxrpc_discard_expired_client_conns); + +const char rxrpc_conn_cache_states[RXRPC_CONN__NR_CACHE_STATES][5] = { + [RXRPC_CONN_CLIENT_INACTIVE] = "Inac", + [RXRPC_CONN_CLIENT_WAITING] = "Wait", + [RXRPC_CONN_CLIENT_ACTIVE] = "Actv", + [RXRPC_CONN_CLIENT_CULLED] = "Cull", + [RXRPC_CONN_CLIENT_IDLE] = "Idle", +}; + /* * Get a connection ID and epoch for a client connection from the global pool. * The connection struct pointer is then recorded in the idr radix tree. The - * epoch is changed if this wraps. - * - * TODO: The IDR tree gets very expensive on memory if the connection IDs are - * widely scattered throughout the number space, so we shall need to retire - * connections that have, say, an ID more than four times the maximum number of - * client conns away from the current allocation point to try and keep the IDs - * concentrated. We will also need to retire connections from an old epoch. + * epoch doesn't change until the client is rebooted (or, at least, unless the + * module is unloaded). */ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, gfp_t gfp) { - u32 epoch; int id; _enter(""); @@ -44,34 +129,18 @@ static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn, idr_preload(gfp); spin_lock(&rxrpc_conn_id_lock); - epoch = rxrpc_epoch; - - /* We could use idr_alloc_cyclic() here, but we really need to know - * when the thing wraps so that we can advance the epoch. - */ - if (rxrpc_client_conn_ids.cur == 0) - rxrpc_client_conn_ids.cur = 1; - id = idr_alloc(&rxrpc_client_conn_ids, conn, - rxrpc_client_conn_ids.cur, 0x40000000, GFP_NOWAIT); - if (id < 0) { - if (id != -ENOSPC) - goto error; - id = idr_alloc(&rxrpc_client_conn_ids, conn, - 1, 0x40000000, GFP_NOWAIT); - if (id < 0) - goto error; - epoch++; - rxrpc_epoch = epoch; - } - rxrpc_client_conn_ids.cur = id + 1; + id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn, + 1, 0x40000000, GFP_NOWAIT); + if (id < 0) + goto error; spin_unlock(&rxrpc_conn_id_lock); idr_preload_end(); - conn->proto.epoch = epoch; + conn->proto.epoch = rxrpc_epoch; conn->proto.cid = id << RXRPC_CIDSHIFT; set_bit(RXRPC_CONN_HAS_IDR, &conn->flags); - _leave(" [CID %x:%x]", epoch, conn->proto.cid); + _leave(" [CID %x]", conn->proto.cid); return 0; error: @@ -114,8 +183,7 @@ void rxrpc_destroy_client_conn_ids(void) } /* - * Allocate a client connection. The caller must take care to clear any - * padding bytes in *cp. + * Allocate a client connection. */ static struct rxrpc_connection * rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) @@ -131,6 +199,10 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) return ERR_PTR(-ENOMEM); } + atomic_set(&conn->usage, 1); + if (cp->exclusive) + __set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + conn->params = *cp; conn->out_clientflag = RXRPC_CLIENT_INITIATED; conn->state = RXRPC_CONN_CLIENT; @@ -148,7 +220,7 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) goto error_2; write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); write_unlock(&rxrpc_connection_lock); /* We steal the caller's peer ref. */ @@ -156,6 +228,9 @@ rxrpc_alloc_client_connection(struct rxrpc_conn_parameters *cp, gfp_t gfp) rxrpc_get_local(conn->params.local); key_get(conn->params.key); + trace_rxrpc_conn(conn, rxrpc_conn_new_client, atomic_read(&conn->usage), + __builtin_return_address(0)); + trace_rxrpc_client(conn, -1, rxrpc_client_alloc); _leave(" = %p", conn); return conn; @@ -170,32 +245,68 @@ error_0: } /* - * find a connection for a call - * - called in process context with IRQs enabled + * Determine if a connection may be reused. */ -int rxrpc_connect_call(struct rxrpc_call *call, - struct rxrpc_conn_parameters *cp, - struct sockaddr_rxrpc *srx, - gfp_t gfp) +static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn) +{ + int id_cursor, id, distance, limit; + + if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags)) + goto dont_reuse; + + if (conn->proto.epoch != rxrpc_epoch) + goto mark_dont_reuse; + + /* The IDR tree gets very expensive on memory if the connection IDs are + * widely scattered throughout the number space, so we shall want to + * kill off connections that, say, have an ID more than about four + * times the maximum number of client conns away from the current + * allocation point to try and keep the IDs concentrated. + */ + id_cursor = READ_ONCE(rxrpc_client_conn_ids.cur); + id = conn->proto.cid >> RXRPC_CIDSHIFT; + distance = id - id_cursor; + if (distance < 0) + distance = -distance; + limit = round_up(rxrpc_max_client_connections, IDR_SIZE) * 4; + if (distance > limit) + goto mark_dont_reuse; + + return true; + +mark_dont_reuse: + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); +dont_reuse: + return false; +} + +/* + * Create or find a client connection to use for a call. + * + * If we return with a connection, the call will be on its waiting list. It's + * left to the caller to assign a channel and wake up the call. + */ +static int rxrpc_get_client_conn(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) { struct rxrpc_connection *conn, *candidate = NULL; struct rxrpc_local *local = cp->local; struct rb_node *p, **pp, *parent; long diff; - int chan; - - DECLARE_WAITQUEUE(myself, current); + int ret = -ENOMEM; _enter("{%d,%lx},", call->debug_id, call->user_call_ID); cp->peer = rxrpc_lookup_peer(cp->local, srx, gfp); if (!cp->peer) - return -ENOMEM; + goto error; + /* If the connection is not meant to be exclusive, search the available + * connections to see if the connection we want to use already exists. + */ if (!cp->exclusive) { - /* Search for a existing client connection unless this is going - * to be a connection that's used exclusively for a single call. - */ _debug("search 1"); spin_lock(&local->client_conns_lock); p = local->client_conns.rb_node; @@ -206,39 +317,56 @@ int rxrpc_connect_call(struct rxrpc_call *call, diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { p = p->rb_left; - else if (diff > 0) + } else if (diff > 0) { p = p->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The connection needs replacing. It's better + * to effect that when we have something to + * replace it with so that we don't have to + * rebalance the tree twice. + */ + break; + } } spin_unlock(&local->client_conns_lock); } - /* We didn't find a connection or we want an exclusive one. */ - _debug("get new conn"); + /* There wasn't a connection yet or we need an exclusive connection. + * We need to create a candidate and then potentially redo the search + * in case we're racing with another thread also trying to connect on a + * shareable connection. + */ + _debug("new conn"); candidate = rxrpc_alloc_client_connection(cp, gfp); - if (!candidate) { - _leave(" = -ENOMEM"); - return -ENOMEM; + if (IS_ERR(candidate)) { + ret = PTR_ERR(candidate); + goto error_peer; } + /* Add the call to the new connection's waiting list in case we're + * going to have to wait for the connection to come live. It's our + * connection, so we want first dibs on the channel slots. We would + * normally have to take channel_lock but we do this before anyone else + * can see the connection. + */ + list_add_tail(&call->chan_wait_link, &candidate->waiting_calls); + if (cp->exclusive) { - /* Assign the call on an exclusive connection to channel 0 and - * don't add the connection to the endpoint's shareable conn - * lookup tree. - */ - _debug("exclusive chan 0"); - conn = candidate; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); - chan = 0; - goto found_channel; + call->conn = candidate; + call->security_ix = candidate->security_ix; + _leave(" = 0 [exclusive %d]", candidate->debug_id); + return 0; } - /* We need to redo the search before attempting to add a new connection - * lest we race with someone else adding a conflicting instance. + /* Publish the new connection for userspace to find. We need to redo + * the search before doing this lest we race with someone else adding a + * conflicting instance. */ _debug("search 2"); spin_lock(&local->client_conns_lock); @@ -249,124 +377,711 @@ int rxrpc_connect_call(struct rxrpc_call *call, parent = *pp; conn = rb_entry(parent, struct rxrpc_connection, client_node); +#define cmp(X) ((long)conn->params.X - (long)candidate->params.X) diff = (cmp(peer) ?: cmp(key) ?: cmp(security_level)); - if (diff < 0) +#undef cmp + if (diff < 0) { pp = &(*pp)->rb_left; - else if (diff > 0) + } else if (diff > 0) { pp = &(*pp)->rb_right; - else - goto found_extant_conn; + } else { + if (rxrpc_may_reuse_conn(conn) && + rxrpc_get_connection_maybe(conn)) + goto found_extant_conn; + /* The old connection is from an outdated epoch. */ + _debug("replace conn"); + clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); + rb_replace_node(&conn->client_node, + &candidate->client_node, + &local->client_conns); + trace_rxrpc_client(conn, -1, rxrpc_client_replace); + goto candidate_published; + } } - /* The second search also failed; simply add the new connection with - * the new call in channel 0. Note that we need to take the channel - * lock before dropping the client conn lock. - */ _debug("new conn"); - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); rb_link_node(&candidate->client_node, parent, pp); rb_insert_color(&candidate->client_node, &local->client_conns); -attached: - conn = candidate; - candidate = NULL; - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS - 1); - spin_lock(&conn->channel_lock); +candidate_published: + set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); + call->conn = candidate; + call->security_ix = candidate->security_ix; spin_unlock(&local->client_conns_lock); - chan = 0; - -found_channel: - _debug("found chan"); - call->conn = conn; - call->channel = chan; - call->epoch = conn->proto.epoch; - call->cid = conn->proto.cid | chan; - call->call_id = ++conn->channels[chan].call_counter; - conn->channels[chan].call_id = call->call_id; - rcu_assign_pointer(conn->channels[chan].call, call); - - _net("CONNECT call %d on conn %d", call->debug_id, conn->debug_id); - - spin_unlock(&conn->channel_lock); - rxrpc_put_peer(cp->peer); - cp->peer = NULL; - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); + _leave(" = 0 [new %d]", candidate->debug_id); return 0; - /* We found a potentially suitable connection already in existence. If - * we can reuse it (ie. its usage count hasn't been reduced to 0 by the - * reaper), discard any candidate we may have allocated, and try to get - * a channel on this one, otherwise we have to replace it. + /* We come here if we found a suitable connection already in existence. + * Discard any candidate we may have allocated, and try to get a + * channel on this one. */ found_extant_conn: _debug("found conn"); - if (!rxrpc_get_connection_maybe(conn)) { - set_bit(RXRPC_CONN_IN_CLIENT_CONNS, &candidate->flags); - rb_replace_node(&conn->client_node, - &candidate->client_node, - &local->client_conns); - clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags); - goto attached; - } - spin_unlock(&local->client_conns_lock); - rxrpc_put_connection(candidate); - - if (!atomic_add_unless(&conn->avail_chans, -1, 0)) { - if (!gfpflags_allow_blocking(gfp)) { - rxrpc_put_connection(conn); - _leave(" = -EAGAIN"); - return -EAGAIN; - } - - add_wait_queue(&conn->channel_wq, &myself); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - if (atomic_add_unless(&conn->avail_chans, -1, 0)) - break; - if (signal_pending(current)) - goto interrupted; - schedule(); - } - remove_wait_queue(&conn->channel_wq, &myself); - __set_current_state(TASK_RUNNING); + if (candidate) { + trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate); + rxrpc_put_connection(candidate); + candidate = NULL; } - /* The connection allegedly now has a free channel and we can now - * attach the call to it. - */ spin_lock(&conn->channel_lock); + call->conn = conn; + call->security_ix = conn->security_ix; + list_add(&call->chan_wait_link, &conn->waiting_calls); + spin_unlock(&conn->channel_lock); + _leave(" = 0 [extant %d]", conn->debug_id); + return 0; - for (chan = 0; chan < RXRPC_MAXCALLS; chan++) - if (!conn->channels[chan].call) - goto found_channel; - BUG(); - -interrupted: - remove_wait_queue(&conn->channel_wq, &myself); - __set_current_state(TASK_RUNNING); - rxrpc_put_connection(conn); +error_peer: rxrpc_put_peer(cp->peer); cp->peer = NULL; - _leave(" = -ERESTARTSYS"); - return -ERESTARTSYS; +error: + _leave(" = %d", ret); + return ret; } /* - * Remove a client connection from the local endpoint's tree, thereby removing - * it as a target for reuse for new client calls. + * Activate a connection. */ -void rxrpc_unpublish_client_conn(struct rxrpc_connection *conn) +static void rxrpc_activate_conn(struct rxrpc_connection *conn) { - struct rxrpc_local *local = conn->params.local; + trace_rxrpc_client(conn, -1, rxrpc_client_to_active); + conn->cache_state = RXRPC_CONN_CLIENT_ACTIVE; + rxrpc_nr_active_client_conns++; + list_move_tail(&conn->cache_link, &rxrpc_active_client_conns); +} - spin_lock(&local->client_conns_lock); - if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) - rb_erase(&conn->client_node, &local->client_conns); - spin_unlock(&local->client_conns_lock); +/* + * Attempt to animate a connection for a new call. + * + * If it's not exclusive, the connection is in the endpoint tree, and we're in + * the conn's list of those waiting to grab a channel. There is, however, a + * limit on the number of live connections allowed at any one time, so we may + * have to wait for capacity to become available. + * + * Note that a connection on the waiting queue might *also* have active + * channels if it has been culled to make space and then re-requested by a new + * call. + */ +static void rxrpc_animate_client_conn(struct rxrpc_connection *conn) +{ + unsigned int nr_conns; + + _enter("%d,%d", conn->debug_id, conn->cache_state); + + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE) + goto out; + + spin_lock(&rxrpc_client_conn_cache_lock); + + nr_conns = rxrpc_nr_client_conns; + if (!test_and_set_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_count); + rxrpc_nr_client_conns = nr_conns + 1; + } + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + case RXRPC_CONN_CLIENT_WAITING: + break; + + case RXRPC_CONN_CLIENT_INACTIVE: + case RXRPC_CONN_CLIENT_CULLED: + case RXRPC_CONN_CLIENT_IDLE: + if (nr_conns >= rxrpc_max_client_connections) + goto wait_for_capacity; + goto activate_conn; + + default: + BUG(); + } + +out_unlock: + spin_unlock(&rxrpc_client_conn_cache_lock); +out: + _leave(" [%d]", conn->cache_state); + return; + +activate_conn: + _debug("activate"); + rxrpc_activate_conn(conn); + goto out_unlock; + +wait_for_capacity: + _debug("wait"); + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, &rxrpc_waiting_client_conns); + goto out_unlock; +} + +/* + * Deactivate a channel. + */ +static void rxrpc_deactivate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + + rcu_assign_pointer(chan->call, NULL); + conn->active_chans &= ~(1 << channel); +} + +/* + * Assign a channel to the call at the front of the queue and wake the call up. + * We don't increment the callNumber counter until this number has been exposed + * to the world. + */ +static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, + unsigned int channel) +{ + struct rxrpc_channel *chan = &conn->channels[channel]; + struct rxrpc_call *call = list_entry(conn->waiting_calls.next, + struct rxrpc_call, chan_wait_link); + u32 call_id = chan->call_counter + 1; + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); + + write_lock_bh(&call->state_lock); + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; + write_unlock_bh(&call->state_lock); + + rxrpc_see_call(call); + list_del_init(&call->chan_wait_link); + conn->active_chans |= 1 << channel; + call->peer = rxrpc_get_peer(conn->params.peer); + call->cid = conn->proto.cid | channel; + call->call_id = call_id; + + _net("CONNECT call %08x:%08x as call %d on conn %d", + call->cid, call->call_id, call->debug_id, conn->debug_id); + + /* Paired with the read barrier in rxrpc_wait_for_channel(). This + * orders cid and epoch in the connection wrt to call_id without the + * need to take the channel_lock. + * + * We provisionally assign a callNumber at this point, but we don't + * confirm it until the call is about to be exposed. + * + * TODO: Pair with a barrier in the data_ready handler when that looks + * at the call ID through a connection channel. + */ + smp_wmb(); + chan->call_id = call_id; + rcu_assign_pointer(chan->call, call); + wake_up(&call->waitq); +} + +/* + * Assign channels and callNumbers to waiting calls with channel_lock + * held by caller. + */ +static void rxrpc_activate_channels_locked(struct rxrpc_connection *conn) +{ + u8 avail, mask; + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + mask = RXRPC_ACTIVE_CHANS_MASK; + break; + default: + return; + } + + while (!list_empty(&conn->waiting_calls) && + (avail = ~conn->active_chans, + avail &= mask, + avail != 0)) + rxrpc_activate_one_channel(conn, __ffs(avail)); +} + +/* + * Assign channels and callNumbers to waiting calls. + */ +static void rxrpc_activate_channels(struct rxrpc_connection *conn) +{ + _enter("%d", conn->debug_id); + + trace_rxrpc_client(conn, -1, rxrpc_client_activate_chans); + + if (conn->active_chans == RXRPC_ACTIVE_CHANS_MASK) + return; + + spin_lock(&conn->channel_lock); + rxrpc_activate_channels_locked(conn); + spin_unlock(&conn->channel_lock); + _leave(""); +} + +/* + * Wait for a callNumber and a channel to be granted to a call. + */ +static int rxrpc_wait_for_channel(struct rxrpc_call *call, gfp_t gfp) +{ + int ret = 0; + + _enter("%d", call->debug_id); + + if (!call->call_id) { + DECLARE_WAITQUEUE(myself, current); + + if (!gfpflags_allow_blocking(gfp)) { + ret = -EAGAIN; + goto out; + } + + add_wait_queue_exclusive(&call->waitq, &myself); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (call->call_id) + break; + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + schedule(); + } + remove_wait_queue(&call->waitq, &myself); + __set_current_state(TASK_RUNNING); + } + + /* Paired with the write barrier in rxrpc_activate_one_channel(). */ + smp_rmb(); + +out: + _leave(" = %d", ret); + return ret; +} + +/* + * find a connection for a call + * - called in process context with IRQs enabled + */ +int rxrpc_connect_call(struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + int ret; + + _enter("{%d,%lx},", call->debug_id, call->user_call_ID); + + rxrpc_discard_expired_client_conns(NULL); + rxrpc_cull_active_client_conns(); + + ret = rxrpc_get_client_conn(call, cp, srx, gfp); + if (ret < 0) + return ret; + + rxrpc_animate_client_conn(call->conn); + rxrpc_activate_channels(call->conn); + + ret = rxrpc_wait_for_channel(call, gfp); + if (ret < 0) + rxrpc_disconnect_client_call(call); + + _leave(" = %d", ret); + return ret; +} + +/* + * Note that a connection is about to be exposed to the world. Once it is + * exposed, we maintain an extra ref on it that stops it from being summarily + * discarded before it's (a) had a chance to deal with retransmission and (b) + * had a chance at re-use (the per-connection security negotiation is + * expensive). + */ +static void rxrpc_expose_client_conn(struct rxrpc_connection *conn, + unsigned int channel) +{ + if (!test_and_set_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_exposed); + rxrpc_get_connection(conn); + } +} + +/* + * Note that a call, and thus a connection, is about to be exposed to the + * world. + */ +void rxrpc_expose_client_call(struct rxrpc_call *call) +{ + unsigned int channel = call->cid & RXRPC_CHANNELMASK; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = &conn->channels[channel]; + + if (!test_and_set_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + /* Mark the call ID as being used. If the callNumber counter + * exceeds ~2 billion, we kill the connection after its + * outstanding calls have finished so that the counter doesn't + * wrap. + */ + chan->call_counter++; + if (chan->call_counter >= INT_MAX) + set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags); + rxrpc_expose_client_conn(conn, channel); + } +} + +/* + * Disconnect a client call. + */ +void rxrpc_disconnect_client_call(struct rxrpc_call *call) +{ + unsigned int channel = call->cid & RXRPC_CHANNELMASK; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_channel *chan = &conn->channels[channel]; + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect); + call->conn = NULL; + + spin_lock(&conn->channel_lock); + + /* Calls that have never actually been assigned a channel can simply be + * discarded. If the conn didn't get used either, it will follow + * immediately unless someone else grabs it in the meantime. + */ + if (!list_empty(&call->chan_wait_link)) { + _debug("call is waiting"); + ASSERTCMP(call->call_id, ==, 0); + ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags)); + list_del_init(&call->chan_wait_link); + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_unstarted); + + /* We must deactivate or idle the connection if it's now + * waiting for nothing. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + if (conn->cache_state == RXRPC_CONN_CLIENT_WAITING && + list_empty(&conn->waiting_calls) && + !conn->active_chans) + goto idle_connection; + goto out; + } + + ASSERTCMP(rcu_access_pointer(chan->call), ==, call); + + /* If a client call was exposed to the world, we save the result for + * retransmission. + * + * We use a barrier here so that the call number and abort code can be + * read without needing to take a lock. + * + * TODO: Make the incoming packet handler check this and handle + * terminal retransmission without requiring access to the call. + */ + if (test_bit(RXRPC_CALL_EXPOSED, &call->flags)) { + _debug("exposed %u,%u", call->call_id, call->abort_code); + __rxrpc_disconnect_call(conn, call); + } + + /* See if we can pass the channel directly to another call. */ + if (conn->cache_state == RXRPC_CONN_CLIENT_ACTIVE && + !list_empty(&conn->waiting_calls)) { + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); + rxrpc_activate_one_channel(conn, channel); + goto out_2; + } + + /* Things are more complex and we need the cache lock. We might be + * able to simply idle the conn or it might now be lurking on the wait + * list. It might even get moved back to the active list whilst we're + * waiting for the lock. + */ + spin_lock(&rxrpc_client_conn_cache_lock); + + switch (conn->cache_state) { + case RXRPC_CONN_CLIENT_ACTIVE: + if (list_empty(&conn->waiting_calls)) { + rxrpc_deactivate_one_channel(conn, channel); + if (!conn->active_chans) { + rxrpc_nr_active_client_conns--; + goto idle_connection; + } + goto out; + } + + trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass); + rxrpc_activate_one_channel(conn, channel); + goto out; + + case RXRPC_CONN_CLIENT_CULLED: + rxrpc_deactivate_one_channel(conn, channel); + ASSERT(list_empty(&conn->waiting_calls)); + if (!conn->active_chans) + goto idle_connection; + goto out; + + case RXRPC_CONN_CLIENT_WAITING: + rxrpc_deactivate_one_channel(conn, channel); + goto out; + + default: + BUG(); + } + +out: + spin_unlock(&rxrpc_client_conn_cache_lock); +out_2: + spin_unlock(&conn->channel_lock); + rxrpc_put_connection(conn); + _leave(""); + return; + +idle_connection: + /* As no channels remain active, the connection gets deactivated + * immediately or moved to the idle list for a short while. + */ + if (test_bit(RXRPC_CONN_EXPOSED, &conn->flags)) { + trace_rxrpc_client(conn, channel, rxrpc_client_to_idle); + conn->idle_timestamp = jiffies; + conn->cache_state = RXRPC_CONN_CLIENT_IDLE; + list_move_tail(&conn->cache_link, &rxrpc_idle_client_conns); + if (rxrpc_idle_client_conns.next == &conn->cache_link && + !rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + rxrpc_conn_idle_client_expiry); + } else { + trace_rxrpc_client(conn, channel, rxrpc_client_to_inactive); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + } + goto out; +} + +/* + * Clean up a dead client connection. + */ +static struct rxrpc_connection * +rxrpc_put_one_client_conn(struct rxrpc_connection *conn) +{ + struct rxrpc_connection *next = NULL; + struct rxrpc_local *local = conn->params.local; + unsigned int nr_conns; + + trace_rxrpc_client(conn, -1, rxrpc_client_cleanup); + + if (test_bit(RXRPC_CONN_IN_CLIENT_CONNS, &conn->flags)) { + spin_lock(&local->client_conns_lock); + if (test_and_clear_bit(RXRPC_CONN_IN_CLIENT_CONNS, + &conn->flags)) + rb_erase(&conn->client_node, &local->client_conns); + spin_unlock(&local->client_conns_lock); + } rxrpc_put_client_connection_id(conn); + + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_INACTIVE); + + if (test_bit(RXRPC_CONN_COUNTED, &conn->flags)) { + trace_rxrpc_client(conn, -1, rxrpc_client_uncount); + spin_lock(&rxrpc_client_conn_cache_lock); + nr_conns = --rxrpc_nr_client_conns; + + if (nr_conns < rxrpc_max_client_connections && + !list_empty(&rxrpc_waiting_client_conns)) { + next = list_entry(rxrpc_waiting_client_conns.next, + struct rxrpc_connection, cache_link); + rxrpc_get_connection(next); + rxrpc_activate_conn(next); + } + + spin_unlock(&rxrpc_client_conn_cache_lock); + } + + rxrpc_kill_connection(conn); + if (next) + rxrpc_activate_channels(next); + + /* We need to get rid of the temporary ref we took upon next, but we + * can't call rxrpc_put_connection() recursively. + */ + return next; +} + +/* + * Clean up a dead client connections. + */ +void rxrpc_put_client_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n; + + do { + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_client, n, here); + if (n > 0) + return; + ASSERTCMP(n, >=, 0); + + conn = rxrpc_put_one_client_conn(conn); + } while (conn); +} + +/* + * Kill the longest-active client connections to make room for new ones. + */ +static void rxrpc_cull_active_client_conns(void) +{ + struct rxrpc_connection *conn; + unsigned int nr_conns = rxrpc_nr_client_conns; + unsigned int nr_active, limit; + + _enter(""); + + ASSERTCMP(nr_conns, >=, 0); + if (nr_conns < rxrpc_max_client_connections) { + _leave(" [ok]"); + return; + } + limit = rxrpc_reap_client_connections; + + spin_lock(&rxrpc_client_conn_cache_lock); + nr_active = rxrpc_nr_active_client_conns; + + while (nr_active > limit) { + ASSERT(!list_empty(&rxrpc_active_client_conns)); + conn = list_entry(rxrpc_active_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERTCMP(conn->cache_state, ==, RXRPC_CONN_CLIENT_ACTIVE); + + if (list_empty(&conn->waiting_calls)) { + trace_rxrpc_client(conn, -1, rxrpc_client_to_culled); + conn->cache_state = RXRPC_CONN_CLIENT_CULLED; + list_del_init(&conn->cache_link); + } else { + trace_rxrpc_client(conn, -1, rxrpc_client_to_waiting); + conn->cache_state = RXRPC_CONN_CLIENT_WAITING; + list_move_tail(&conn->cache_link, + &rxrpc_waiting_client_conns); + } + + nr_active--; + } + + rxrpc_nr_active_client_conns = nr_active; + spin_unlock(&rxrpc_client_conn_cache_lock); + ASSERTCMP(nr_active, >=, 0); + _leave(" [culled]"); +} + +/* + * Discard expired client connections from the idle list. Each conn in the + * idle list has been exposed and holds an extra ref because of that. + * + * This may be called from conn setup or from a work item so cannot be + * considered non-reentrant. + */ +static void rxrpc_discard_expired_client_conns(struct work_struct *work) +{ + struct rxrpc_connection *conn; + unsigned long expiry, conn_expires_at, now; + unsigned int nr_conns; + bool did_discard = false; + + _enter("%c", work ? 'w' : 'n'); + + if (list_empty(&rxrpc_idle_client_conns)) { + _leave(" [empty]"); + return; + } + + /* Don't double up on the discarding */ + if (!spin_trylock(&rxrpc_client_conn_discard_mutex)) { + _leave(" [already]"); + return; + } + + /* We keep an estimate of what the number of conns ought to be after + * we've discarded some so that we don't overdo the discarding. + */ + nr_conns = rxrpc_nr_client_conns; + +next: + spin_lock(&rxrpc_client_conn_cache_lock); + + if (list_empty(&rxrpc_idle_client_conns)) + goto out; + + conn = list_entry(rxrpc_idle_client_conns.next, + struct rxrpc_connection, cache_link); + ASSERT(test_bit(RXRPC_CONN_EXPOSED, &conn->flags)); + + if (!rxrpc_kill_all_client_conns) { + /* If the number of connections is over the reap limit, we + * expedite discard by reducing the expiry timeout. We must, + * however, have at least a short grace period to be able to do + * final-ACK or ABORT retransmission. + */ + expiry = rxrpc_conn_idle_client_expiry; + if (nr_conns > rxrpc_reap_client_connections) + expiry = rxrpc_conn_idle_client_fast_expiry; + + conn_expires_at = conn->idle_timestamp + expiry; + + now = READ_ONCE(jiffies); + if (time_after(conn_expires_at, now)) + goto not_yet_expired; + } + + trace_rxrpc_client(conn, -1, rxrpc_client_discard); + if (!test_and_clear_bit(RXRPC_CONN_EXPOSED, &conn->flags)) + BUG(); + conn->cache_state = RXRPC_CONN_CLIENT_INACTIVE; + list_del_init(&conn->cache_link); + + spin_unlock(&rxrpc_client_conn_cache_lock); + + /* When we cleared the EXPOSED flag, we took on responsibility for the + * reference that that had on the usage count. We deal with that here. + * If someone re-sets the flag and re-gets the ref, that's fine. + */ + rxrpc_put_connection(conn); + did_discard = true; + nr_conns--; + goto next; + +not_yet_expired: + /* The connection at the front of the queue hasn't yet expired, so + * schedule the work item for that point if we discarded something. + * + * We don't worry if the work item is already scheduled - it can look + * after rescheduling itself at a later time. We could cancel it, but + * then things get messier. + */ + _debug("not yet"); + if (!rxrpc_kill_all_client_conns) + queue_delayed_work(rxrpc_workqueue, + &rxrpc_client_conn_reap, + conn_expires_at - now); + +out: + spin_unlock(&rxrpc_client_conn_cache_lock); + spin_unlock(&rxrpc_client_conn_discard_mutex); + _leave(""); +} + +/* + * Preemptively destroy all the client connection records rather than waiting + * for them to time out + */ +void __exit rxrpc_destroy_all_client_connections(void) +{ + _enter(""); + + spin_lock(&rxrpc_client_conn_cache_lock); + rxrpc_kill_all_client_conns = true; + spin_unlock(&rxrpc_client_conn_cache_lock); + + cancel_delayed_work(&rxrpc_client_conn_reap); + + if (!queue_delayed_work(rxrpc_workqueue, &rxrpc_client_conn_reap, 0)) + _debug("destroy: queue failed"); + + _leave(""); } diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index cee0f35bc1cf..3f9d8d7ec632 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -15,20 +15,128 @@ #include #include #include -#include -#include -#include -#include #include #include #include #include "ar-internal.h" +/* + * Retransmit terminal ACK or ABORT of the previous call. + */ +static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, + struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_channel *chan; + struct msghdr msg; + struct kvec iov; + struct { + struct rxrpc_wire_header whdr; + union { + struct { + __be32 code; + } abort; + struct { + struct rxrpc_ackpacket ack; + u8 padding[3]; + struct rxrpc_ackinfo info; + }; + }; + } __attribute__((packed)) pkt; + size_t len; + u32 serial, mtu, call_id; + + _enter("%d", conn->debug_id); + + chan = &conn->channels[sp->hdr.cid & RXRPC_CHANNELMASK]; + + /* If the last call got moved on whilst we were waiting to run, just + * ignore this packet. + */ + call_id = READ_ONCE(chan->last_call); + /* Sync with __rxrpc_disconnect_call() */ + smp_rmb(); + if (call_id != sp->hdr.callNumber) + return; + + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt.whdr.epoch = htonl(sp->hdr.epoch); + pkt.whdr.cid = htonl(sp->hdr.cid); + pkt.whdr.callNumber = htonl(sp->hdr.callNumber); + pkt.whdr.seq = 0; + pkt.whdr.type = chan->last_type; + pkt.whdr.flags = conn->out_clientflag; + pkt.whdr.userStatus = 0; + pkt.whdr.securityIndex = conn->security_ix; + pkt.whdr._rsvd = 0; + pkt.whdr.serviceId = htons(chan->last_service_id); + + len = sizeof(pkt.whdr); + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + pkt.abort.code = htonl(chan->last_abort); + len += sizeof(pkt.abort); + break; + + case RXRPC_PACKET_TYPE_ACK: + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; + pkt.ack.bufferSpace = 0; + pkt.ack.maxSkew = htons(skb->priority); + pkt.ack.firstPacket = htonl(chan->last_seq); + pkt.ack.previousPacket = htonl(chan->last_seq - 1); + pkt.ack.serial = htonl(sp->hdr.serial); + pkt.ack.reason = RXRPC_ACK_DUPLICATE; + pkt.ack.nAcks = 0; + pkt.info.rxMTU = htonl(rxrpc_rx_mtu); + pkt.info.maxMTU = htonl(mtu); + pkt.info.rwind = htonl(rxrpc_rx_window_size); + pkt.info.jumbo_max = htonl(rxrpc_rx_jumbo_max); + pkt.whdr.flags |= RXRPC_SLOW_START_OK; + len += sizeof(pkt.ack) + sizeof(pkt.info); + break; + } + + /* Resync with __rxrpc_disconnect_call() and check that the last call + * didn't get advanced whilst we were filling out the packets. + */ + smp_rmb(); + if (READ_ONCE(chan->last_call) != call_id) + return; + + iov.iov_base = &pkt; + iov.iov_len = len; + + serial = atomic_inc_return(&conn->serial); + pkt.whdr.serial = htonl(serial); + + switch (chan->last_type) { + case RXRPC_PACKET_TYPE_ABORT: + _proto("Tx ABORT %%%u { %d } [re]", serial, conn->local_abort); + break; + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(NULL, serial, chan->last_seq, 0, + RXRPC_ACK_DUPLICATE, 0); + _proto("Tx ACK %%%u [re]", serial); + break; + } + + kernel_sendmsg(conn->params.local->socket, &msg, &iov, 1, len); + _leave(""); + return; +} + /* * pass a connection-level abort onto all calls on that connection */ -static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, - u32 abort_code) +static void rxrpc_abort_calls(struct rxrpc_connection *conn, + enum rxrpc_call_completion compl, + u32 abort_code, int error) { struct rxrpc_call *call; int i; @@ -41,19 +149,15 @@ static void rxrpc_abort_calls(struct rxrpc_connection *conn, int state, call = rcu_dereference_protected( conn->channels[i].call, lockdep_is_held(&conn->channel_lock)); - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = state; - if (state == RXRPC_CALL_LOCALLY_ABORTED) { - call->local_abort = conn->local_abort; - set_bit(RXRPC_CALL_EV_CONN_ABORT, &call->events); - } else { - call->remote_abort = conn->remote_abort; - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - } - rxrpc_queue_call(call); + if (call) { + if (compl == RXRPC_CALL_LOCALLY_ABORTED) + trace_rxrpc_abort("CON", call->cid, + call->call_id, 0, + abort_code, error); + if (rxrpc_set_call_completion(call, compl, + abort_code, error)) + rxrpc_notify_socket(call); } - write_unlock_bh(&call->state_lock); } spin_unlock(&conn->channel_lock); @@ -78,17 +182,16 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* generate a connection-level abort */ spin_lock_bh(&conn->state_lock); - if (conn->state < RXRPC_CONN_REMOTELY_ABORTED) { - conn->state = RXRPC_CONN_LOCALLY_ABORTED; - conn->error = error; - spin_unlock_bh(&conn->state_lock); - } else { + if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { spin_unlock_bh(&conn->state_lock); _leave(" = 0 [already dead]"); return 0; } - rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code); + conn->state = RXRPC_CONN_LOCALLY_ABORTED; + spin_unlock_bh(&conn->state_lock); + + rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, abort_code, error); msg.msg_name = &conn->params.peer->srx.transport; msg.msg_namelen = conn->params.peer->srx.transport_len; @@ -132,17 +235,18 @@ static int rxrpc_abort_connection(struct rxrpc_connection *conn, /* * mark a call as being on a now-secured channel - * - must be called with softirqs disabled + * - must be called with BH's disabled. */ static void rxrpc_call_is_secure(struct rxrpc_call *call) { _enter("%p", call); if (call) { - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_SECURED, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); + write_lock_bh(&call->state_lock); + if (call->state == RXRPC_CALL_SERVER_SECURING) { + call->state = RXRPC_CALL_SERVER_ACCEPTING; + rxrpc_notify_socket(call); + } + write_unlock_bh(&call->state_lock); } } @@ -159,22 +263,28 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, int loop, ret; if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) { - kleave(" = -ECONNABORTED [%u]", conn->state); + _leave(" = -ECONNABORTED [%u]", conn->state); return -ECONNABORTED; } _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial); switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_DATA: + case RXRPC_PACKET_TYPE_ACK: + rxrpc_conn_retransmit_call(conn, skb); + return 0; + case RXRPC_PACKET_TYPE_ABORT: - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) < 0) return -EPROTO; abort_code = ntohl(wtmp); _proto("Rx ABORT %%%u { ac=%d }", sp->hdr.serial, abort_code); conn->state = RXRPC_CONN_REMOTELY_ABORTED; rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, - abort_code); + abort_code, ECONNABORTED); return -ECONNABORTED; case RXRPC_PACKET_TYPE_CHALLENGE: @@ -199,14 +309,16 @@ static int rxrpc_process_event(struct rxrpc_connection *conn, if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) { conn->state = RXRPC_CONN_SERVICE; + spin_unlock(&conn->state_lock); for (loop = 0; loop < RXRPC_MAXCALLS; loop++) rxrpc_call_is_secure( rcu_dereference_protected( conn->channels[loop].call, lockdep_is_held(&conn->channel_lock))); + } else { + spin_unlock(&conn->state_lock); } - spin_unlock(&conn->state_lock); spin_unlock(&conn->channel_lock); return 0; @@ -269,7 +381,7 @@ void rxrpc_process_connection(struct work_struct *work) u32 abort_code = RX_PROTOCOL_ERROR; int ret; - _enter("{%d}", conn->debug_id); + rxrpc_see_connection(conn); if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events)) rxrpc_secure_connection(conn); @@ -277,6 +389,7 @@ void rxrpc_process_connection(struct work_struct *work) /* go through the conn-level event packets, releasing the ref on this * connection that each one has when we've finished with it */ while ((skb = skb_dequeue(&conn->rx_queue))) { + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); ret = rxrpc_process_event(conn, skb, &abort_code); switch (ret) { case -EPROTO: @@ -287,7 +400,7 @@ void rxrpc_process_connection(struct work_struct *work) goto requeue_and_leave; case -ECONNABORTED: default: - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); break; } } @@ -304,91 +417,7 @@ requeue_and_leave: protocol_error: if (rxrpc_abort_connection(conn, -ret, abort_code) < 0) goto requeue_and_leave; - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [EPROTO]"); goto out; } - -/* - * put a packet up for transport-level abort - */ -void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) -{ - CHECK_SLAB_OKAY(&local->usage); - - skb_queue_tail(&local->reject_queue, skb); - rxrpc_queue_local(local); -} - -/* - * reject packets through the local endpoint - */ -void rxrpc_reject_packets(struct rxrpc_local *local) -{ - union { - struct sockaddr sa; - struct sockaddr_in sin; - } sa; - struct rxrpc_skb_priv *sp; - struct rxrpc_wire_header whdr; - struct sk_buff *skb; - struct msghdr msg; - struct kvec iov[2]; - size_t size; - __be32 code; - - _enter("%d", local->debug_id); - - iov[0].iov_base = &whdr; - iov[0].iov_len = sizeof(whdr); - iov[1].iov_base = &code; - iov[1].iov_len = sizeof(code); - size = sizeof(whdr) + sizeof(code); - - msg.msg_name = &sa; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_flags = 0; - - memset(&sa, 0, sizeof(sa)); - sa.sa.sa_family = local->srx.transport.family; - switch (sa.sa.sa_family) { - case AF_INET: - msg.msg_namelen = sizeof(sa.sin); - break; - default: - msg.msg_namelen = 0; - break; - } - - memset(&whdr, 0, sizeof(whdr)); - whdr.type = RXRPC_PACKET_TYPE_ABORT; - - while ((skb = skb_dequeue(&local->reject_queue))) { - sp = rxrpc_skb(skb); - switch (sa.sa.sa_family) { - case AF_INET: - sa.sin.sin_port = udp_hdr(skb)->source; - sa.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; - code = htonl(skb->priority); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.serviceId = htons(sp->hdr.serviceId); - whdr.flags = sp->hdr.flags; - whdr.flags ^= RXRPC_CLIENT_INITIATED; - whdr.flags &= RXRPC_CLIENT_INITIATED; - - kernel_sendmsg(local->socket, &msg, iov, 2, size); - break; - - default: - break; - } - - rxrpc_free_skb(skb); - } - - _leave(""); -} diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 896d84493a05..e1e83af47866 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -1,6 +1,6 @@ -/* RxRPC virtual connection handler +/* RxRPC virtual connection handler, common bits. * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include "ar-internal.h" /* @@ -27,9 +25,12 @@ unsigned int rxrpc_connection_expiry = 10 * 60; static void rxrpc_connection_reaper(struct work_struct *work); LIST_HEAD(rxrpc_connections); +LIST_HEAD(rxrpc_connection_proc_list); DEFINE_RWLOCK(rxrpc_connection_lock); static DECLARE_DELAYED_WORK(rxrpc_connection_reap, rxrpc_connection_reaper); +static void rxrpc_destroy_connection(struct rcu_head *); + /* * allocate a new connection */ @@ -41,21 +42,18 @@ struct rxrpc_connection *rxrpc_alloc_connection(gfp_t gfp) conn = kzalloc(sizeof(struct rxrpc_connection), gfp); if (conn) { + INIT_LIST_HEAD(&conn->cache_link); spin_lock_init(&conn->channel_lock); - init_waitqueue_head(&conn->channel_wq); + INIT_LIST_HEAD(&conn->waiting_calls); INIT_WORK(&conn->processor, &rxrpc_process_connection); + INIT_LIST_HEAD(&conn->proc_link); INIT_LIST_HEAD(&conn->link); skb_queue_head_init(&conn->rx_queue); conn->security = &rxrpc_no_security; spin_lock_init(&conn->state_lock); - /* We maintain an extra ref on the connection whilst it is - * on the rxrpc_connections list. - */ - atomic_set(&conn->usage, 2); conn->debug_id = atomic_inc_return(&rxrpc_debug_id); - atomic_set(&conn->avail_chans, RXRPC_MAXCALLS); conn->size_align = 4; - conn->header_size = sizeof(struct rxrpc_wire_header); + conn->idle_timestamp = jiffies; } _leave(" = %p{%d}", conn, conn ? conn->debug_id : 0); @@ -135,6 +133,16 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, srx.transport.sin.sin_addr.s_addr) goto not_found; break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + if (peer->srx.transport.sin6.sin6_port != + srx.transport.sin6.sin6_port || + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)) != 0) + goto not_found; + break; +#endif default: BUG(); } @@ -153,25 +161,32 @@ not_found: * terminates. The caller must hold the channel_lock and must release the * call's ref on the connection. */ -void __rxrpc_disconnect_call(struct rxrpc_call *call) +void __rxrpc_disconnect_call(struct rxrpc_connection *conn, + struct rxrpc_call *call) { - struct rxrpc_connection *conn = call->conn; - struct rxrpc_channel *chan = &conn->channels[call->channel]; + struct rxrpc_channel *chan = + &conn->channels[call->cid & RXRPC_CHANNELMASK]; - _enter("%d,%d", conn->debug_id, call->channel); + _enter("%d,%x", conn->debug_id, call->cid); if (rcu_access_pointer(chan->call) == call) { /* Save the result of the call so that we can repeat it if necessary * through the channel, whilst disposing of the actual call record. */ - chan->last_result = call->local_abort; + chan->last_service_id = call->service_id; + if (call->abort_code) { + chan->last_abort = call->abort_code; + chan->last_type = RXRPC_PACKET_TYPE_ABORT; + } else { + chan->last_seq = call->rx_hard_ack; + chan->last_type = RXRPC_PACKET_TYPE_ACK; + } + /* Sync with rxrpc_conn_retransmit(). */ smp_wmb(); chan->last_call = chan->call_id; chan->call_id = chan->call_counter; rcu_assign_pointer(chan->call, NULL); - atomic_inc(&conn->avail_chans); - wake_up(&conn->channel_wq); } _leave(""); @@ -185,34 +200,122 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) { struct rxrpc_connection *conn = call->conn; + spin_lock_bh(&conn->params.peer->lock); + hlist_del_init(&call->error_link); + spin_unlock_bh(&conn->params.peer->lock); + + if (rxrpc_is_client_call(call)) + return rxrpc_disconnect_client_call(call); + spin_lock(&conn->channel_lock); - __rxrpc_disconnect_call(call); + __rxrpc_disconnect_call(conn, call); spin_unlock(&conn->channel_lock); call->conn = NULL; + conn->idle_timestamp = jiffies; rxrpc_put_connection(conn); } /* - * release a virtual connection + * Kill off a connection. */ -void rxrpc_put_connection(struct rxrpc_connection *conn) +void rxrpc_kill_connection(struct rxrpc_connection *conn) { - if (!conn) - return; + ASSERT(!rcu_access_pointer(conn->channels[0].call) && + !rcu_access_pointer(conn->channels[1].call) && + !rcu_access_pointer(conn->channels[2].call) && + !rcu_access_pointer(conn->channels[3].call)); + ASSERT(list_empty(&conn->cache_link)); - _enter("%p{u=%d,d=%d}", - conn, atomic_read(&conn->usage), conn->debug_id); + write_lock(&rxrpc_connection_lock); + list_del_init(&conn->proc_link); + write_unlock(&rxrpc_connection_lock); - ASSERTCMP(atomic_read(&conn->usage), >, 1); + /* Drain the Rx queue. Note that even though we've unpublished, an + * incoming packet could still be being added to our Rx queue, so we + * will need to drain it again in the RCU cleanup handler. + */ + rxrpc_purge_queue(&conn->rx_queue); - conn->put_time = ktime_get_seconds(); - if (atomic_dec_return(&conn->usage) == 1) { - _debug("zombie"); - rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); + /* Leave final destruction to RCU. The connection processor work item + * must carry a ref on the connection to prevent us getting here whilst + * it is queued or running. + */ + call_rcu(&conn->rcu, rxrpc_destroy_connection); +} + +/* + * Queue a connection's work processor, getting a ref to pass to the work + * queue. + */ +bool rxrpc_queue_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n == 0) + return false; + if (rxrpc_queue_work(&conn->processor)) + trace_rxrpc_conn(conn, rxrpc_conn_queued, n + 1, here); + else + rxrpc_put_connection(conn); + return true; +} + +/* + * Note the re-emergence of a connection. + */ +void rxrpc_see_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + if (conn) { + int n = atomic_read(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_seen, n, here); } +} - _leave(""); +/* + * Get a ref on a connection. + */ +void rxrpc_get_connection(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(&conn->usage); + + trace_rxrpc_conn(conn, rxrpc_conn_got, n, here); +} + +/* + * Try to get a ref on a connection. + */ +struct rxrpc_connection * +rxrpc_get_connection_maybe(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + + if (conn) { + int n = __atomic_add_unless(&conn->usage, 1, 0); + if (n > 0) + trace_rxrpc_conn(conn, rxrpc_conn_got, n + 1, here); + else + conn = NULL; + } + return conn; +} + +/* + * Release a service connection + */ +void rxrpc_put_service_conn(struct rxrpc_connection *conn) +{ + const void *here = __builtin_return_address(0); + int n; + + n = atomic_dec_return(&conn->usage); + trace_rxrpc_conn(conn, rxrpc_conn_put_service, n, here); + ASSERTCMP(n, >=, 0); + if (n == 0) + rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); } /* @@ -242,19 +345,19 @@ static void rxrpc_destroy_connection(struct rcu_head *rcu) } /* - * reap dead connections + * reap dead service connections */ static void rxrpc_connection_reaper(struct work_struct *work) { struct rxrpc_connection *conn, *_p; - unsigned long reap_older_than, earliest, put_time, now; + unsigned long reap_older_than, earliest, idle_timestamp, now; LIST_HEAD(graveyard); _enter(""); - now = ktime_get_seconds(); - reap_older_than = now - rxrpc_connection_expiry; + now = jiffies; + reap_older_than = now - rxrpc_connection_expiry * HZ; earliest = ULONG_MAX; write_lock(&rxrpc_connection_lock); @@ -262,11 +365,17 @@ static void rxrpc_connection_reaper(struct work_struct *work) ASSERTCMP(atomic_read(&conn->usage), >, 0); if (likely(atomic_read(&conn->usage) > 1)) continue; + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) + continue; - put_time = READ_ONCE(conn->put_time); - if (time_after(put_time, reap_older_than)) { - if (time_before(put_time, earliest)) - earliest = put_time; + idle_timestamp = READ_ONCE(conn->idle_timestamp); + _debug("reap CONN %d { u=%d,t=%ld }", + conn->debug_id, atomic_read(&conn->usage), + (long)reap_older_than - (long)idle_timestamp); + + if (time_after(idle_timestamp, reap_older_than)) { + if (time_before(idle_timestamp, earliest)) + earliest = idle_timestamp; continue; } @@ -277,7 +386,7 @@ static void rxrpc_connection_reaper(struct work_struct *work) continue; if (rxrpc_conn_is_client(conn)) - rxrpc_unpublish_client_conn(conn); + BUG(); else rxrpc_unpublish_service_conn(conn); @@ -287,9 +396,9 @@ static void rxrpc_connection_reaper(struct work_struct *work) if (earliest != ULONG_MAX) { _debug("reschedule reaper %ld", (long) earliest - now); - ASSERTCMP(earliest, >, now); + ASSERT(time_after(earliest, now)); rxrpc_queue_delayed_work(&rxrpc_connection_reap, - (earliest - now) * HZ); + earliest - now); } while (!list_empty(&graveyard)) { @@ -298,16 +407,15 @@ static void rxrpc_connection_reaper(struct work_struct *work) list_del_init(&conn->link); ASSERTCMP(atomic_read(&conn->usage), ==, 0); - skb_queue_purge(&conn->rx_queue); - call_rcu(&conn->rcu, rxrpc_destroy_connection); + rxrpc_kill_connection(conn); } _leave(""); } /* - * preemptively destroy all the connection records rather than waiting for them - * to time out + * preemptively destroy all the service connection records rather than + * waiting for them to time out */ void __exit rxrpc_destroy_all_connections(void) { @@ -316,6 +424,8 @@ void __exit rxrpc_destroy_all_connections(void) _enter(""); + rxrpc_destroy_all_client_connections(); + rxrpc_connection_expiry = 0; cancel_delayed_work(&rxrpc_connection_reap); rxrpc_queue_delayed_work(&rxrpc_connection_reap, 0); @@ -330,6 +440,8 @@ void __exit rxrpc_destroy_all_connections(void) write_unlock(&rxrpc_connection_lock); BUG_ON(leak); + ASSERT(list_empty(&rxrpc_connection_proc_list)); + /* Make sure the local and peer records pinned by any dying connections * are released. */ diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index fd9027ccba8f..eef551f40dc2 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -65,9 +65,8 @@ done: * Insert a service connection into a peer's tree, thereby making it a target * for incoming packets. */ -static struct rxrpc_connection * -rxrpc_publish_service_conn(struct rxrpc_peer *peer, - struct rxrpc_connection *conn) +static void rxrpc_publish_service_conn(struct rxrpc_peer *peer, + struct rxrpc_connection *conn) { struct rxrpc_connection *cursor = NULL; struct rxrpc_conn_proto k = conn->proto; @@ -96,7 +95,7 @@ conn_published: set_bit(RXRPC_CONN_IN_SERVICE_CONNS, &conn->flags); write_sequnlock_bh(&peer->service_conn_lock); _leave(" = %d [new]", conn->debug_id); - return conn; + return; found_extant_conn: if (atomic_read(&cursor->usage) == 0) @@ -119,100 +118,58 @@ replace_old_connection: } /* - * get a record of an incoming connection + * Preallocate a service connection. The connection is placed on the proc and + * reap lists so that we don't have to get the lock from BH context. */ -struct rxrpc_connection *rxrpc_incoming_connection(struct rxrpc_local *local, - struct sockaddr_rxrpc *srx, - struct sk_buff *skb) +struct rxrpc_connection *rxrpc_prealloc_service_connection(gfp_t gfp) +{ + struct rxrpc_connection *conn = rxrpc_alloc_connection(gfp); + + if (conn) { + /* We maintain an extra ref on the connection whilst it is on + * the rxrpc_connections list. + */ + conn->state = RXRPC_CONN_SERVICE_PREALLOC; + atomic_set(&conn->usage, 2); + + write_lock(&rxrpc_connection_lock); + list_add_tail(&conn->link, &rxrpc_connections); + list_add_tail(&conn->proc_link, &rxrpc_connection_proc_list); + write_unlock(&rxrpc_connection_lock); + + trace_rxrpc_conn(conn, rxrpc_conn_new_service, + atomic_read(&conn->usage), + __builtin_return_address(0)); + } + + return conn; +} + +/* + * Set up an incoming connection. This is called in BH context with the RCU + * read lock held. + */ +void rxrpc_new_incoming_connection(struct rxrpc_connection *conn, + struct sk_buff *skb) { - struct rxrpc_connection *conn; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - const char *new = "old"; _enter(""); - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) { - _debug("no peer"); - return ERR_PTR(-EBUSY); - } - - ASSERT(sp->hdr.flags & RXRPC_CLIENT_INITIATED); - - rcu_read_lock(); - peer = rxrpc_lookup_peer_rcu(local, srx); - if (peer) { - conn = rxrpc_find_service_conn_rcu(peer, skb); - if (conn) { - if (sp->hdr.securityIndex != conn->security_ix) - goto security_mismatch_rcu; - if (rxrpc_get_connection_maybe(conn)) - goto found_extant_connection_rcu; - - /* The conn has expired but we can't remove it without - * the appropriate lock, so we attempt to replace it - * when we have a new candidate. - */ - } - - if (!rxrpc_get_peer_maybe(peer)) - peer = NULL; - } - rcu_read_unlock(); - - if (!peer) { - peer = rxrpc_lookup_peer(local, srx, GFP_NOIO); - if (!peer) - goto enomem; - } - - /* We don't have a matching record yet. */ - conn = rxrpc_alloc_connection(GFP_NOIO); - if (!conn) - goto enomem_peer; - conn->proto.epoch = sp->hdr.epoch; conn->proto.cid = sp->hdr.cid & RXRPC_CIDMASK; - conn->params.local = local; - conn->params.peer = peer; conn->params.service_id = sp->hdr.serviceId; conn->security_ix = sp->hdr.securityIndex; conn->out_clientflag = 0; - conn->state = RXRPC_CONN_SERVICE; - if (conn->params.service_id) + if (conn->security_ix) conn->state = RXRPC_CONN_SERVICE_UNSECURED; - - rxrpc_get_local(local); - - write_lock(&rxrpc_connection_lock); - list_add_tail(&conn->link, &rxrpc_connections); - write_unlock(&rxrpc_connection_lock); + else + conn->state = RXRPC_CONN_SERVICE; /* Make the connection a target for incoming packets. */ - rxrpc_publish_service_conn(peer, conn); + rxrpc_publish_service_conn(conn->params.peer, conn); - new = "new"; - -success: - _net("CONNECTION %s %d {%x}", new, conn->debug_id, conn->proto.cid); - _leave(" = %p {u=%d}", conn, atomic_read(&conn->usage)); - return conn; - -found_extant_connection_rcu: - rcu_read_unlock(); - goto success; - -security_mismatch_rcu: - rcu_read_unlock(); - _leave(" = -EKEYREJECTED"); - return ERR_PTR(-EKEYREJECTED); - -enomem_peer: - rxrpc_put_peer(peer); -enomem: - _leave(" = -ENOMEM"); - return ERR_PTR(-ENOMEM); + _net("CONNECTION new %d {%x}", conn->debug_id, conn->proto.cid); } /* diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 70bb77818dea..3ad9f75031e3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -1,6 +1,6 @@ /* RxRPC packet reception * - * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -27,550 +27,920 @@ #include #include "ar-internal.h" -/* - * queue a packet for recvmsg to pass to userspace - * - the caller must hold a lock on call->lock - * - must not be called with interrupts disabled (sk_filter() disables BH's) - * - eats the packet whether successful or not - * - there must be just one reference to the packet, which the caller passes to - * this function - */ -int rxrpc_queue_rcv_skb(struct rxrpc_call *call, struct sk_buff *skb, - bool force, bool terminal) +static void rxrpc_proto_abort(const char *why, + struct rxrpc_call *call, rxrpc_seq_t seq) { - struct rxrpc_skb_priv *sp; - struct rxrpc_sock *rx = call->socket; - struct sock *sk; - int ret; - - _enter(",,%d,%d", force, terminal); - - ASSERT(!irqs_disabled()); - - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, call); - - /* if we've already posted the terminal message for a call, then we - * don't post any more */ - if (test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags)) { - _debug("already terminated"); - ASSERTCMP(call->state, >=, RXRPC_CALL_COMPLETE); - rxrpc_free_skb(skb); - return 0; + if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, EBADMSG)) { + set_bit(RXRPC_CALL_EV_ABORT, &call->events); + rxrpc_queue_call(call); } - - sk = &rx->sk; - - if (!force) { - /* cast skb->rcvbuf to unsigned... It's pointless, but - * reduces number of warnings when compiling with -W - * --ANK */ -// ret = -ENOBUFS; -// if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= -// (unsigned int) sk->sk_rcvbuf) -// goto out; - - ret = sk_filter(sk, skb); - if (ret < 0) - goto out; - } - - spin_lock_bh(&sk->sk_receive_queue.lock); - if (!test_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags) && - !test_bit(RXRPC_CALL_RELEASED, &call->flags) && - call->socket->sk.sk_state != RXRPC_CLOSE) { - skb->destructor = rxrpc_packet_destructor; - skb->dev = NULL; - skb->sk = sk; - atomic_add(skb->truesize, &sk->sk_rmem_alloc); - - if (terminal) { - _debug("<<<< TERMINAL MESSAGE >>>>"); - set_bit(RXRPC_CALL_TERMINAL_MSG, &call->flags); - } - - /* allow interception by a kernel service */ - if (rx->interceptor) { - rx->interceptor(sk, call->user_call_ID, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - } else { - _net("post skb %p", skb); - __skb_queue_tail(&sk->sk_receive_queue, skb); - spin_unlock_bh(&sk->sk_receive_queue.lock); - - if (!sock_flag(sk, SOCK_DEAD)) - sk->sk_data_ready(sk); - } - skb = NULL; - } else { - spin_unlock_bh(&sk->sk_receive_queue.lock); - } - ret = 0; - -out: - rxrpc_free_skb(skb); - - _leave(" = %d", ret); - return ret; } /* - * process a DATA packet, posting the packet to the appropriate queue - * - eats the packet if successful + * Do TCP-style congestion management [RFC 5681]. */ -static int rxrpc_fast_process_data(struct rxrpc_call *call, - struct sk_buff *skb, u32 seq) +static void rxrpc_congestion_management(struct rxrpc_call *call, + struct sk_buff *skb, + struct rxrpc_ack_summary *summary, + rxrpc_serial_t acked_serial) { - struct rxrpc_skb_priv *sp; - bool terminal; - int ret, ackbit, ack; - u32 serial; - u8 flags; + enum rxrpc_congest_change change = rxrpc_cong_no_change; + unsigned int cumulative_acks = call->cong_cumul_acks; + unsigned int cwnd = call->cong_cwnd; + bool resend = false; - _enter("{%u,%u},,{%u}", call->rx_data_post, call->rx_first_oos, seq); + summary->flight_size = + (call->tx_top - call->tx_hard_ack) - summary->nr_acks; - sp = rxrpc_skb(skb); - ASSERTCMP(sp->call, ==, NULL); - flags = sp->hdr.flags; - serial = sp->hdr.serial; - - spin_lock(&call->lock); - - if (call->state > RXRPC_CALL_COMPLETE) - goto discard; - - ASSERTCMP(call->rx_data_expect, >=, call->rx_data_post); - ASSERTCMP(call->rx_data_post, >=, call->rx_data_recv); - ASSERTCMP(call->rx_data_recv, >=, call->rx_data_eaten); - - if (seq < call->rx_data_post) { - _debug("dup #%u [-%u]", seq, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - ret = -ENOBUFS; - goto discard_and_ack; - } - - /* we may already have the packet in the out of sequence queue */ - ackbit = seq - (call->rx_data_eaten + 1); - ASSERTCMP(ackbit, >=, 0); - if (__test_and_set_bit(ackbit, call->ackr_window)) { - _debug("dup oos #%u [%u,%u]", - seq, call->rx_data_eaten, call->rx_data_post); - ack = RXRPC_ACK_DUPLICATE; - goto discard_and_ack; - } - - if (seq >= call->ackr_win_top) { - _debug("exceed #%u [%u]", seq, call->ackr_win_top); - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_EXCEEDS_WINDOW; - goto discard_and_ack; - } - - if (seq == call->rx_data_expect) { - clear_bit(RXRPC_CALL_EXPECT_OOS, &call->flags); - call->rx_data_expect++; - } else if (seq > call->rx_data_expect) { - _debug("oos #%u [%u]", seq, call->rx_data_expect); - call->rx_data_expect = seq + 1; - if (test_and_set_bit(RXRPC_CALL_EXPECT_OOS, &call->flags)) { - ack = RXRPC_ACK_OUT_OF_SEQUENCE; - goto enqueue_and_ack; + if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { + summary->retrans_timeo = true; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = 1; + if (cwnd >= call->cong_ssthresh && + call->cong_mode == RXRPC_CALL_SLOW_START) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; + cumulative_acks = 0; } - goto enqueue_packet; } - if (seq != call->rx_data_post) { - _debug("ahead #%u [%u]", seq, call->rx_data_post); - goto enqueue_packet; - } + cumulative_acks += summary->nr_new_acks; + cumulative_acks += summary->nr_rot_new_acks; + if (cumulative_acks > 255) + cumulative_acks = 255; - if (test_bit(RXRPC_CALL_RCVD_LAST, &call->flags)) - goto protocol_error; + summary->mode = call->cong_mode; + summary->cwnd = call->cong_cwnd; + summary->ssthresh = call->cong_ssthresh; + summary->cumulative_acks = cumulative_acks; + summary->dup_acks = call->cong_dup_acks; - /* if the packet need security things doing to it, then it goes down - * the slow path */ - if (call->conn->security_ix) - goto enqueue_packet; - - sp->call = call; - rxrpc_get_call(call); - atomic_inc(&call->skb_count); - terminal = ((flags & RXRPC_LAST_PACKET) && - !(flags & RXRPC_CLIENT_INITIATED)); - ret = rxrpc_queue_rcv_skb(call, skb, false, terminal); - if (ret < 0) { - if (ret == -ENOMEM || ret == -ENOBUFS) { - __clear_bit(ackbit, call->ackr_window); - ack = RXRPC_ACK_NOSPACE; - goto discard_and_ack; + switch (call->cong_mode) { + case RXRPC_CALL_SLOW_START: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + if (summary->cumulative_acks > 0) + cwnd += 1; + if (cwnd >= call->cong_ssthresh) { + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_tstamp = skb->tstamp; } goto out; + + case RXRPC_CALL_CONGEST_AVOIDANCE: + if (summary->nr_nacks > 0) + goto packet_loss_detected; + + /* We analyse the number of packets that get ACK'd per RTT + * period and increase the window if we managed to fill it. + */ + if (call->peer->rtt_usage == 0) + goto out; + if (ktime_before(skb->tstamp, + ktime_add_ns(call->cong_tstamp, + call->peer->rtt))) + goto out_no_clear_ca; + change = rxrpc_cong_rtt_window_end; + call->cong_tstamp = skb->tstamp; + if (cumulative_acks >= cwnd) + cwnd++; + goto out; + + case RXRPC_CALL_PACKET_LOSS: + if (summary->nr_nacks == 0) + goto resume_normality; + + if (summary->new_low_nack) { + change = rxrpc_cong_new_low_nack; + call->cong_dup_acks = 1; + if (call->cong_extra > 1) + call->cong_extra = 1; + goto send_extra_data; + } + + call->cong_dup_acks++; + if (call->cong_dup_acks < 3) + goto send_extra_data; + + change = rxrpc_cong_begin_retransmission; + call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; + call->cong_ssthresh = max_t(unsigned int, + summary->flight_size / 2, 2); + cwnd = call->cong_ssthresh + 3; + call->cong_extra = 0; + call->cong_dup_acks = 0; + resend = true; + goto out; + + case RXRPC_CALL_FAST_RETRANSMIT: + if (!summary->new_low_nack) { + if (summary->nr_new_acks == 0) + cwnd += 1; + call->cong_dup_acks++; + if (call->cong_dup_acks == 2) { + change = rxrpc_cong_retransmit_again; + call->cong_dup_acks = 0; + resend = true; + } + } else { + change = rxrpc_cong_progress; + cwnd = call->cong_ssthresh; + if (summary->nr_nacks == 0) + goto resume_normality; + } + goto out; + + default: + BUG(); + goto out; } - skb = NULL; - sp = NULL; - - _debug("post #%u", seq); - ASSERTCMP(call->rx_data_post, ==, seq); - call->rx_data_post++; - - if (flags & RXRPC_LAST_PACKET) - set_bit(RXRPC_CALL_RCVD_LAST, &call->flags); - - /* if we've reached an out of sequence packet then we need to drain - * that queue into the socket Rx queue now */ - if (call->rx_data_post == call->rx_first_oos) { - _debug("drain rx oos now"); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_DRAIN_RX_OOS, &call->events)) - rxrpc_queue_call(call); - read_unlock(&call->state_lock); - } - - spin_unlock(&call->lock); - atomic_inc(&call->ackr_not_idle); - rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, serial, false); - _leave(" = 0 [posted]"); - return 0; - -protocol_error: - ret = -EBADMSG; +resume_normality: + change = rxrpc_cong_cleared_nacks; + call->cong_dup_acks = 0; + call->cong_extra = 0; + call->cong_tstamp = skb->tstamp; + if (cwnd < call->cong_ssthresh) + call->cong_mode = RXRPC_CALL_SLOW_START; + else + call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; out: - spin_unlock(&call->lock); - _leave(" = %d", ret); - return ret; - -discard_and_ack: - _debug("discard and ACK packet %p", skb); - __rxrpc_propose_ACK(call, ack, serial, true); -discard: - spin_unlock(&call->lock); - rxrpc_free_skb(skb); - _leave(" = 0 [discarded]"); - return 0; - -enqueue_and_ack: - __rxrpc_propose_ACK(call, ack, serial, true); -enqueue_packet: - _net("defer skb %p", skb); - spin_unlock(&call->lock); - skb_queue_tail(&call->rx_queue, skb); - atomic_inc(&call->ackr_not_idle); - read_lock(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) + cumulative_acks = 0; +out_no_clear_ca: + if (cwnd >= RXRPC_RXTX_BUFF_SIZE - 1) + cwnd = RXRPC_RXTX_BUFF_SIZE - 1; + call->cong_cwnd = cwnd; + call->cong_cumul_acks = cumulative_acks; + trace_rxrpc_congest(call, summary, acked_serial, change); + if (resend && !test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) rxrpc_queue_call(call); - read_unlock(&call->state_lock); - _leave(" = 0 [queued]"); - return 0; + return; + +packet_loss_detected: + change = rxrpc_cong_saw_nack; + call->cong_mode = RXRPC_CALL_PACKET_LOSS; + call->cong_dup_acks = 0; + goto send_extra_data; + +send_extra_data: + /* Send some previously unsent DATA if we have some to advance the ACK + * state. + */ + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST || + summary->nr_acks != call->tx_top - call->tx_hard_ack) { + call->cong_extra++; + wake_up(&call->waitq); + } + goto out_no_clear_ca; } /* - * assume an implicit ACKALL of the transmission phase of a client socket upon - * reception of the first reply packet + * Ping the other end to fill our RTT cache and to retrieve the rwind + * and MTU parameters. */ -static void rxrpc_assume_implicit_ackall(struct rxrpc_call *call, u32 serial) +static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb, + int skew) { - write_lock_bh(&call->state_lock); + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + ktime_t now = skb->tstamp; + + if (call->peer->rtt_usage < 3 || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + true, true, + rxrpc_propose_ack_ping_for_params); +} + +/* + * Apply a hard ACK by advancing the Tx window. + */ +static void rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, + struct rxrpc_ack_summary *summary) +{ + struct sk_buff *skb, *list = NULL; + int ix; + u8 annotation; + + if (call->acks_lowest_nak == call->tx_hard_ack) { + call->acks_lowest_nak = to; + } else if (before_eq(call->acks_lowest_nak, to)) { + summary->new_low_nack = true; + call->acks_lowest_nak = to; + } + + spin_lock(&call->lock); + + while (before(call->tx_hard_ack, to)) { + call->tx_hard_ack++; + ix = call->tx_hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + annotation = call->rxtx_annotations[ix]; + rxrpc_see_skb(skb, rxrpc_skb_tx_rotated); + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + skb->next = list; + list = skb; + + if (annotation & RXRPC_TX_ANNO_LAST) + set_bit(RXRPC_CALL_TX_LAST, &call->flags); + if ((annotation & RXRPC_TX_ANNO_MASK) != RXRPC_TX_ANNO_ACK) + summary->nr_rot_new_acks++; + } + + spin_unlock(&call->lock); + + trace_rxrpc_transmit(call, (test_bit(RXRPC_CALL_TX_LAST, &call->flags) ? + rxrpc_transmit_rotate_last : + rxrpc_transmit_rotate)); + wake_up(&call->waitq); + + while (list) { + skb = list; + list = skb->next; + skb->next = NULL; + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + } +} + +/* + * End the transmission phase of a call. + * + * This occurs when we get an ACKALL packet, the first DATA packet of a reply, + * or a final ACK packet. + */ +static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, + const char *abort_why) +{ + + ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); + + write_lock(&call->state_lock); switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_CLIENT_AWAIT_REPLY: - call->state = RXRPC_CALL_CLIENT_RECV_REPLY; - call->acks_latest = serial; + if (reply_begun) + call->state = RXRPC_CALL_CLIENT_RECV_REPLY; + else + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + break; - _debug("implicit ACKALL %%%u", call->acks_latest); - set_bit(RXRPC_CALL_EV_RCVD_ACKALL, &call->events); - write_unlock_bh(&call->state_lock); - - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_RESEND, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - } + case RXRPC_CALL_SERVER_AWAIT_ACK: + __rxrpc_call_completed(call); + rxrpc_notify_socket(call); break; default: - write_unlock_bh(&call->state_lock); - break; + goto bad_state; + } + + write_unlock(&call->state_lock); + if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, + rxrpc_propose_ack_client_tx_end); + trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); + } else { + trace_rxrpc_transmit(call, rxrpc_transmit_end); + } + _leave(" = ok"); + return true; + +bad_state: + write_unlock(&call->state_lock); + kdebug("end_tx %s", rxrpc_call_states[call->state]); + rxrpc_proto_abort(abort_why, call, call->tx_top); + return false; +} + +/* + * Begin the reply reception phase of a call. + */ +static bool rxrpc_receiving_reply(struct rxrpc_call *call) +{ + struct rxrpc_ack_summary summary = { 0 }; + rxrpc_seq_t top = READ_ONCE(call->tx_top); + + if (call->ackr_reason) { + spin_lock_bh(&call->lock); + call->ackr_reason = 0; + call->resend_at = call->expire_at; + call->ack_at = call->expire_at; + spin_unlock_bh(&call->lock); + rxrpc_set_timer(call, rxrpc_timer_init_for_reply, + ktime_get_real()); + } + + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_rotate_tx_window(call, top, &summary); + if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_proto_abort("TXL", call, top); + return false; + } + if (!rxrpc_end_tx_phase(call, true, "ETD")) + return false; + call->tx_phase = false; + return true; +} + +/* + * Scan a jumbo packet to validate its structure and to work out how many + * subpackets it contains. + * + * A jumbo packet is a collection of consecutive packets glued together with + * little headers between that indicate how to change the initial header for + * each subpacket. + * + * RXRPC_JUMBO_PACKET must be set on all but the last subpacket - and all but + * the last are RXRPC_JUMBO_DATALEN in size. The last subpacket may be of any + * size. + */ +static bool rxrpc_validate_jumbo(struct sk_buff *skb) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len = skb->len; + int nr_jumbo = 1; + u8 flags = sp->hdr.flags; + + do { + nr_jumbo++; + if (len - offset < RXRPC_JUMBO_SUBPKTLEN) + goto protocol_error; + if (flags & RXRPC_LAST_PACKET) + goto protocol_error; + offset += RXRPC_JUMBO_DATALEN; + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + goto protocol_error; + offset += sizeof(struct rxrpc_jumbo_header); + } while (flags & RXRPC_JUMBO_PACKET); + + sp->nr_jumbo = nr_jumbo; + return true; + +protocol_error: + return false; +} + +/* + * Handle reception of a duplicate packet. + * + * We have to take care to avoid an attack here whereby we're given a series of + * jumbograms, each with a sequence number one before the preceding one and + * filled up to maximum UDP size. If they never send us the first packet in + * the sequence, they can cause us to have to hold on to around 2MiB of kernel + * space until the call times out. + * + * We limit the space usage by only accepting three duplicate jumbo packets per + * call. After that, we tell the other side we're no longer accepting jumbos + * (that information is encoded in the ACK packet). + */ +static void rxrpc_input_dup_data(struct rxrpc_call *call, rxrpc_seq_t seq, + u8 annotation, bool *_jumbo_bad) +{ + /* Discard normal packets that are duplicates. */ + if (annotation == 0) + return; + + /* Skip jumbo subpackets that are duplicates. When we've had three or + * more partially duplicate jumbo packets, we refuse to take any more + * jumbos for this call. + */ + if (!*_jumbo_bad) { + call->nr_jumbo_bad++; + *_jumbo_bad = true; } } /* - * post an incoming packet to the nominated call to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it + * Process a DATA packet, adding the packet to the Rx ring. */ -void rxrpc_fast_process_packet(struct rxrpc_call *call, struct sk_buff *skb) +static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int ix; + rxrpc_serial_t serial = sp->hdr.serial, ack_serial = 0; + rxrpc_seq_t seq = sp->hdr.seq, hard_ack; + bool immediate_ack = false, jumbo_bad = false, queued; + u16 len; + u8 ack = 0, flags, annotation = 0; + + _enter("{%u,%u},{%u,%u}", + call->rx_hard_ack, call->rx_top, skb->len, seq); + + _proto("Rx DATA %%%u { #%u f=%02x }", + sp->hdr.serial, seq, sp->hdr.flags); + + if (call->state >= RXRPC_CALL_COMPLETE) + return; + + /* Received data implicitly ACKs all of the request packets we sent + * when we're acting as a client. + */ + if ((call->state == RXRPC_CALL_CLIENT_SEND_REQUEST || + call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) && + !rxrpc_receiving_reply(call)) + return; + + call->ackr_prev_seq = seq; + + hard_ack = READ_ONCE(call->rx_hard_ack); + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + goto ack; + } + + flags = sp->hdr.flags; + if (flags & RXRPC_JUMBO_PACKET) { + if (call->nr_jumbo_bad > 3) { + ack = RXRPC_ACK_NOSPACE; + ack_serial = serial; + goto ack; + } + annotation = 1; + } + +next_subpacket: + queued = false; + ix = seq & RXRPC_RXTX_BUFF_MASK; + len = skb->len; + if (flags & RXRPC_JUMBO_PACKET) + len = RXRPC_JUMBO_DATALEN; + + if (flags & RXRPC_LAST_PACKET) { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + seq != call->rx_top) + return rxrpc_proto_abort("LSN", call, seq); + } else { + if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) && + after_eq(seq, call->rx_top)) + return rxrpc_proto_abort("LSA", call, seq); + } + + if (before_eq(seq, hard_ack)) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + goto skip; + } + + if (flags & RXRPC_REQUEST_ACK && !ack) { + ack = RXRPC_ACK_REQUESTED; + ack_serial = serial; + } + + if (call->rxtx_buffer[ix]) { + rxrpc_input_dup_data(call, seq, annotation, &jumbo_bad); + if (ack != RXRPC_ACK_DUPLICATE) { + ack = RXRPC_ACK_DUPLICATE; + ack_serial = serial; + } + immediate_ack = true; + goto skip; + } + + /* Queue the packet. We use a couple of memory barriers here as need + * to make sure that rx_top is perceived to be set after the buffer + * pointer and that the buffer pointer is set after the annotation and + * the skb data. + * + * Barriers against rxrpc_recvmsg_data() and rxrpc_rotate_rx_window() + * and also rxrpc_fill_out_ack(). + */ + rxrpc_get_skb(skb, rxrpc_skb_rx_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + if (after(seq, call->rx_top)) { + smp_store_release(&call->rx_top, seq); + } else if (before(seq, call->rx_top)) { + /* Send an immediate ACK if we fill in a hole */ + if (!ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + immediate_ack = true; + } + if (flags & RXRPC_LAST_PACKET) { + set_bit(RXRPC_CALL_RX_LAST, &call->flags); + trace_rxrpc_receive(call, rxrpc_receive_queue_last, serial, seq); + } else { + trace_rxrpc_receive(call, rxrpc_receive_queue, serial, seq); + } + queued = true; + + if (after_eq(seq, call->rx_expect_next)) { + if (after(seq, call->rx_expect_next)) { + _net("OOS %u > %u", seq, call->rx_expect_next); + ack = RXRPC_ACK_OUT_OF_SEQUENCE; + ack_serial = serial; + } + call->rx_expect_next = seq + 1; + } + +skip: + offset += len; + if (flags & RXRPC_JUMBO_PACKET) { + if (skb_copy_bits(skb, offset, &flags, 1) < 0) + return rxrpc_proto_abort("XJF", call, seq); + offset += sizeof(struct rxrpc_jumbo_header); + seq++; + serial++; + annotation++; + if (flags & RXRPC_JUMBO_PACKET) + annotation |= RXRPC_RX_ANNO_JLAST; + if (after(seq, hard_ack + call->rx_winsize)) { + ack = RXRPC_ACK_EXCEEDS_WINDOW; + ack_serial = serial; + if (!jumbo_bad) { + call->nr_jumbo_bad++; + jumbo_bad = true; + } + goto ack; + } + + _proto("Rx DATA Jumbo %%%u", serial); + goto next_subpacket; + } + + if (queued && flags & RXRPC_LAST_PACKET && !ack) { + ack = RXRPC_ACK_DELAY; + ack_serial = serial; + } + +ack: + if (ack) + rxrpc_propose_ACK(call, ack, skew, ack_serial, + immediate_ack, true, + rxrpc_propose_ack_input_data); + + if (sp->hdr.seq == READ_ONCE(call->rx_hard_ack) + 1) + rxrpc_notify_socket(call); + _leave(" [queued]"); +} + +/* + * Process a requested ACK. + */ +static void rxrpc_input_requested_ack(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + ktime_t sent_at; + int ix; + + for (ix = 0; ix < RXRPC_RXTX_BUFF_SIZE; ix++) { + skb = call->rxtx_buffer[ix]; + if (!skb) + continue; + + sp = rxrpc_skb(skb); + if (sp->hdr.serial != orig_serial) + continue; + smp_rmb(); + sent_at = skb->tstamp; + goto found; + } + return; + +found: + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_requested_ack, + orig_serial, ack_serial, sent_at, resp_time); +} + +/* + * Process a ping response. + */ +static void rxrpc_input_ping_response(struct rxrpc_call *call, + ktime_t resp_time, + rxrpc_serial_t orig_serial, + rxrpc_serial_t ack_serial) +{ + rxrpc_serial_t ping_serial; + ktime_t ping_time; + + ping_time = call->ackr_ping_time; + smp_rmb(); + ping_serial = call->ackr_ping; + + if (!test_bit(RXRPC_CALL_PINGING, &call->flags) || + before(orig_serial, ping_serial)) + return; + clear_bit(RXRPC_CALL_PINGING, &call->flags); + if (after(orig_serial, ping_serial)) + return; + + rxrpc_peer_add_rtt(call, rxrpc_rtt_rx_ping_response, + orig_serial, ack_serial, ping_time, resp_time); +} + +/* + * Process the extra information that may be appended to an ACK packet + */ +static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb, + struct rxrpc_ackinfo *ackinfo) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct rxrpc_peer *peer; + unsigned int mtu; + u32 rwind = ntohl(ackinfo->rwind); + + _proto("Rx ACK %%%u Info { rx=%u max=%u rwin=%u jm=%u }", + sp->hdr.serial, + ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU), + rwind, ntohl(ackinfo->jumbo_max)); + + if (rwind > RXRPC_RXTX_BUFF_SIZE - 1) + rwind = RXRPC_RXTX_BUFF_SIZE - 1; + call->tx_winsize = rwind; + if (call->cong_ssthresh > rwind) + call->cong_ssthresh = rwind; + + mtu = min(ntohl(ackinfo->rxMTU), ntohl(ackinfo->maxMTU)); + + peer = call->peer; + if (mtu < peer->maxdata) { + spin_lock_bh(&peer->lock); + peer->maxdata = mtu; + peer->mtu = mtu + peer->hdrsize; + spin_unlock_bh(&peer->lock); + _net("Net MTU %u (maxdata %u)", peer->mtu, peer->maxdata); + } +} + +/* + * Process individual soft ACKs. + * + * Each ACK in the array corresponds to one packet and can be either an ACK or + * a NAK. If we get find an explicitly NAK'd packet we resend immediately; + * packets that lie beyond the end of the ACK list are scheduled for resend by + * the timer on the basis that the peer might just not have processed them at + * the time the ACK was sent. + */ +static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, + rxrpc_seq_t seq, int nr_acks, + struct rxrpc_ack_summary *summary) +{ + int ix; + u8 annotation, anno_type; + + for (; nr_acks > 0; nr_acks--, seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + annotation = call->rxtx_annotations[ix]; + anno_type = annotation & RXRPC_TX_ANNO_MASK; + annotation &= ~RXRPC_TX_ANNO_MASK; + switch (*acks++) { + case RXRPC_ACK_TYPE_ACK: + summary->nr_acks++; + if (anno_type == RXRPC_TX_ANNO_ACK) + continue; + summary->nr_new_acks++; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_ACK | annotation; + break; + case RXRPC_ACK_TYPE_NACK: + if (!summary->nr_nacks && + call->acks_lowest_nak != seq) { + call->acks_lowest_nak = seq; + summary->new_low_nack = true; + } + summary->nr_nacks++; + if (anno_type == RXRPC_TX_ANNO_NAK) + continue; + summary->nr_new_nacks++; + if (anno_type == RXRPC_TX_ANNO_RETRANS) + continue; + call->rxtx_annotations[ix] = + RXRPC_TX_ANNO_NAK | annotation; + break; + default: + return rxrpc_proto_abort("SFT", call, 0); + } + } +} + +/* + * Process an ACK packet. + * + * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet + * in the ACK array. Anything before that is hard-ACK'd and may be discarded. + * + * A hard-ACK means that a packet has been processed and may be discarded; a + * soft-ACK means that the packet may be discarded and retransmission + * requested. A phase is complete when all packets are hard-ACK'd. + */ +static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb, + u16 skew) +{ + struct rxrpc_ack_summary summary = { 0 }; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + union { + struct rxrpc_ackpacket ack; + struct rxrpc_ackinfo info; + u8 acks[RXRPC_MAXACKS]; + } buf; + rxrpc_serial_t acked_serial; + rxrpc_seq_t first_soft_ack, hard_ack; + int nr_acks, offset, ioffset; + + _enter(""); + + offset = sizeof(struct rxrpc_wire_header); + if (skb_copy_bits(skb, offset, &buf.ack, sizeof(buf.ack)) < 0) { + _debug("extraction failure"); + return rxrpc_proto_abort("XAK", call, 0); + } + offset += sizeof(buf.ack); + + acked_serial = ntohl(buf.ack.serial); + first_soft_ack = ntohl(buf.ack.firstPacket); + hard_ack = first_soft_ack - 1; + nr_acks = buf.ack.nAcks; + summary.ack_reason = (buf.ack.reason < RXRPC_ACK__INVALID ? + buf.ack.reason : RXRPC_ACK__INVALID); + + trace_rxrpc_rx_ack(call, first_soft_ack, summary.ack_reason, nr_acks); + + _proto("Rx ACK %%%u { m=%hu f=#%u p=#%u s=%%%u r=%s n=%u }", + sp->hdr.serial, + ntohs(buf.ack.maxSkew), + first_soft_ack, + ntohl(buf.ack.previousPacket), + acked_serial, + rxrpc_ack_names[summary.ack_reason], + buf.ack.nAcks); + + if (buf.ack.reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_input_ping_response(call, skb->tstamp, acked_serial, + sp->hdr.serial); + if (buf.ack.reason == RXRPC_ACK_REQUESTED) + rxrpc_input_requested_ack(call, skb->tstamp, acked_serial, + sp->hdr.serial); + + if (buf.ack.reason == RXRPC_ACK_PING) { + _proto("Rx ACK %%%u PING Request", sp->hdr.serial); + rxrpc_propose_ACK(call, RXRPC_ACK_PING_RESPONSE, + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ping); + } else if (sp->hdr.flags & RXRPC_REQUEST_ACK) { + rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, + skew, sp->hdr.serial, true, true, + rxrpc_propose_ack_respond_to_ack); + } + + ioffset = offset + nr_acks + 3; + if (skb->len >= ioffset + sizeof(buf.info)) { + if (skb_copy_bits(skb, ioffset, &buf.info, sizeof(buf.info)) < 0) + return rxrpc_proto_abort("XAI", call, 0); + rxrpc_input_ackinfo(call, skb, &buf.info); + } + + if (first_soft_ack == 0) + return rxrpc_proto_abort("AK0", call, 0); + + /* Ignore ACKs unless we are or have just been transmitting. */ + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + case RXRPC_CALL_CLIENT_AWAIT_REPLY: + case RXRPC_CALL_SERVER_SEND_REPLY: + case RXRPC_CALL_SERVER_AWAIT_ACK: + break; + default: + return; + } + + /* Discard any out-of-order or duplicate ACKs. */ + if (before_eq(sp->hdr.serial, call->acks_latest)) { + _debug("discard ACK %d <= %d", + sp->hdr.serial, call->acks_latest); + return; + } + call->acks_latest_ts = skb->tstamp; + call->acks_latest = sp->hdr.serial; + + if (before(hard_ack, call->tx_hard_ack) || + after(hard_ack, call->tx_top)) + return rxrpc_proto_abort("AKW", call, 0); + if (nr_acks > call->tx_top - hard_ack) + return rxrpc_proto_abort("AKN", call, 0); + + if (after(hard_ack, call->tx_hard_ack)) + rxrpc_rotate_tx_window(call, hard_ack, &summary); + + if (nr_acks > 0) { + if (skb_copy_bits(skb, offset, buf.acks, nr_acks) < 0) + return rxrpc_proto_abort("XSA", call, 0); + rxrpc_input_soft_acks(call, buf.acks, first_soft_ack, nr_acks, + &summary); + } + + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) { + rxrpc_end_tx_phase(call, false, "ETA"); + return; + } + + if (call->rxtx_annotations[call->tx_top & RXRPC_RXTX_BUFF_MASK] & + RXRPC_TX_ANNO_LAST && + summary.nr_acks == call->tx_top - hard_ack) + rxrpc_propose_ACK(call, RXRPC_ACK_PING, skew, sp->hdr.serial, + false, true, + rxrpc_propose_ack_ping_for_lost_reply); + + return rxrpc_congestion_management(call, skb, &summary, acked_serial); +} + +/* + * Process an ACKALL packet. + */ +static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb) +{ + struct rxrpc_ack_summary summary = { 0 }; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + + _proto("Rx ACKALL %%%u", sp->hdr.serial); + + rxrpc_rotate_tx_window(call, call->tx_top, &summary); + if (test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + rxrpc_end_tx_phase(call, false, "ETL"); +} + +/* + * Process an ABORT packet. + */ +static void rxrpc_input_abort(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); __be32 wtmp; - u32 hi_serial, abort_code; + u32 abort_code = RX_CALL_DEAD; + + _enter(""); + + if (skb->len >= 4 && + skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &wtmp, sizeof(wtmp)) >= 0) + abort_code = ntohl(wtmp); + + _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); + + if (rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, + abort_code, ECONNABORTED)) + rxrpc_notify_socket(call); +} + +/* + * Process an incoming call packet. + */ +static void rxrpc_input_call_packet(struct rxrpc_call *call, + struct sk_buff *skb, u16 skew) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); _enter("%p,%p", call, skb); - ASSERT(!irqs_disabled()); - -#if 0 // INJECT RX ERROR - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - static int skip = 0; - if (++skip == 3) { - printk("DROPPED 3RD PACKET!!!!!!!!!!!!!\n"); - skip = 0; - goto free_packet; - } - } -#endif - - /* track the latest serial number on this connection for ACK packet - * information */ - hi_serial = atomic_read(&call->conn->hi_serial); - while (sp->hdr.serial > hi_serial) - hi_serial = atomic_cmpxchg(&call->conn->hi_serial, hi_serial, - sp->hdr.serial); - - /* request ACK generation for any ACK or DATA packet that requests - * it */ - if (sp->hdr.flags & RXRPC_REQUEST_ACK) { - _proto("ACK Requested on %%%u", sp->hdr.serial); - rxrpc_propose_ACK(call, RXRPC_ACK_REQUESTED, sp->hdr.serial, false); - } - switch (sp->hdr.type) { - case RXRPC_PACKET_TYPE_ABORT: - _debug("abort"); + case RXRPC_PACKET_TYPE_DATA: + rxrpc_input_data(call, skb, skew); + break; - if (skb_copy_bits(skb, 0, &wtmp, sizeof(wtmp)) < 0) - goto protocol_error; - - abort_code = ntohl(wtmp); - _proto("Rx ABORT %%%u { %x }", sp->hdr.serial, abort_code); - - write_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_REMOTELY_ABORTED; - call->remote_abort = abort_code; - set_bit(RXRPC_CALL_EV_RCVD_ABORT, &call->events); - rxrpc_queue_call(call); - } - goto free_packet_unlock; + case RXRPC_PACKET_TYPE_ACK: + rxrpc_input_ack(call, skb, skew); + break; case RXRPC_PACKET_TYPE_BUSY: _proto("Rx BUSY %%%u", sp->hdr.serial); - if (rxrpc_conn_is_service(call->conn)) - goto protocol_error; + /* Just ignore BUSY packets from the server; the retry and + * lifespan timers will take care of business. BUSY packets + * from the client don't make sense. + */ + break; - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_SERVER_BUSY; - set_bit(RXRPC_CALL_EV_RCVD_BUSY, &call->events); - rxrpc_queue_call(call); - case RXRPC_CALL_SERVER_BUSY: - goto free_packet_unlock; - default: - goto protocol_error_locked; - } + case RXRPC_PACKET_TYPE_ABORT: + rxrpc_input_abort(call, skb); + break; + + case RXRPC_PACKET_TYPE_ACKALL: + rxrpc_input_ackall(call, skb); + break; default: _proto("Rx %s %%%u", rxrpc_pkts[sp->hdr.type], sp->hdr.serial); - goto protocol_error; - - case RXRPC_PACKET_TYPE_DATA: - _proto("Rx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - if (sp->hdr.seq == 0) - goto protocol_error; - - call->ackr_prev_seq = sp->hdr.seq; - - /* received data implicitly ACKs all of the request packets we - * sent when we're acting as a client */ - if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) - rxrpc_assume_implicit_ackall(call, sp->hdr.serial); - - switch (rxrpc_fast_process_data(call, skb, sp->hdr.seq)) { - case 0: - skb = NULL; - goto done; - - default: - BUG(); - - /* data packet received beyond the last packet */ - case -EBADMSG: - goto protocol_error; - } - - case RXRPC_PACKET_TYPE_ACKALL: - case RXRPC_PACKET_TYPE_ACK: - /* ACK processing is done in process context */ - read_lock_bh(&call->state_lock); - if (call->state < RXRPC_CALL_DEAD) { - skb_queue_tail(&call->rx_queue, skb); - rxrpc_queue_call(call); - skb = NULL; - } - read_unlock_bh(&call->state_lock); - goto free_packet; - } - -protocol_error: - _debug("protocol error"); - write_lock_bh(&call->state_lock); -protocol_error_locked: - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_PROTOCOL_ERROR; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); - } -free_packet_unlock: - write_unlock_bh(&call->state_lock); -free_packet: - rxrpc_free_skb(skb); -done: - _leave(""); -} - -/* - * split up a jumbo data packet - */ -static void rxrpc_process_jumbo_packet(struct rxrpc_call *call, - struct sk_buff *jumbo) -{ - struct rxrpc_jumbo_header jhdr; - struct rxrpc_skb_priv *sp; - struct sk_buff *part; - - _enter(",{%u,%u}", jumbo->data_len, jumbo->len); - - sp = rxrpc_skb(jumbo); - - do { - sp->hdr.flags &= ~RXRPC_JUMBO_PACKET; - - /* make a clone to represent the first subpacket in what's left - * of the jumbo packet */ - part = skb_clone(jumbo, GFP_ATOMIC); - if (!part) { - /* simply ditch the tail in the event of ENOMEM */ - pskb_trim(jumbo, RXRPC_JUMBO_DATALEN); - break; - } - rxrpc_new_skb(part); - - pskb_trim(part, RXRPC_JUMBO_DATALEN); - - if (!pskb_pull(jumbo, RXRPC_JUMBO_DATALEN)) - goto protocol_error; - - if (skb_copy_bits(jumbo, 0, &jhdr, sizeof(jhdr)) < 0) - goto protocol_error; - if (!pskb_pull(jumbo, sizeof(jhdr))) - BUG(); - - sp->hdr.seq += 1; - sp->hdr.serial += 1; - sp->hdr.flags = jhdr.flags; - sp->hdr._rsvd = ntohs(jhdr._rsvd); - - _proto("Rx DATA Jumbo %%%u", sp->hdr.serial - 1); - - rxrpc_fast_process_packet(call, part); - part = NULL; - - } while (sp->hdr.flags & RXRPC_JUMBO_PACKET); - - rxrpc_fast_process_packet(call, jumbo); - _leave(""); - return; - -protocol_error: - _debug("protocol error"); - rxrpc_free_skb(part); - rxrpc_free_skb(jumbo); - write_lock_bh(&call->state_lock); - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = RX_PROTOCOL_ERROR; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - rxrpc_queue_call(call); - } - write_unlock_bh(&call->state_lock); - _leave(""); -} - -/* - * post an incoming packet to the appropriate call/socket to deal with - * - must get rid of the sk_buff, either by freeing it or by queuing it - */ -static void rxrpc_post_packet_to_call(struct rxrpc_call *call, - struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp; - - _enter("%p,%p", call, skb); - - sp = rxrpc_skb(skb); - - _debug("extant call [%d]", call->state); - - read_lock(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_LOCALLY_ABORTED: - if (!test_and_set_bit(RXRPC_CALL_EV_ABORT, &call->events)) { - rxrpc_queue_call(call); - goto free_unlock; - } - case RXRPC_CALL_REMOTELY_ABORTED: - case RXRPC_CALL_NETWORK_ERROR: - case RXRPC_CALL_DEAD: - goto dead_call; - case RXRPC_CALL_COMPLETE: - case RXRPC_CALL_CLIENT_FINAL_ACK: - /* complete server call */ - if (rxrpc_conn_is_service(call->conn)) - goto dead_call; - /* resend last packet of a completed call */ - _debug("final ack again"); - rxrpc_get_call(call); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - rxrpc_queue_call(call); - goto free_unlock; - default: break; } - read_unlock(&call->state_lock); - rxrpc_get_call(call); - - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - sp->hdr.flags & RXRPC_JUMBO_PACKET) - rxrpc_process_jumbo_packet(call, skb); - else - rxrpc_fast_process_packet(call, skb); - - rxrpc_put_call(call); - goto done; - -dead_call: - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - skb->priority = RX_CALL_DEAD; - rxrpc_reject_packet(call->conn->params.local, skb); - goto unlock; - } -free_unlock: - rxrpc_free_skb(skb); -unlock: - read_unlock(&call->state_lock); -done: _leave(""); } /* * post connection-level events to the connection - * - this includes challenges, responses and some aborts + * - this includes challenges, responses, some aborts and call terminal packet + * retransmission. */ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn, struct sk_buff *skb) @@ -594,6 +964,17 @@ static void rxrpc_post_packet_to_local(struct rxrpc_local *local, rxrpc_queue_local(local); } +/* + * put a packet up for transport-level abort + */ +static void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb) +{ + CHECK_SLAB_OKAY(&local->usage); + + skb_queue_tail(&local->reject_queue, skb); + rxrpc_queue_local(local); +} + /* * Extract the wire header from a packet and translate the byte order. */ @@ -605,8 +986,6 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) /* dig out the RxRPC connection details */ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) return -EBADMSG; - if (!pskb_pull(skb, sizeof(whdr))) - BUG(); memset(sp, 0, sizeof(*sp)); sp->hdr.epoch = ntohl(whdr.epoch); @@ -631,19 +1010,22 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb) * shut down and the local endpoint from going away, thus sk_user_data will not * be cleared until this function returns. */ -void rxrpc_data_ready(struct sock *sk) +void rxrpc_data_ready(struct sock *udp_sk) { struct rxrpc_connection *conn; + struct rxrpc_channel *chan; + struct rxrpc_call *call; struct rxrpc_skb_priv *sp; - struct rxrpc_local *local = sk->sk_user_data; + struct rxrpc_local *local = udp_sk->sk_user_data; struct sk_buff *skb; - int ret; + unsigned int channel; + int ret, skew; - _enter("%p", sk); + _enter("%p", udp_sk); ASSERT(!irqs_disabled()); - skb = skb_recv_datagram(sk, 0, 1, &ret); + skb = skb_recv_datagram(udp_sk, 0, 1, &ret); if (!skb) { if (ret == -EAGAIN) return; @@ -651,13 +1033,13 @@ void rxrpc_data_ready(struct sock *sk) return; } - rxrpc_new_skb(skb); + rxrpc_new_skb(skb, rxrpc_skb_rx_received); _net("recv skb %p", skb); /* we'll probably need to checksum it (didn't call sock_recvmsg) */ if (skb_checksum_complete(skb)) { - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); __UDP_INC_STATS(&init_net, UDP_MIB_INERRORS, 0); _leave(" [CSUM failed]"); return; @@ -671,13 +1053,21 @@ void rxrpc_data_ready(struct sock *sk) skb_orphan(skb); sp = rxrpc_skb(skb); - _net("Rx UDP packet from %08x:%04hu", - ntohl(ip_hdr(skb)->saddr), ntohs(udp_hdr(skb)->source)); - /* dig out the RxRPC connection details */ if (rxrpc_extract_header(sp, skb) < 0) goto bad_message; + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + trace_rxrpc_rx_lose(sp); + rxrpc_lose_skb(skb, rxrpc_skb_rx_lost); + return; + } + } + + trace_rxrpc_rx_packet(sp); + _net("Rx RxRPC %s ep=%x call=%x:%x", sp->hdr.flags & RXRPC_CLIENT_INITIATED ? "ToServer" : "ToClient", sp->hdr.epoch, sp->hdr.cid, sp->hdr.callNumber); @@ -688,70 +1078,125 @@ void rxrpc_data_ready(struct sock *sk) goto bad_message; } - if (sp->hdr.type == RXRPC_PACKET_TYPE_VERSION) { + switch (sp->hdr.type) { + case RXRPC_PACKET_TYPE_VERSION: rxrpc_post_packet_to_local(local, skb); goto out; - } - if (sp->hdr.type == RXRPC_PACKET_TYPE_DATA && - (sp->hdr.callNumber == 0 || sp->hdr.seq == 0)) - goto bad_message; + case RXRPC_PACKET_TYPE_BUSY: + if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) + goto discard; + + case RXRPC_PACKET_TYPE_DATA: + if (sp->hdr.callNumber == 0) + goto bad_message; + if (sp->hdr.flags & RXRPC_JUMBO_PACKET && + !rxrpc_validate_jumbo(skb)) + goto bad_message; + break; + } rcu_read_lock(); conn = rxrpc_find_connection_rcu(local, skb); - if (!conn) - goto cant_route_call; + if (conn) { + if (sp->hdr.securityIndex != conn->security_ix) + goto wrong_security; - if (sp->hdr.callNumber == 0) { - /* Connection-level packet */ - _debug("CONN %p {%d}", conn, conn->debug_id); - rxrpc_post_packet_to_conn(conn, skb); - } else { - /* Call-bound packets are routed by connection channel. */ - unsigned int channel = sp->hdr.cid & RXRPC_CHANNELMASK; - struct rxrpc_channel *chan = &conn->channels[channel]; - struct rxrpc_call *call = rcu_dereference(chan->call); - - if (!call || atomic_read(&call->usage) == 0) - goto cant_route_call; - - rxrpc_post_packet_to_call(call, skb); - } - - rcu_read_unlock(); -out: - return; - -cant_route_call: - rcu_read_unlock(); - - _debug("can't route call"); - if (sp->hdr.flags & RXRPC_CLIENT_INITIATED && - sp->hdr.type == RXRPC_PACKET_TYPE_DATA) { - if (sp->hdr.seq == 1) { - _debug("first packet"); - skb_queue_tail(&local->accept_queue, skb); - rxrpc_queue_work(&local->processor); - _leave(" [incoming]"); - return; + if (sp->hdr.callNumber == 0) { + /* Connection-level packet */ + _debug("CONN %p {%d}", conn, conn->debug_id); + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; } - skb->priority = RX_INVALID_OPERATION; + + /* Note the serial number skew here */ + skew = (int)sp->hdr.serial - (int)conn->hi_serial; + if (skew >= 0) { + if (skew > 0) + conn->hi_serial = sp->hdr.serial; + } else { + skew = -skew; + skew = min(skew, 65535); + } + + /* Call-bound packets are routed by connection channel. */ + channel = sp->hdr.cid & RXRPC_CHANNELMASK; + chan = &conn->channels[channel]; + + /* Ignore really old calls */ + if (sp->hdr.callNumber < chan->last_call) + goto discard_unlock; + + if (sp->hdr.callNumber == chan->last_call) { + /* For the previous service call, if completed successfully, we + * discard all further packets. + */ + if (rxrpc_conn_is_service(conn) && + (chan->last_type == RXRPC_PACKET_TYPE_ACK || + sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)) + goto discard_unlock; + + /* But otherwise we need to retransmit the final packet from + * data cached in the connection record. + */ + rxrpc_post_packet_to_conn(conn, skb); + goto out_unlock; + } + + call = rcu_dereference(chan->call); } else { - skb->priority = RX_CALL_DEAD; + skew = 0; + call = NULL; } - if (sp->hdr.type != RXRPC_PACKET_TYPE_ABORT) { - _debug("reject type %d",sp->hdr.type); - rxrpc_reject_packet(local, skb); - } else { - rxrpc_free_skb(skb); + if (!call || atomic_read(&call->usage) == 0) { + if (!(sp->hdr.type & RXRPC_CLIENT_INITIATED) || + sp->hdr.callNumber == 0 || + sp->hdr.type != RXRPC_PACKET_TYPE_DATA) + goto bad_message_unlock; + if (sp->hdr.seq != 1) + goto discard_unlock; + call = rxrpc_new_incoming_call(local, conn, skb); + if (!call) { + rcu_read_unlock(); + goto reject_packet; + } + rxrpc_send_ping(call, skb, skew); } - _leave(" [no call]"); + + rxrpc_input_call_packet(call, skb, skew); + goto discard_unlock; + +discard_unlock: + rcu_read_unlock(); +discard: + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); +out: + trace_rxrpc_rx_done(0, 0); return; +out_unlock: + rcu_read_unlock(); + goto out; + +wrong_security: + rcu_read_unlock(); + trace_rxrpc_abort("SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RXKADINCONSISTENCY, EBADMSG); + skb->priority = RXKADINCONSISTENCY; + goto post_abort; + +bad_message_unlock: + rcu_read_unlock(); bad_message: + trace_rxrpc_abort("BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq, + RX_PROTOCOL_ERROR, EBADMSG); skb->priority = RX_PROTOCOL_ERROR; +post_abort: + skb->mark = RXRPC_SKB_MARK_LOCAL_ABORT; +reject_packet: + trace_rxrpc_rx_done(skb->mark, skb->priority); rxrpc_reject_packet(local, skb); _leave(" [badmsg]"); } diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c index c21ad213b337..7d4375e557e6 100644 --- a/net/rxrpc/insecure.c +++ b/net/rxrpc/insecure.c @@ -23,31 +23,36 @@ static int none_prime_packet_security(struct rxrpc_connection *conn) } static int none_secure_packet(struct rxrpc_call *call, - struct sk_buff *skb, - size_t data_size, - void *sechdr) + struct sk_buff *skb, + size_t data_size, + void *sechdr) { return 0; } -static int none_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int none_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) { return 0; } +static void none_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ +} + static int none_respond_to_challenge(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; } static int none_verify_response(struct rxrpc_connection *conn, - struct sk_buff *skb, - u32 *_abort_code) + struct sk_buff *skb, + u32 *_abort_code) { *_abort_code = RX_PROTOCOL_ERROR; return -EPROTO; @@ -78,6 +83,7 @@ const struct rxrpc_security rxrpc_no_security = { .prime_packet_security = none_prime_packet_security, .secure_packet = none_secure_packet, .verify_packet = none_verify_packet, + .locate_data = none_locate_data, .respond_to_challenge = none_respond_to_challenge, .verify_response = none_verify_response, .clear = none_clear, diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 31a3f86ef2f6..540d3955c1bc 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -15,8 +15,6 @@ #include #include #include -#include -#include #include #include #include @@ -33,7 +31,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, { struct rxrpc_wire_header whdr; struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct sockaddr_in sin; + struct sockaddr_rxrpc srx; struct msghdr msg; struct kvec iov[2]; size_t len; @@ -41,12 +39,11 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _enter(""); - sin.sin_family = AF_INET; - sin.sin_port = udp_hdr(skb)->source; - sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + return; - msg.msg_name = &sin; - msg.msg_namelen = sizeof(sin); + msg.msg_name = &srx.transport; + msg.msg_namelen = srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -93,11 +90,13 @@ void rxrpc_process_local_events(struct rxrpc_local *local) if (skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); _debug("{%d},{%u}", local->debug_id, sp->hdr.type); switch (sp->hdr.type) { case RXRPC_PACKET_TYPE_VERSION: - if (skb_copy_bits(skb, 0, &v, 1) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &v, 1) < 0) return; _proto("Rx VERSION { %02x }", v); if (v == 0) @@ -109,7 +108,7 @@ void rxrpc_process_local_events(struct rxrpc_local *local) break; } - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index a753796fbe8f..ff4864d550b8 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -58,6 +58,17 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local, memcmp(&local->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + /* If the choice of UDP6 port is left up to the transport, then + * the endpoint record doesn't match. + */ + return ((u16 __force)local->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&local->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); +#endif default: BUG(); } @@ -75,9 +86,7 @@ static struct rxrpc_local *rxrpc_alloc_local(const struct sockaddr_rxrpc *srx) atomic_set(&local->usage, 1); INIT_LIST_HEAD(&local->link); INIT_WORK(&local->processor, rxrpc_local_processor); - INIT_LIST_HEAD(&local->services); init_rwsem(&local->defrag_sem); - skb_queue_head_init(&local->accept_queue); skb_queue_head_init(&local->reject_queue); skb_queue_head_init(&local->event_queue); local->client_conns = RB_ROOT; @@ -101,11 +110,12 @@ static int rxrpc_open_socket(struct rxrpc_local *local) struct sock *sock; int ret, opt; - _enter("%p{%d}", local, local->srx.transport_type); + _enter("%p{%d,%d}", + local, local->srx.transport_type, local->srx.transport.family); /* create a socket to represent the local endpoint */ - ret = sock_create_kern(&init_net, PF_INET, local->srx.transport_type, - IPPROTO_UDP, &local->socket); + ret = sock_create_kern(&init_net, local->srx.transport.family, + local->srx.transport_type, 0, &local->socket); if (ret < 0) { _leave(" = %d [socket]", ret); return ret; @@ -170,18 +180,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) long diff; int ret; - if (srx->transport.family == AF_INET) { - _enter("{%d,%u,%pI4+%hu}", - srx->transport_type, - srx->transport.family, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); - } else { - _enter("{%d,%u}", - srx->transport_type, - srx->transport.family); - return ERR_PTR(-EAFNOSUPPORT); - } + _enter("{%d,%d,%pISp}", + srx->transport_type, srx->transport.family, &srx->transport); mutex_lock(&rxrpc_local_mutex); @@ -234,13 +234,8 @@ struct rxrpc_local *rxrpc_lookup_local(const struct sockaddr_rxrpc *srx) found: mutex_unlock(&rxrpc_local_mutex); - _net("LOCAL %s %d {%d,%u,%pI4+%hu}", - age, - local->debug_id, - local->srx.transport_type, - local->srx.transport.family, - &local->srx.transport.sin.sin_addr, - ntohs(local->srx.transport.sin.sin_port)); + _net("LOCAL %s %d {%pISp}", + age, local->debug_id, &local->srx.transport); _leave(" = %p", local); return local; @@ -296,7 +291,7 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) mutex_unlock(&rxrpc_local_mutex); ASSERT(RB_EMPTY_ROOT(&local->client_conns)); - ASSERT(list_empty(&local->services)); + ASSERT(!local->service); if (socket) { local->socket = NULL; @@ -308,7 +303,6 @@ static void rxrpc_local_destroyer(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ - rxrpc_purge_queue(&local->accept_queue); rxrpc_purge_queue(&local->reject_queue); rxrpc_purge_queue(&local->event_queue); @@ -332,11 +326,6 @@ static void rxrpc_local_processor(struct work_struct *work) if (atomic_read(&local->usage) == 0) return rxrpc_local_destroyer(local); - if (!skb_queue_empty(&local->accept_queue)) { - rxrpc_accept_incoming_calls(local); - again = true; - } - if (!skb_queue_empty(&local->reject_queue)) { rxrpc_reject_packets(local); again = true; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index bdc5e42fe600..9d1c721bc4e8 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -20,29 +20,34 @@ */ unsigned int rxrpc_max_backlog __read_mostly = 10; +/* + * Maximum lifetime of a call (in mx). + */ +unsigned int rxrpc_max_call_lifetime = 60 * 1000; + /* * How long to wait before scheduling ACK generation after seeing a - * packet with RXRPC_REQUEST_ACK set (in jiffies). + * packet with RXRPC_REQUEST_ACK set (in ms). */ unsigned int rxrpc_requested_ack_delay = 1; /* - * How long to wait before scheduling an ACK with subtype DELAY (in jiffies). + * How long to wait before scheduling an ACK with subtype DELAY (in ms). * * We use this when we've received new data packets. If those packets aren't * all consumed within this time we will send a DELAY ACK if an ACK was not * requested to let the sender know it doesn't need to resend. */ -unsigned int rxrpc_soft_ack_delay = 1 * HZ; +unsigned int rxrpc_soft_ack_delay = 1 * 1000; /* - * How long to wait before scheduling an ACK with subtype IDLE (in jiffies). + * How long to wait before scheduling an ACK with subtype IDLE (in ms). * * We use this when we've consumed some previously soft-ACK'd packets when * further packets aren't immediately received to decide when to send an IDLE * ACK let the other end know that it can free up its Tx buffer space. */ -unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; +unsigned int rxrpc_idle_ack_delay = 0.5 * 1000; /* * Receive window size in packets. This indicates the maximum number of @@ -50,7 +55,10 @@ unsigned int rxrpc_idle_ack_delay = 0.5 * HZ; * limit is hit, we should generate an EXCEEDS_WINDOW ACK and discard further * packets. */ -unsigned int rxrpc_rx_window_size = 32; +unsigned int rxrpc_rx_window_size = RXRPC_INIT_RX_WINDOW_SIZE; +#if (RXRPC_RXTX_BUFF_SIZE - 1) < RXRPC_INIT_RX_WINDOW_SIZE +#error Need to reduce RXRPC_INIT_RX_WINDOW_SIZE +#endif /* * Maximum Rx MTU size. This indicates to the sender the size of jumbo packet @@ -64,6 +72,11 @@ unsigned int rxrpc_rx_mtu = 5692; */ unsigned int rxrpc_rx_jumbo_max = 4; +/* + * Time till packet resend (in milliseconds). + */ +unsigned int rxrpc_resend_timeout = 4 * 1000; + const char *const rxrpc_pkts[] = { "?00", "DATA", "ACK", "BUSY", "ABORT", "ACKALL", "CHALL", "RESP", "DEBUG", @@ -75,21 +88,152 @@ const s8 rxrpc_ack_priority[] = { [RXRPC_ACK_DELAY] = 1, [RXRPC_ACK_REQUESTED] = 2, [RXRPC_ACK_IDLE] = 3, - [RXRPC_ACK_PING_RESPONSE] = 4, - [RXRPC_ACK_DUPLICATE] = 5, - [RXRPC_ACK_OUT_OF_SEQUENCE] = 6, - [RXRPC_ACK_EXCEEDS_WINDOW] = 7, - [RXRPC_ACK_NOSPACE] = 8, + [RXRPC_ACK_DUPLICATE] = 4, + [RXRPC_ACK_OUT_OF_SEQUENCE] = 5, + [RXRPC_ACK_EXCEEDS_WINDOW] = 6, + [RXRPC_ACK_NOSPACE] = 7, + [RXRPC_ACK_PING_RESPONSE] = 8, + [RXRPC_ACK_PING] = 9, }; -const char *rxrpc_acks(u8 reason) -{ - static const char *const str[] = { - "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", - "IDL", "-?-" - }; +const char const rxrpc_ack_names[RXRPC_ACK__INVALID + 1][4] = { + "---", "REQ", "DUP", "OOS", "WIN", "MEM", "PNG", "PNR", "DLY", + "IDL", "-?-" +}; - if (reason >= ARRAY_SIZE(str)) - reason = ARRAY_SIZE(str) - 1; - return str[reason]; -} +const char rxrpc_skb_traces[rxrpc_skb__nr_trace][7] = { + [rxrpc_skb_rx_cleaned] = "Rx CLN", + [rxrpc_skb_rx_freed] = "Rx FRE", + [rxrpc_skb_rx_got] = "Rx GOT", + [rxrpc_skb_rx_lost] = "Rx *L*", + [rxrpc_skb_rx_received] = "Rx RCV", + [rxrpc_skb_rx_purged] = "Rx PUR", + [rxrpc_skb_rx_rotated] = "Rx ROT", + [rxrpc_skb_rx_seen] = "Rx SEE", + [rxrpc_skb_tx_cleaned] = "Tx CLN", + [rxrpc_skb_tx_freed] = "Tx FRE", + [rxrpc_skb_tx_got] = "Tx GOT", + [rxrpc_skb_tx_new] = "Tx NEW", + [rxrpc_skb_tx_rotated] = "Tx ROT", + [rxrpc_skb_tx_seen] = "Tx SEE", +}; + +const char rxrpc_conn_traces[rxrpc_conn__nr_trace][4] = { + [rxrpc_conn_new_client] = "NWc", + [rxrpc_conn_new_service] = "NWs", + [rxrpc_conn_queued] = "QUE", + [rxrpc_conn_seen] = "SEE", + [rxrpc_conn_got] = "GOT", + [rxrpc_conn_put_client] = "PTc", + [rxrpc_conn_put_service] = "PTs", +}; + +const char rxrpc_client_traces[rxrpc_client__nr_trace][7] = { + [rxrpc_client_activate_chans] = "Activa", + [rxrpc_client_alloc] = "Alloc ", + [rxrpc_client_chan_activate] = "ChActv", + [rxrpc_client_chan_disconnect] = "ChDisc", + [rxrpc_client_chan_pass] = "ChPass", + [rxrpc_client_chan_unstarted] = "ChUnst", + [rxrpc_client_cleanup] = "Clean ", + [rxrpc_client_count] = "Count ", + [rxrpc_client_discard] = "Discar", + [rxrpc_client_duplicate] = "Duplic", + [rxrpc_client_exposed] = "Expose", + [rxrpc_client_replace] = "Replac", + [rxrpc_client_to_active] = "->Actv", + [rxrpc_client_to_culled] = "->Cull", + [rxrpc_client_to_idle] = "->Idle", + [rxrpc_client_to_inactive] = "->Inac", + [rxrpc_client_to_waiting] = "->Wait", + [rxrpc_client_uncount] = "Uncoun", +}; + +const char rxrpc_transmit_traces[rxrpc_transmit__nr_trace][4] = { + [rxrpc_transmit_wait] = "WAI", + [rxrpc_transmit_queue] = "QUE", + [rxrpc_transmit_queue_last] = "QLS", + [rxrpc_transmit_rotate] = "ROT", + [rxrpc_transmit_rotate_last] = "RLS", + [rxrpc_transmit_await_reply] = "AWR", + [rxrpc_transmit_end] = "END", +}; + +const char rxrpc_receive_traces[rxrpc_receive__nr_trace][4] = { + [rxrpc_receive_incoming] = "INC", + [rxrpc_receive_queue] = "QUE", + [rxrpc_receive_queue_last] = "QLS", + [rxrpc_receive_front] = "FRN", + [rxrpc_receive_rotate] = "ROT", + [rxrpc_receive_end] = "END", +}; + +const char rxrpc_recvmsg_traces[rxrpc_recvmsg__nr_trace][5] = { + [rxrpc_recvmsg_enter] = "ENTR", + [rxrpc_recvmsg_wait] = "WAIT", + [rxrpc_recvmsg_dequeue] = "DEQU", + [rxrpc_recvmsg_hole] = "HOLE", + [rxrpc_recvmsg_next] = "NEXT", + [rxrpc_recvmsg_cont] = "CONT", + [rxrpc_recvmsg_full] = "FULL", + [rxrpc_recvmsg_data_return] = "DATA", + [rxrpc_recvmsg_terminal] = "TERM", + [rxrpc_recvmsg_to_be_accepted] = "TBAC", + [rxrpc_recvmsg_return] = "RETN", +}; + +const char rxrpc_rtt_tx_traces[rxrpc_rtt_tx__nr_trace][5] = { + [rxrpc_rtt_tx_ping] = "PING", + [rxrpc_rtt_tx_data] = "DATA", +}; + +const char rxrpc_rtt_rx_traces[rxrpc_rtt_rx__nr_trace][5] = { + [rxrpc_rtt_rx_ping_response] = "PONG", + [rxrpc_rtt_rx_requested_ack] = "RACK", +}; + +const char rxrpc_timer_traces[rxrpc_timer__nr_trace][8] = { + [rxrpc_timer_begin] = "Begin ", + [rxrpc_timer_expired] = "*EXPR*", + [rxrpc_timer_init_for_reply] = "IniRpl", + [rxrpc_timer_set_for_ack] = "SetAck", + [rxrpc_timer_set_for_send] = "SetTx ", + [rxrpc_timer_set_for_resend] = "SetRTx", +}; + +const char rxrpc_propose_ack_traces[rxrpc_propose_ack__nr_trace][8] = { + [rxrpc_propose_ack_client_tx_end] = "ClTxEnd", + [rxrpc_propose_ack_input_data] = "DataIn ", + [rxrpc_propose_ack_ping_for_lost_ack] = "LostAck", + [rxrpc_propose_ack_ping_for_lost_reply] = "LostRpl", + [rxrpc_propose_ack_ping_for_params] = "Params ", + [rxrpc_propose_ack_respond_to_ack] = "Rsp2Ack", + [rxrpc_propose_ack_respond_to_ping] = "Rsp2Png", + [rxrpc_propose_ack_retry_tx] = "RetryTx", + [rxrpc_propose_ack_rotate_rx] = "RxAck ", + [rxrpc_propose_ack_terminal_ack] = "ClTerm ", +}; + +const char *const rxrpc_propose_ack_outcomes[rxrpc_propose_ack__nr_outcomes] = { + [rxrpc_propose_ack_use] = "", + [rxrpc_propose_ack_update] = " Update", + [rxrpc_propose_ack_subsume] = " Subsume", +}; + +const char rxrpc_congest_modes[NR__RXRPC_CONGEST_MODES][10] = { + [RXRPC_CALL_SLOW_START] = "SlowStart", + [RXRPC_CALL_CONGEST_AVOIDANCE] = "CongAvoid", + [RXRPC_CALL_PACKET_LOSS] = "PktLoss ", + [RXRPC_CALL_FAST_RETRANSMIT] = "FastReTx ", +}; + +const char rxrpc_congest_changes[rxrpc_congest__nr_change][9] = { + [rxrpc_cong_begin_retransmission] = " Retrans", + [rxrpc_cong_cleared_nacks] = " Cleared", + [rxrpc_cong_new_low_nack] = " NewLowN", + [rxrpc_cong_no_change] = "", + [rxrpc_cong_progress] = " Progres", + [rxrpc_cong_retransmit_again] = " ReTxAgn", + [rxrpc_cong_rtt_window_end] = " RttWinE", + [rxrpc_cong_saw_nack] = " SawNack", +}; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index f4bda06b7d2d..0d47db886f6e 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -14,336 +14,326 @@ #include #include #include -#include #include #include #include #include "ar-internal.h" -/* - * Time till packet resend (in jiffies). - */ -unsigned int rxrpc_resend_timeout = 4 * HZ; - -static int rxrpc_send_data(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, size_t len); +struct rxrpc_pkt_buffer { + struct rxrpc_wire_header whdr; + union { + struct { + struct rxrpc_ackpacket ack; + u8 acks[255]; + u8 pad[3]; + }; + __be32 abort_code; + }; + struct rxrpc_ackinfo ackinfo; +}; /* - * extract control messages from the sendmsg() control buffer + * Fill out an ACK packet. */ -static int rxrpc_sendmsg_cmsg(struct msghdr *msg, - unsigned long *user_call_ID, - enum rxrpc_command *command, - u32 *abort_code, - bool *_exclusive) +static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, + struct rxrpc_pkt_buffer *pkt, + rxrpc_seq_t *_hard_ack, + rxrpc_seq_t *_top) { - struct cmsghdr *cmsg; - bool got_user_ID = false; - int len; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top, seq; + int ix; + u32 mtu, jmax; + u8 *ackp = pkt->acks; - *command = RXRPC_CMD_SEND_DATA; + /* Barrier against rxrpc_input_data(). */ + serial = call->ackr_serial; + hard_ack = READ_ONCE(call->rx_hard_ack); + top = smp_load_acquire(&call->rx_top); + *_hard_ack = hard_ack; + *_top = top; - if (msg->msg_controllen == 0) - return -EINVAL; + pkt->ack.bufferSpace = htons(8); + pkt->ack.maxSkew = htons(call->ackr_skew); + pkt->ack.firstPacket = htonl(hard_ack + 1); + pkt->ack.previousPacket = htonl(call->ackr_prev_seq); + pkt->ack.serial = htonl(serial); + pkt->ack.reason = call->ackr_reason; + pkt->ack.nAcks = top - hard_ack; - for_each_cmsghdr(cmsg, msg) { - if (!CMSG_OK(msg, cmsg)) - return -EINVAL; + if (pkt->ack.reason == RXRPC_ACK_PING) + pkt->whdr.flags |= RXRPC_REQUEST_ACK; - len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); - _debug("CMSG %d, %d, %d", - cmsg->cmsg_level, cmsg->cmsg_type, len); + if (after(top, hard_ack)) { + seq = hard_ack + 1; + do { + ix = seq & RXRPC_RXTX_BUFF_MASK; + if (call->rxtx_buffer[ix]) + *ackp++ = RXRPC_ACK_TYPE_ACK; + else + *ackp++ = RXRPC_ACK_TYPE_NACK; + seq++; + } while (before_eq(seq, top)); + } - if (cmsg->cmsg_level != SOL_RXRPC) - continue; + mtu = call->conn->params.peer->if_mtu; + mtu -= call->conn->params.peer->hdrsize; + jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; + pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); + pkt->ackinfo.maxMTU = htonl(mtu); + pkt->ackinfo.rwind = htonl(call->rx_winsize); + pkt->ackinfo.jumbo_max = htonl(jmax); - switch (cmsg->cmsg_type) { - case RXRPC_USER_CALL_ID: - if (msg->msg_flags & MSG_CMSG_COMPAT) { - if (len != sizeof(u32)) - return -EINVAL; - *user_call_ID = *(u32 *) CMSG_DATA(cmsg); - } else { - if (len != sizeof(unsigned long)) - return -EINVAL; - *user_call_ID = *(unsigned long *) - CMSG_DATA(cmsg); - } - _debug("User Call ID %lx", *user_call_ID); - got_user_ID = true; - break; + *ackp++ = 0; + *ackp++ = 0; + *ackp++ = 0; + return top - hard_ack + 3; +} - case RXRPC_ABORT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_SEND_ABORT; - if (len != sizeof(*abort_code)) - return -EINVAL; - *abort_code = *(unsigned int *) CMSG_DATA(cmsg); - _debug("Abort %x", *abort_code); - if (*abort_code == 0) - return -EINVAL; - break; +/* + * Send an ACK or ABORT call packet. + */ +int rxrpc_send_call_packet(struct rxrpc_call *call, u8 type) +{ + struct rxrpc_connection *conn = NULL; + struct rxrpc_pkt_buffer *pkt; + struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; + size_t len, n; + bool ping = false; + int ioc, ret; + u32 abort_code; - case RXRPC_ACCEPT: - if (*command != RXRPC_CMD_SEND_DATA) - return -EINVAL; - *command = RXRPC_CMD_ACCEPT; - if (len != 0) - return -EINVAL; - break; + _enter("%u,%s", call->debug_id, rxrpc_pkts[type]); - case RXRPC_EXCLUSIVE_CALL: - *_exclusive = true; - if (len != 0) - return -EINVAL; - break; - default: - return -EINVAL; + spin_lock_bh(&call->lock); + if (call->conn) + conn = rxrpc_get_connection_maybe(call->conn); + spin_unlock_bh(&call->lock); + if (!conn) + return -ECONNRESET; + + pkt = kzalloc(sizeof(*pkt), GFP_KERNEL); + if (!pkt) { + rxrpc_put_connection(conn); + return -ENOMEM; + } + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + pkt->whdr.epoch = htonl(conn->proto.epoch); + pkt->whdr.cid = htonl(call->cid); + pkt->whdr.callNumber = htonl(call->call_id); + pkt->whdr.seq = 0; + pkt->whdr.type = type; + pkt->whdr.flags = conn->out_clientflag; + pkt->whdr.userStatus = 0; + pkt->whdr.securityIndex = call->security_ix; + pkt->whdr._rsvd = 0; + pkt->whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = pkt; + iov[0].iov_len = sizeof(pkt->whdr); + len = sizeof(pkt->whdr); + + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + spin_lock_bh(&call->lock); + if (!call->ackr_reason) { + spin_unlock_bh(&call->lock); + ret = 0; + goto out; + } + ping = (call->ackr_reason == RXRPC_ACK_PING); + n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top); + call->ackr_reason = 0; + + spin_unlock_bh(&call->lock); + + + pkt->whdr.flags |= RXRPC_SLOW_START_OK; + + iov[0].iov_len += sizeof(pkt->ack) + n; + iov[1].iov_base = &pkt->ackinfo; + iov[1].iov_len = sizeof(pkt->ackinfo); + len += sizeof(pkt->ack) + n + sizeof(pkt->ackinfo); + ioc = 2; + break; + + case RXRPC_PACKET_TYPE_ABORT: + abort_code = call->abort_code; + pkt->abort_code = htonl(abort_code); + iov[0].iov_len += sizeof(pkt->abort_code); + len += sizeof(pkt->abort_code); + ioc = 1; + break; + + default: + BUG(); + ret = -ENOANO; + goto out; + } + + serial = atomic_inc_return(&conn->serial); + pkt->whdr.serial = htonl(serial); + switch (type) { + case RXRPC_PACKET_TYPE_ACK: + trace_rxrpc_tx_ack(call, serial, + ntohl(pkt->ack.firstPacket), + ntohl(pkt->ack.serial), + pkt->ack.reason, pkt->ack.nAcks); + break; + } + + if (ping) { + call->ackr_ping = serial; + smp_wmb(); + /* We need to stick a time in before we send the packet in case + * the reply gets back before kernel_sendmsg() completes - but + * asking UDP to send the packet can take a relatively long + * time, so we update the time after, on the assumption that + * the packet transmission is more likely to happen towards the + * end of the kernel_sendmsg() call. + */ + call->ackr_ping_time = ktime_get_real(); + set_bit(RXRPC_CALL_PINGING, &call->flags); + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_ping, serial); + } + ret = kernel_sendmsg(conn->params.local->socket, + &msg, iov, ioc, len); + if (ping) + call->ackr_ping_time = ktime_get_real(); + + if (type == RXRPC_PACKET_TYPE_ACK && + call->state < RXRPC_CALL_COMPLETE) { + if (ret < 0) { + clear_bit(RXRPC_CALL_PINGING, &call->flags); + rxrpc_propose_ACK(call, pkt->ack.reason, + ntohs(pkt->ack.maxSkew), + ntohl(pkt->ack.serial), + true, true, + rxrpc_propose_ack_retry_tx); + } else { + spin_lock_bh(&call->lock); + if (after(hard_ack, call->ackr_consumed)) + call->ackr_consumed = hard_ack; + if (after(top, call->ackr_seen)) + call->ackr_seen = top; + spin_unlock_bh(&call->lock); } } - if (!got_user_ID) - return -EINVAL; - _leave(" = 0"); - return 0; -} - -/* - * abort a call, sending an ABORT packet to the peer - */ -static void rxrpc_send_abort(struct rxrpc_call *call, u32 abort_code) -{ - write_lock_bh(&call->state_lock); - - if (call->state <= RXRPC_CALL_COMPLETE) { - call->state = RXRPC_CALL_LOCALLY_ABORTED; - call->local_abort = abort_code; - set_bit(RXRPC_CALL_EV_ABORT, &call->events); - del_timer_sync(&call->resend_timer); - del_timer_sync(&call->ack_timer); - clear_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events); - clear_bit(RXRPC_CALL_EV_ACK, &call->events); - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - rxrpc_queue_call(call); - } - - write_unlock_bh(&call->state_lock); -} - -/* - * Create a new client call for sendmsg(). - */ -static struct rxrpc_call * -rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, - unsigned long user_call_ID, bool exclusive) -{ - struct rxrpc_conn_parameters cp; - struct rxrpc_call *call; - struct key *key; - - DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); - - _enter(""); - - if (!msg->msg_name) - return ERR_PTR(-EDESTADDRREQ); - - key = rx->key; - if (key && !rx->key->payload.data[0]) - key = NULL; - - memset(&cp, 0, sizeof(cp)); - cp.local = rx->local; - cp.key = rx->key; - cp.security_level = rx->min_sec_level; - cp.exclusive = rx->exclusive | exclusive; - cp.service_id = srx->srx_service; - call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); - - _leave(" = %p\n", call); - return call; -} - -/* - * send a message forming part of a client call through an RxRPC socket - * - caller holds the socket locked - * - the socket may be either a client socket or a server socket - */ -int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) -{ - enum rxrpc_command cmd; - struct rxrpc_call *call; - unsigned long user_call_ID = 0; - bool exclusive = false; - u32 abort_code = 0; - int ret; - - _enter(""); - - ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, - &exclusive); - if (ret < 0) - return ret; - - if (cmd == RXRPC_CMD_ACCEPT) { - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) - return -EINVAL; - call = rxrpc_accept_call(rx, user_call_ID); - if (IS_ERR(call)) - return PTR_ERR(call); - rxrpc_put_call(call); - return 0; - } - - call = rxrpc_find_call_by_user_ID(rx, user_call_ID); - if (!call) { - if (cmd != RXRPC_CMD_SEND_DATA) - return -EBADSLT; - call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, - exclusive); - if (IS_ERR(call)) - return PTR_ERR(call); - } - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - /* it's too late for this call */ - ret = -ECONNRESET; - } else if (cmd == RXRPC_CMD_SEND_ABORT) { - rxrpc_send_abort(call, abort_code); - ret = 0; - } else if (cmd != RXRPC_CMD_SEND_DATA) { - ret = -EINVAL; - } else if (!call->in_clientflag && - call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { - /* request phase complete for this client call */ - ret = -EPROTO; - } else if (call->in_clientflag && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - /* Reply phase not begun or not complete for service call. */ - ret = -EPROTO; - } else { - ret = rxrpc_send_data(rx, call, msg, len); - } - - rxrpc_put_call(call); - _leave(" = %d", ret); +out: + rxrpc_put_connection(conn); + kfree(pkt); return ret; } -/** - * rxrpc_kernel_send_data - Allow a kernel service to send data on a call - * @call: The call to send data through - * @msg: The data to send - * @len: The amount of data to send - * - * Allow a kernel service to send data on a call. The call must be in an state - * appropriate to sending data. No control data should be supplied in @msg, - * nor should an address be supplied. MSG_MORE should be flagged if there's - * more data to come, otherwise this data will end the transmission phase. - */ -int rxrpc_kernel_send_data(struct rxrpc_call *call, struct msghdr *msg, - size_t len) -{ - int ret; - - _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); - - ASSERTCMP(msg->msg_name, ==, NULL); - ASSERTCMP(msg->msg_control, ==, NULL); - - lock_sock(&call->socket->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = -ESHUTDOWN; /* it's too late for this call */ - } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && - call->state != RXRPC_CALL_SERVER_ACK_REQUEST && - call->state != RXRPC_CALL_SERVER_SEND_REPLY) { - ret = -EPROTO; /* request phase complete for this client call */ - } else { - ret = rxrpc_send_data(call->socket, call, msg, len); - } - - release_sock(&call->socket->sk); - _leave(" = %d", ret); - return ret; -} - -EXPORT_SYMBOL(rxrpc_kernel_send_data); - -/** - * rxrpc_kernel_abort_call - Allow a kernel service to abort a call - * @call: The call to be aborted - * @abort_code: The abort code to stick into the ABORT packet - * - * Allow a kernel service to abort a call, if it's still in an abortable state. - */ -void rxrpc_kernel_abort_call(struct rxrpc_call *call, u32 abort_code) -{ - _enter("{%d},%d", call->debug_id, abort_code); - - lock_sock(&call->socket->sk); - - _debug("CALL %d USR %lx ST %d on CONN %p", - call->debug_id, call->user_call_ID, call->state, call->conn); - - if (call->state < RXRPC_CALL_COMPLETE) - rxrpc_send_abort(call, abort_code); - - release_sock(&call->socket->sk); - _leave(""); -} - -EXPORT_SYMBOL(rxrpc_kernel_abort_call); - /* * send a packet through the transport endpoint */ -int rxrpc_send_data_packet(struct rxrpc_connection *conn, struct sk_buff *skb) +int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool retrans) { - struct kvec iov[1]; + struct rxrpc_connection *conn = call->conn; + struct rxrpc_wire_header whdr; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct msghdr msg; + struct kvec iov[2]; + rxrpc_serial_t serial; + size_t len; + bool lost = false; int ret, opt; _enter(",{%d}", skb->len); - iov[0].iov_base = skb->head; - iov[0].iov_len = skb->len; + /* Each transmission of a Tx packet needs a new serial number */ + serial = atomic_inc_return(&conn->serial); - msg.msg_name = &conn->params.peer->srx.transport; - msg.msg_namelen = conn->params.peer->srx.transport_len; + whdr.epoch = htonl(conn->proto.epoch); + whdr.cid = htonl(call->cid); + whdr.callNumber = htonl(call->call_id); + whdr.seq = htonl(sp->hdr.seq); + whdr.serial = htonl(serial); + whdr.type = RXRPC_PACKET_TYPE_DATA; + whdr.flags = sp->hdr.flags; + whdr.userStatus = 0; + whdr.securityIndex = call->security_ix; + whdr._rsvd = htons(sp->hdr._rsvd); + whdr.serviceId = htons(call->service_id); + + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = skb->head; + iov[1].iov_len = skb->len; + len = iov[0].iov_len + iov[1].iov_len; + + msg.msg_name = &call->peer->srx.transport; + msg.msg_namelen = call->peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; + /* If our RTT cache needs working on, request an ACK. Also request + * ACKs if a DATA packet appears to have been lost. + */ + if (retrans || + call->cong_mode == RXRPC_CALL_SLOW_START || + (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + whdr.flags |= RXRPC_REQUEST_ACK; + + if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) { + static int lose; + if ((lose++ & 7) == 7) { + ret = 0; + lost = true; + goto done; + } + } + + _proto("Tx DATA %%%u { #%u }", serial, sp->hdr.seq); + /* send the packet with the don't fragment bit set if we currently * think it's small enough */ - if (skb->len - sizeof(struct rxrpc_wire_header) < conn->params.peer->maxdata) { - down_read(&conn->params.local->defrag_sem); - /* send the packet by UDP - * - returns -EMSGSIZE if UDP would have to fragment the packet - * to go out of the interface - * - in which case, we'll have processed the ICMP error - * message and update the peer record - */ - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + if (iov[1].iov_len >= call->peer->maxdata) + goto send_fragmentable; - up_read(&conn->params.local->defrag_sem); - if (ret == -EMSGSIZE) - goto send_fragmentable; + down_read(&conn->params.local->defrag_sem); + /* send the packet by UDP + * - returns -EMSGSIZE if UDP would have to fragment the packet + * to go out of the interface + * - in which case, we'll have processed the ICMP error + * message and update the peer record + */ + ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 2, len); - _leave(" = %d [%u]", ret, conn->params.peer->maxdata); - return ret; + up_read(&conn->params.local->defrag_sem); + if (ret == -EMSGSIZE) + goto send_fragmentable; + +done: + trace_rxrpc_tx_data(call, sp->hdr.seq, serial, whdr.flags, + retrans, lost); + if (ret >= 0) { + ktime_t now = ktime_get_real(); + skb->tstamp = now; + smp_wmb(); + sp->hdr.serial = serial; + if (whdr.flags & RXRPC_REQUEST_ACK) { + call->peer->rtt_last_req = now; + trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); + } } + _leave(" = %d [%u]", ret, call->peer->maxdata); + return ret; send_fragmentable: /* attempt to send this message with fragmentation enabled */ @@ -358,8 +348,8 @@ send_fragmentable: SOL_IP, IP_MTU_DISCOVER, (char *)&opt, sizeof(opt)); if (ret == 0) { - ret = kernel_sendmsg(conn->params.local->socket, &msg, iov, 1, - iov[0].iov_len); + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 2, len); opt = IP_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, SOL_IP, @@ -367,355 +357,82 @@ send_fragmentable: (char *)&opt, sizeof(opt)); } break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + opt = IPV6_PMTUDISC_DONT; + ret = kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + if (ret == 0) { + ret = kernel_sendmsg(conn->params.local->socket, &msg, + iov, 1, iov[0].iov_len); + + opt = IPV6_PMTUDISC_DO; + kernel_setsockopt(conn->params.local->socket, + SOL_IPV6, IPV6_MTU_DISCOVER, + (char *)&opt, sizeof(opt)); + } + break; +#endif } up_write(&conn->params.local->defrag_sem); - _leave(" = %d [frag %u]", ret, conn->params.peer->maxdata); - return ret; + goto done; } /* - * wait for space to appear in the transmit/ACK window - * - caller holds the socket locked + * reject packets through the local endpoint */ -static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, - struct rxrpc_call *call, - long *timeo) +void rxrpc_reject_packets(struct rxrpc_local *local) { - DECLARE_WAITQUEUE(myself, current); - int ret; + struct sockaddr_rxrpc srx; + struct rxrpc_skb_priv *sp; + struct rxrpc_wire_header whdr; + struct sk_buff *skb; + struct msghdr msg; + struct kvec iov[2]; + size_t size; + __be32 code; - _enter(",{%d},%ld", - CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz), - *timeo); + _enter("%d", local->debug_id); - add_wait_queue(&call->tx_waitq, &myself); + iov[0].iov_base = &whdr; + iov[0].iov_len = sizeof(whdr); + iov[1].iov_base = &code; + iov[1].iov_len = sizeof(code); + size = sizeof(whdr) + sizeof(code); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - ret = 0; - if (CIRC_SPACE(call->acks_head, ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 0) - break; - if (signal_pending(current)) { - ret = sock_intr_errno(*timeo); - break; + msg.msg_name = &srx.transport; + msg.msg_control = NULL; + msg.msg_controllen = 0; + msg.msg_flags = 0; + + memset(&whdr, 0, sizeof(whdr)); + whdr.type = RXRPC_PACKET_TYPE_ABORT; + + while ((skb = skb_dequeue(&local->reject_queue))) { + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + sp = rxrpc_skb(skb); + + if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + msg.msg_namelen = srx.transport_len; + + code = htonl(skb->priority); + + whdr.epoch = htonl(sp->hdr.epoch); + whdr.cid = htonl(sp->hdr.cid); + whdr.callNumber = htonl(sp->hdr.callNumber); + whdr.serviceId = htons(sp->hdr.serviceId); + whdr.flags = sp->hdr.flags; + whdr.flags ^= RXRPC_CLIENT_INITIATED; + whdr.flags &= RXRPC_CLIENT_INITIATED; + + kernel_sendmsg(local->socket, &msg, iov, 2, size); } - release_sock(&rx->sk); - *timeo = schedule_timeout(*timeo); - lock_sock(&rx->sk); - } - - remove_wait_queue(&call->tx_waitq, &myself); - set_current_state(TASK_RUNNING); - _leave(" = %d", ret); - return ret; -} - -/* - * attempt to schedule an instant Tx resend - */ -static inline void rxrpc_instant_resend(struct rxrpc_call *call) -{ - read_lock_bh(&call->state_lock); - if (try_to_del_timer_sync(&call->resend_timer) >= 0) { - clear_bit(RXRPC_CALL_RUN_RTIMER, &call->flags); - if (call->state < RXRPC_CALL_COMPLETE && - !test_and_set_bit(RXRPC_CALL_EV_RESEND_TIMER, &call->events)) - rxrpc_queue_call(call); - } - read_unlock_bh(&call->state_lock); -} - -/* - * queue a packet for transmission, set the resend timer and attempt - * to send the packet immediately - */ -static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, - bool last) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - int ret; - - _net("queue skb %p [%d]", skb, call->acks_head); - - ASSERT(call->acks_window != NULL); - call->acks_window[call->acks_head] = (unsigned long) skb; - smp_wmb(); - call->acks_head = (call->acks_head + 1) & (call->acks_winsz - 1); - - if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { - _debug("________awaiting reply/ACK__________"); - write_lock_bh(&call->state_lock); - switch (call->state) { - case RXRPC_CALL_CLIENT_SEND_REQUEST: - call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; - break; - case RXRPC_CALL_SERVER_ACK_REQUEST: - call->state = RXRPC_CALL_SERVER_SEND_REPLY; - if (!last) - break; - case RXRPC_CALL_SERVER_SEND_REPLY: - call->state = RXRPC_CALL_SERVER_AWAIT_ACK; - break; - default: - break; - } - write_unlock_bh(&call->state_lock); - } - - _proto("Tx DATA %%%u { #%u }", sp->hdr.serial, sp->hdr.seq); - - sp->need_resend = false; - sp->resend_at = jiffies + rxrpc_resend_timeout; - if (!test_and_set_bit(RXRPC_CALL_RUN_RTIMER, &call->flags)) { - _debug("run timer"); - call->resend_timer.expires = sp->resend_at; - add_timer(&call->resend_timer); - } - - /* attempt to cancel the rx-ACK timer, deferring reply transmission if - * we're ACK'ing the request phase of an incoming call */ - ret = -EAGAIN; - if (try_to_del_timer_sync(&call->ack_timer) >= 0) { - /* the packet may be freed by rxrpc_process_call() before this - * returns */ - ret = rxrpc_send_data_packet(call->conn, skb); - _net("sent skb %p", skb); - } else { - _debug("failed to delete ACK timer"); - } - - if (ret < 0) { - _debug("need instant resend %d", ret); - sp->need_resend = true; - rxrpc_instant_resend(call); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); } _leave(""); } - -/* - * Convert a host-endian header into a network-endian header. - */ -static void rxrpc_insert_header(struct sk_buff *skb) -{ - struct rxrpc_wire_header whdr; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - whdr.epoch = htonl(sp->hdr.epoch); - whdr.cid = htonl(sp->hdr.cid); - whdr.callNumber = htonl(sp->hdr.callNumber); - whdr.seq = htonl(sp->hdr.seq); - whdr.serial = htonl(sp->hdr.serial); - whdr.type = sp->hdr.type; - whdr.flags = sp->hdr.flags; - whdr.userStatus = sp->hdr.userStatus; - whdr.securityIndex = sp->hdr.securityIndex; - whdr._rsvd = htons(sp->hdr._rsvd); - whdr.serviceId = htons(sp->hdr.serviceId); - - memcpy(skb->head, &whdr, sizeof(whdr)); -} - -/* - * send data through a socket - * - must be called in process context - * - caller holds the socket locked - */ -static int rxrpc_send_data(struct rxrpc_sock *rx, - struct rxrpc_call *call, - struct msghdr *msg, size_t len) -{ - struct rxrpc_skb_priv *sp; - struct sk_buff *skb; - struct sock *sk = &rx->sk; - long timeo; - bool more; - int ret, copied; - - timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); - - /* this should be in poll */ - sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) - return -EPIPE; - - more = msg->msg_flags & MSG_MORE; - - skb = call->tx_pending; - call->tx_pending = NULL; - - copied = 0; - do { - if (!skb) { - size_t size, chunk, max, space; - - _debug("alloc"); - - if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) <= 0) { - ret = -EAGAIN; - if (msg->msg_flags & MSG_DONTWAIT) - goto maybe_error; - ret = rxrpc_wait_for_tx_window(rx, call, - &timeo); - if (ret < 0) - goto maybe_error; - } - - max = call->conn->params.peer->maxdata; - max -= call->conn->security_size; - max &= ~(call->conn->size_align - 1UL); - - chunk = max; - if (chunk > msg_data_left(msg) && !more) - chunk = msg_data_left(msg); - - space = chunk + call->conn->size_align; - space &= ~(call->conn->size_align - 1UL); - - size = space + call->conn->header_size; - - _debug("SIZE: %zu/%zu/%zu", chunk, space, size); - - /* create a buffer that we can retain until it's ACK'd */ - skb = sock_alloc_send_skb( - sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); - if (!skb) - goto maybe_error; - - rxrpc_new_skb(skb); - - _debug("ALLOC SEND %p", skb); - - ASSERTCMP(skb->mark, ==, 0); - - _debug("HS: %u", call->conn->header_size); - skb_reserve(skb, call->conn->header_size); - skb->len += call->conn->header_size; - - sp = rxrpc_skb(skb); - sp->remain = chunk; - if (sp->remain > skb_tailroom(skb)) - sp->remain = skb_tailroom(skb); - - _net("skb: hr %d, tr %d, hl %d, rm %d", - skb_headroom(skb), - skb_tailroom(skb), - skb_headlen(skb), - sp->remain); - - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - - _debug("append"); - sp = rxrpc_skb(skb); - - /* append next segment of data to the current buffer */ - if (msg_data_left(msg) > 0) { - int copy = skb_tailroom(skb); - ASSERTCMP(copy, >, 0); - if (copy > msg_data_left(msg)) - copy = msg_data_left(msg); - if (copy > sp->remain) - copy = sp->remain; - - _debug("add"); - ret = skb_add_data(skb, &msg->msg_iter, copy); - _debug("added"); - if (ret < 0) - goto efault; - sp->remain -= copy; - skb->mark += copy; - copied += copy; - } - - /* check for the far side aborting the call or a network error - * occurring */ - if (call->state > RXRPC_CALL_COMPLETE) - goto call_aborted; - - /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || - (msg_data_left(msg) == 0 && !more)) { - struct rxrpc_connection *conn = call->conn; - uint32_t seq; - size_t pad; - - /* pad out if we're using security */ - if (conn->security_ix) { - pad = conn->security_size + skb->mark; - pad = conn->size_align - pad; - pad &= conn->size_align - 1; - _debug("pad %zu", pad); - if (pad) - memset(skb_put(skb, pad), 0, pad); - } - - seq = atomic_inc_return(&call->sequence); - - sp->hdr.epoch = conn->proto.epoch; - sp->hdr.cid = call->cid; - sp->hdr.callNumber = call->call_id; - sp->hdr.seq = seq; - sp->hdr.serial = atomic_inc_return(&conn->serial); - sp->hdr.type = RXRPC_PACKET_TYPE_DATA; - sp->hdr.userStatus = 0; - sp->hdr.securityIndex = conn->security_ix; - sp->hdr._rsvd = 0; - sp->hdr.serviceId = call->service_id; - - sp->hdr.flags = conn->out_clientflag; - if (msg_data_left(msg) == 0 && !more) - sp->hdr.flags |= RXRPC_LAST_PACKET; - else if (CIRC_SPACE(call->acks_head, - ACCESS_ONCE(call->acks_tail), - call->acks_winsz) > 1) - sp->hdr.flags |= RXRPC_MORE_PACKETS; - if (more && seq & 1) - sp->hdr.flags |= RXRPC_REQUEST_ACK; - - ret = conn->security->secure_packet( - call, skb, skb->mark, - skb->head + sizeof(struct rxrpc_wire_header)); - if (ret < 0) - goto out; - - rxrpc_insert_header(skb); - rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); - skb = NULL; - } - } while (msg_data_left(msg) > 0); - -success: - ret = copied; -out: - call->tx_pending = skb; - _leave(" = %d", ret); - return ret; - -call_aborted: - rxrpc_free_skb(skb); - if (call->state == RXRPC_CALL_NETWORK_ERROR) - ret = call->error_report < RXRPC_LOCAL_ERROR_OFFSET ? - call->error_report : - call->error_report - RXRPC_LOCAL_ERROR_OFFSET; - else - ret = -ECONNABORTED; - _leave(" = %d", ret); - return ret; - -maybe_error: - if (copied) - goto success; - goto out; - -efault: - ret = -EFAULT; - goto out; -} diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 8940674b5e08..bf13b8470c9a 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -66,6 +66,32 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, } break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + srx.transport.sin6.sin6_port = serr->port; + srx.transport_len = sizeof(struct sockaddr_in6); + switch (serr->ee.ee_origin) { + case SO_EE_ORIGIN_ICMP6: + _net("Rx ICMP6"); + memcpy(&srx.transport.sin6.sin6_addr, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in6_addr)); + break; + case SO_EE_ORIGIN_ICMP: + _net("Rx ICMP on v6 sock"); + memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12, + skb_network_header(skb) + serr->addr_offset, + sizeof(struct in_addr)); + break; + default: + memcpy(&srx.transport.sin6.sin6_addr, + &ipv6_hdr(skb)->saddr, + sizeof(struct in6_addr)); + break; + } + break; +#endif + default: BUG(); } @@ -129,22 +155,21 @@ void rxrpc_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + rxrpc_new_skb(skb, rxrpc_skb_rx_received); serr = SKB_EXT_ERR(skb); if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { _leave("UDP empty message"); - kfree_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); return; } - rxrpc_new_skb(skb); - rcu_read_lock(); peer = rxrpc_lookup_peer_icmp_rcu(local, skb); if (peer && !rxrpc_get_peer_maybe(peer)) peer = NULL; if (!peer) { rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); _leave(" [no peer]"); return; } @@ -154,7 +179,7 @@ void rxrpc_error_report(struct sock *sk) serr->ee.ee_code == ICMP_FRAG_NEEDED)) { rxrpc_adjust_mtu(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); rxrpc_put_peer(peer); _leave(" [MTU update]"); return; @@ -162,7 +187,7 @@ void rxrpc_error_report(struct sock *sk) rxrpc_store_error(peer, serr); rcu_read_unlock(); - rxrpc_free_skb(skb); + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); /* The ref we obtained is passed off to the work item */ rxrpc_queue_work(&peer->error_distributor); @@ -248,13 +273,20 @@ void rxrpc_peer_error_distributor(struct work_struct *work) struct rxrpc_peer *peer = container_of(work, struct rxrpc_peer, error_distributor); struct rxrpc_call *call; - int error_report; + enum rxrpc_call_completion compl; + int error; _enter(""); - error_report = READ_ONCE(peer->error_report); + error = READ_ONCE(peer->error_report); + if (error < RXRPC_LOCAL_ERROR_OFFSET) { + compl = RXRPC_CALL_NETWORK_ERROR; + } else { + compl = RXRPC_CALL_LOCAL_ERROR; + error -= RXRPC_LOCAL_ERROR_OFFSET; + } - _debug("ISSUE ERROR %d", error_report); + _debug("ISSUE ERROR %s %d", rxrpc_call_completions[compl], error); spin_lock_bh(&peer->lock); @@ -262,16 +294,10 @@ void rxrpc_peer_error_distributor(struct work_struct *work) call = hlist_entry(peer->error_targets.first, struct rxrpc_call, error_link); hlist_del_init(&call->error_link); + rxrpc_see_call(call); - write_lock(&call->state_lock); - if (call->state != RXRPC_CALL_COMPLETE && - call->state < RXRPC_CALL_NETWORK_ERROR) { - call->error_report = error_report; - call->state = RXRPC_CALL_NETWORK_ERROR; - set_bit(RXRPC_CALL_EV_RCVD_ERROR, &call->events); - rxrpc_queue_call(call); - } - write_unlock(&call->state_lock); + if (rxrpc_set_call_completion(call, compl, 0, error)) + rxrpc_notify_socket(call); } spin_unlock_bh(&peer->lock); @@ -279,3 +305,44 @@ void rxrpc_peer_error_distributor(struct work_struct *work) rxrpc_put_peer(peer); _leave(""); } + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt; + u64 sum = peer->rtt_sum, avg; + u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; + + rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); + if (rtt < 0) + return; + + /* Replace the oldest datum in the RTT buffer */ + sum -= peer->rtt_cache[cursor]; + sum += rtt; + peer->rtt_cache[cursor] = rtt; + peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); + peer->rtt_sum = sum; + if (usage < RXRPC_RTT_CACHE_SIZE) { + usage++; + peer->rtt_usage = usage; + } + + /* Now recalculate the average */ + if (usage == RXRPC_RTT_CACHE_SIZE) { + avg = sum / RXRPC_RTT_CACHE_SIZE; + } else { + avg = sum; + do_div(avg, usage); + } + + peer->rtt = avg; + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, + usage, avg); +} diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 538e9831c699..941b724d523b 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -16,12 +16,14 @@ #include #include #include +#include #include #include #include #include #include #include +#include #include "ar-internal.h" static DEFINE_HASHTABLE(rxrpc_peer_hash, 10); @@ -50,6 +52,13 @@ static unsigned long rxrpc_peer_hash_key(struct rxrpc_local *local, size = sizeof(srx->transport.sin.sin_addr); p = (u16 *)&srx->transport.sin.sin_addr; break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + hash_key += (u16 __force)srx->transport.sin.sin_port; + size = sizeof(srx->transport.sin6.sin6_addr); + p = (u16 *)&srx->transport.sin6.sin6_addr; + break; +#endif default: WARN(1, "AF_RXRPC: Unsupported transport address family\n"); return 0; @@ -93,6 +102,14 @@ static long rxrpc_peer_cmp_key(const struct rxrpc_peer *peer, memcmp(&peer->srx.transport.sin.sin_addr, &srx->transport.sin.sin_addr, sizeof(struct in_addr)); +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + return ((u16 __force)peer->srx.transport.sin6.sin6_port - + (u16 __force)srx->transport.sin6.sin6_port) ?: + memcmp(&peer->srx.transport.sin6.sin6_addr, + &srx->transport.sin6.sin6_addr, + sizeof(struct in6_addr)); +#endif default: BUG(); } @@ -130,17 +147,7 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); if (peer) { - switch (srx->transport.family) { - case AF_INET: - _net("PEER %d {%d,%u,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - peer->srx.transport.family, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); - break; - } - + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); } return peer; @@ -152,22 +159,53 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local, */ static void rxrpc_assess_MTU_size(struct rxrpc_peer *peer) { + struct dst_entry *dst; struct rtable *rt; - struct flowi4 fl4; + struct flowi fl; + struct flowi4 *fl4 = &fl.u.ip4; +#ifdef CONFIG_AF_RXRPC_IPV6 + struct flowi6 *fl6 = &fl.u.ip6; +#endif peer->if_mtu = 1500; - rt = ip_route_output_ports(&init_net, &fl4, NULL, - peer->srx.transport.sin.sin_addr.s_addr, 0, - htons(7000), htons(7001), - IPPROTO_UDP, 0, 0); - if (IS_ERR(rt)) { - _leave(" [route err %ld]", PTR_ERR(rt)); - return; + memset(&fl, 0, sizeof(fl)); + switch (peer->srx.transport.family) { + case AF_INET: + rt = ip_route_output_ports( + &init_net, fl4, NULL, + peer->srx.transport.sin.sin_addr.s_addr, 0, + htons(7000), htons(7001), IPPROTO_UDP, 0, 0); + if (IS_ERR(rt)) { + _leave(" [route err %ld]", PTR_ERR(rt)); + return; + } + dst = &rt->dst; + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + fl6->flowi6_iif = LOOPBACK_IFINDEX; + fl6->flowi6_scope = RT_SCOPE_UNIVERSE; + fl6->flowi6_proto = IPPROTO_UDP; + memcpy(&fl6->daddr, &peer->srx.transport.sin6.sin6_addr, + sizeof(struct in6_addr)); + fl6->fl6_dport = htons(7001); + fl6->fl6_sport = htons(7000); + dst = ip6_route_output(&init_net, NULL, fl6); + if (IS_ERR(dst)) { + _leave(" [route err %ld]", PTR_ERR(dst)); + return; + } + break; +#endif + + default: + BUG(); } - peer->if_mtu = dst_mtu(&rt->dst); - dst_release(&rt->dst); + peer->if_mtu = dst_mtu(dst); + dst_release(dst); _leave(" [if_mtu %u]", peer->if_mtu); } @@ -198,6 +236,41 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) return peer; } +/* + * Initialise peer record. + */ +static void rxrpc_init_peer(struct rxrpc_peer *peer, unsigned long hash_key) +{ + peer->hash_key = hash_key; + rxrpc_assess_MTU_size(peer); + peer->mtu = peer->if_mtu; + peer->rtt_last_req = ktime_get_real(); + + switch (peer->srx.transport.family) { + case AF_INET: + peer->hdrsize = sizeof(struct iphdr); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + peer->hdrsize = sizeof(struct ipv6hdr); + break; +#endif + default: + BUG(); + } + + switch (peer->srx.transport_type) { + case SOCK_DGRAM: + peer->hdrsize += sizeof(struct udphdr); + break; + default: + BUG(); + } + + peer->hdrsize += sizeof(struct rxrpc_wire_header); + peer->maxdata = peer->mtu - peer->hdrsize; +} + /* * Set up a new peer. */ @@ -212,34 +285,43 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local, peer = rxrpc_alloc_peer(local, gfp); if (peer) { - peer->hash_key = hash_key; memcpy(&peer->srx, srx, sizeof(*srx)); - - rxrpc_assess_MTU_size(peer); - peer->mtu = peer->if_mtu; - - if (srx->transport.family == AF_INET) { - peer->hdrsize = sizeof(struct iphdr); - switch (srx->transport_type) { - case SOCK_DGRAM: - peer->hdrsize += sizeof(struct udphdr); - break; - default: - BUG(); - break; - } - } else { - BUG(); - } - - peer->hdrsize += sizeof(struct rxrpc_wire_header); - peer->maxdata = peer->mtu - peer->hdrsize; + rxrpc_init_peer(peer, hash_key); } _leave(" = %p", peer); return peer; } +/* + * Set up a new incoming peer. The address is prestored in the preallocated + * peer. + */ +struct rxrpc_peer *rxrpc_lookup_incoming_peer(struct rxrpc_local *local, + struct rxrpc_peer *prealloc) +{ + struct rxrpc_peer *peer; + unsigned long hash_key; + + hash_key = rxrpc_peer_hash_key(local, &prealloc->srx); + prealloc->local = local; + rxrpc_init_peer(prealloc, hash_key); + + spin_lock(&rxrpc_peer_hash_lock); + + /* Need to check that we aren't racing with someone else */ + peer = __rxrpc_lookup_peer_rcu(local, &prealloc->srx, hash_key); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + peer = prealloc; + hash_add_rcu(rxrpc_peer_hash, &peer->hash_link, hash_key); + } + + spin_unlock(&rxrpc_peer_hash_lock); + return peer; +} + /* * obtain a remote transport endpoint for the specified address */ @@ -249,11 +331,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, struct rxrpc_peer *peer, *candidate; unsigned long hash_key = rxrpc_peer_hash_key(local, srx); - _enter("{%d,%d,%pI4+%hu}", - srx->transport_type, - srx->transport_len, - &srx->transport.sin.sin_addr, - ntohs(srx->transport.sin.sin_port)); + _enter("{%pISp}", &srx->transport); /* search the peer list first */ rcu_read_lock(); @@ -272,7 +350,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, return NULL; } - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); /* Need to check that we aren't racing with someone else */ peer = __rxrpc_lookup_peer_rcu(local, srx, hash_key); @@ -282,7 +360,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, hash_add_rcu(rxrpc_peer_hash, &candidate->hash_link, hash_key); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); if (peer) kfree(candidate); @@ -290,11 +368,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local, peer = candidate; } - _net("PEER %d {%d,%pI4+%hu}", - peer->debug_id, - peer->srx.transport_type, - &peer->srx.transport.sin.sin_addr, - ntohs(peer->srx.transport.sin.sin_port)); + _net("PEER %d {%pISp}", peer->debug_id, &peer->srx.transport); _leave(" = %p {u=%d}", peer, atomic_read(&peer->usage)); return peer; @@ -307,9 +381,24 @@ void __rxrpc_put_peer(struct rxrpc_peer *peer) { ASSERT(hlist_empty(&peer->error_targets)); - spin_lock(&rxrpc_peer_hash_lock); + spin_lock_bh(&rxrpc_peer_hash_lock); hash_del_rcu(&peer->hash_link); - spin_unlock(&rxrpc_peer_hash_lock); + spin_unlock_bh(&rxrpc_peer_hash_lock); kfree_rcu(peer, rcu); } + +/** + * rxrpc_kernel_get_peer - Get the peer address of a call + * @sock: The socket on which the call is in progress. + * @call: The call to query + * @_srx: Where to place the result + * + * Get the address of the remote peer in a call. + */ +void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *_srx) +{ + *_srx = call->peer->srx; +} +EXPORT_SYMBOL(rxrpc_kernel_get_peer); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index ced5f07444e5..65cd980767fa 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -17,12 +17,12 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { [RXRPC_CONN_UNUSED] = "Unused ", [RXRPC_CONN_CLIENT] = "Client ", + [RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc", [RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ", [RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ", [RXRPC_CONN_SERVICE] = "SvSecure", [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort", [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort", - [RXRPC_CONN_NETWORK_ERROR] = "NetError", }; /* @@ -30,6 +30,7 @@ static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = { */ static void *rxrpc_call_seq_start(struct seq_file *seq, loff_t *_pos) { + rcu_read_lock(); read_lock(&rxrpc_call_lock); return seq_list_start_head(&rxrpc_calls, *_pos); } @@ -42,17 +43,21 @@ static void *rxrpc_call_seq_next(struct seq_file *seq, void *v, loff_t *pos) static void rxrpc_call_seq_stop(struct seq_file *seq, void *v) { read_unlock(&rxrpc_call_lock); + rcu_read_unlock(); } static int rxrpc_call_seq_show(struct seq_file *seq, void *v) { - struct rxrpc_connection *conn; + struct rxrpc_local *local; + struct rxrpc_sock *rx; + struct rxrpc_peer *peer; struct rxrpc_call *call; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; if (v == &rxrpc_calls) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID CallID End Use State Abort " " UserID\n"); return 0; @@ -60,30 +65,35 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call = list_entry(v, struct rxrpc_call, link); - sprintf(lbuff, "%pI4:%u", - &call->local->srx.transport.sin.sin_addr, - ntohs(call->local->srx.transport.sin.sin_port)); + rx = rcu_dereference(call->socket); + if (rx) { + local = READ_ONCE(rx->local); + if (local) + sprintf(lbuff, "%pISpc", &local->srx.transport); + else + strcpy(lbuff, "no_local"); + } else { + strcpy(lbuff, "no_socket"); + } - conn = call->conn; - if (conn) - sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + peer = call->peer; + if (peer) + sprintf(rbuff, "%pISpc", &peer->srx.transport); else strcpy(rbuff, "no_connection"); seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" " %-8.8s %08x %lx\n", lbuff, rbuff, call->service_id, call->cid, call->call_id, - call->in_clientflag ? "Svc" : "Clt", + rxrpc_is_service_call(call) ? "Svc" : "Clt", atomic_read(&call->usage), rxrpc_call_states[call->state], - call->remote_abort ?: call->local_abort, + call->abort_code, call->user_call_ID); return 0; @@ -115,13 +125,13 @@ const struct file_operations rxrpc_call_seq_fops = { static void *rxrpc_connection_seq_start(struct seq_file *seq, loff_t *_pos) { read_lock(&rxrpc_connection_lock); - return seq_list_start_head(&rxrpc_connections, *_pos); + return seq_list_start_head(&rxrpc_connection_proc_list, *_pos); } static void *rxrpc_connection_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - return seq_list_next(v, &rxrpc_connections, pos); + return seq_list_next(v, &rxrpc_connection_proc_list, pos); } static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) @@ -132,29 +142,31 @@ static void rxrpc_connection_seq_stop(struct seq_file *seq, void *v) static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) { struct rxrpc_connection *conn; - char lbuff[4 + 4 + 4 + 4 + 5 + 1], rbuff[4 + 4 + 4 + 4 + 5 + 1]; + char lbuff[50], rbuff[50]; - if (v == &rxrpc_connections) { + if (v == &rxrpc_connection_proc_list) { seq_puts(seq, - "Proto Local Remote " + "Proto Local " + " Remote " " SvID ConnID End Use State Key " " Serial ISerial\n" ); return 0; } - conn = list_entry(v, struct rxrpc_connection, link); + conn = list_entry(v, struct rxrpc_connection, proc_link); + if (conn->state == RXRPC_CONN_SERVICE_PREALLOC) { + strcpy(lbuff, "no_local"); + strcpy(rbuff, "no_connection"); + goto print; + } - sprintf(lbuff, "%pI4:%u", - &conn->params.local->srx.transport.sin.sin_addr, - ntohs(conn->params.local->srx.transport.sin.sin_port)); - - sprintf(rbuff, "%pI4:%u", - &conn->params.peer->srx.transport.sin.sin_addr, - ntohs(conn->params.peer->srx.transport.sin.sin_port)); + sprintf(lbuff, "%pISpc", &conn->params.local->srx.transport); + sprintf(rbuff, "%pISpc", &conn->params.peer->srx.transport); +print: seq_printf(seq, - "UDP %-22.22s %-22.22s %4x %08x %s %3u" + "UDP %-47.47s %-47.47s %4x %08x %s %3u" " %s %08x %08x %08x\n", lbuff, rbuff, @@ -165,7 +177,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v) rxrpc_conn_states[conn->state], key_serial(conn->params.key), atomic_read(&conn->serial), - atomic_read(&conn->hi_serial)); + conn->hi_serial); return 0; } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 9ed66d533002..f05ea0a88076 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -19,399 +19,645 @@ #include "ar-internal.h" /* - * removal a call's user ID from the socket tree to make the user ID available - * again and so that it won't be seen again in association with that call + * Post a call for attention by the socket or kernel service. Further + * notifications are suppressed by putting recvmsg_link on a dummy queue. */ -void rxrpc_remove_user_ID(struct rxrpc_sock *rx, struct rxrpc_call *call) +void rxrpc_notify_socket(struct rxrpc_call *call) { - _debug("RELEASE CALL %d", call->debug_id); + struct rxrpc_sock *rx; + struct sock *sk; - if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { - write_lock_bh(&rx->call_lock); - rb_erase(&call->sock_node, &call->socket->calls); - clear_bit(RXRPC_CALL_HAS_USERID, &call->flags); - write_unlock_bh(&rx->call_lock); + _enter("%d", call->debug_id); + + if (!list_empty(&call->recvmsg_link)) + return; + + rcu_read_lock(); + + rx = rcu_dereference(call->socket); + sk = &rx->sk; + if (rx && sk->sk_state < RXRPC_CLOSE) { + if (call->notify_rx) { + call->notify_rx(sk, call, call->user_call_ID); + } else { + write_lock_bh(&rx->recvmsg_lock); + if (list_empty(&call->recvmsg_link)) { + rxrpc_get_call(call, rxrpc_call_got); + list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); + } + write_unlock_bh(&rx->recvmsg_lock); + + if (!sock_flag(sk, SOCK_DEAD)) { + _debug("call %ps", sk->sk_data_ready); + sk->sk_data_ready(sk); + } + } } - read_lock_bh(&call->state_lock); - if (!test_bit(RXRPC_CALL_RELEASED, &call->flags) && - !test_and_set_bit(RXRPC_CALL_EV_RELEASE, &call->events)) - rxrpc_queue_call(call); - read_unlock_bh(&call->state_lock); + rcu_read_unlock(); + _leave(""); } /* - * receive a message from an RxRPC socket + * Pass a call terminating message to userspace. + */ +static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) +{ + u32 tmp = 0; + int ret; + + switch (call->completion) { + case RXRPC_CALL_SUCCEEDED: + ret = 0; + if (rxrpc_is_service_call(call)) + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &tmp); + break; + case RXRPC_CALL_REMOTELY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_LOCALLY_ABORTED: + tmp = call->abort_code; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &tmp); + break; + case RXRPC_CALL_NETWORK_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &tmp); + break; + case RXRPC_CALL_LOCAL_ERROR: + tmp = call->error; + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp); + break; + default: + pr_err("Invalid terminal call state %u\n", call->state); + BUG(); + break; + } + + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_terminal, call->rx_hard_ack, + call->rx_pkt_offset, call->rx_pkt_len, ret); + return ret; +} + +/* + * Pass back notification of a new call. The call is added to the + * to-be-accepted list. This means that the next call to be accepted might not + * be the last call seen awaiting acceptance, but unless we leave this on the + * front of the queue and block all other messages until someone gives us a + * user_ID for it, there's not a lot we can do. + */ +static int rxrpc_recvmsg_new_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, int flags) +{ + int tmp = 0, ret; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &tmp); + + if (ret == 0 && !(flags & MSG_PEEK)) { + _debug("to be accepted"); + write_lock_bh(&rx->recvmsg_lock); + list_del_init(&call->recvmsg_link); + write_unlock_bh(&rx->recvmsg_lock); + + rxrpc_get_call(call, rxrpc_call_got); + write_lock(&rx->call_lock); + list_add_tail(&call->accept_link, &rx->to_be_accepted); + write_unlock(&rx->call_lock); + } + + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_to_be_accepted, 1, 0, 0, ret); + return ret; +} + +/* + * End the packet reception phase. + */ +static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) +{ + _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]); + + trace_rxrpc_receive(call, rxrpc_receive_end, 0, call->rx_top); + ASSERTCMP(call->rx_hard_ack, ==, call->rx_top); + + if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY) { + rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, serial, true, false, + rxrpc_propose_ack_terminal_ack); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } + + write_lock_bh(&call->state_lock); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + __rxrpc_call_completed(call); + break; + + case RXRPC_CALL_SERVER_RECV_REQUEST: + call->tx_phase = true; + call->state = RXRPC_CALL_SERVER_ACK_REQUEST; + break; + default: + break; + } + + write_unlock_bh(&call->state_lock); +} + +/* + * Discard a packet we've used up and advance the Rx window by one. + */ +static void rxrpc_rotate_rx_window(struct rxrpc_call *call) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_serial_t serial; + rxrpc_seq_t hard_ack, top; + u8 flags; + int ix; + + _enter("%d", call->debug_id); + + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + ASSERT(before(hard_ack, top)); + + hard_ack++; + ix = hard_ack & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + rxrpc_see_skb(skb, rxrpc_skb_rx_rotated); + sp = rxrpc_skb(skb); + flags = sp->hdr.flags; + serial = sp->hdr.serial; + if (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) + serial += (call->rxtx_annotations[ix] & RXRPC_RX_ANNO_JUMBO) - 1; + + call->rxtx_buffer[ix] = NULL; + call->rxtx_annotations[ix] = 0; + /* Barrier against rxrpc_input_data(). */ + smp_store_release(&call->rx_hard_ack, hard_ack); + + rxrpc_free_skb(skb, rxrpc_skb_rx_freed); + + _debug("%u,%u,%02x", hard_ack, top, flags); + trace_rxrpc_receive(call, rxrpc_receive_rotate, serial, hard_ack); + if (flags & RXRPC_LAST_PACKET) { + rxrpc_end_rx_phase(call, serial); + } else { + /* Check to see if there's an ACK that needs sending. */ + if (after_eq(hard_ack, call->ackr_consumed + 2) || + after_eq(top, call->ackr_seen + 2) || + (hard_ack == top && after(hard_ack, call->ackr_consumed))) + rxrpc_propose_ACK(call, RXRPC_ACK_DELAY, 0, serial, + true, false, + rxrpc_propose_ack_rotate_rx); + if (call->ackr_reason) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + } +} + +/* + * Decrypt and verify a (sub)packet. The packet's length may be changed due to + * padding, but if this is the case, the packet length will be resident in the + * socket buffer. Note that we can't modify the master skb info as the skb may + * be the home to multiple subpackets. + */ +static int rxrpc_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + u8 annotation, + unsigned int offset, unsigned int len) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + u16 cksum = sp->hdr.cksum; + + _enter(""); + + /* For all but the head jumbo subpacket, the security checksum is in a + * jumbo header immediately prior to the data. + */ + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 1) { + __be16 tmp; + if (skb_copy_bits(skb, offset - 2, &tmp, 2) < 0) + BUG(); + cksum = ntohs(tmp); + seq += (annotation & RXRPC_RX_ANNO_JUMBO) - 1; + } + + return call->conn->security->verify_packet(call, skb, offset, len, + seq, cksum); +} + +/* + * Locate the data within a packet. This is complicated by: + * + * (1) An skb may contain a jumbo packet - so we have to find the appropriate + * subpacket. + * + * (2) The (sub)packets may be encrypted and, if so, the encrypted portion + * contains an extra header which includes the true length of the data, + * excluding any encrypted padding. + */ +static int rxrpc_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + u8 *_annotation, + unsigned int *_offset, unsigned int *_len) +{ + unsigned int offset = sizeof(struct rxrpc_wire_header); + unsigned int len = *_len; + int ret; + u8 annotation = *_annotation; + + /* Locate the subpacket */ + len = skb->len - offset; + if ((annotation & RXRPC_RX_ANNO_JUMBO) > 0) { + offset += (((annotation & RXRPC_RX_ANNO_JUMBO) - 1) * + RXRPC_JUMBO_SUBPKTLEN); + len = (annotation & RXRPC_RX_ANNO_JLAST) ? + skb->len - offset : RXRPC_JUMBO_SUBPKTLEN; + } + + if (!(annotation & RXRPC_RX_ANNO_VERIFIED)) { + ret = rxrpc_verify_packet(call, skb, annotation, offset, len); + if (ret < 0) + return ret; + *_annotation |= RXRPC_RX_ANNO_VERIFIED; + } + + *_offset = offset; + *_len = len; + call->conn->security->locate_data(call, skb, _offset, _len); + return 0; +} + +/* + * Deliver messages to a call. This keeps processing packets until the buffer + * is filled and we find either more DATA (returns 0) or the end of the DATA + * (returns 1). If more packets are required, it returns -EAGAIN. + */ +static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, struct iov_iter *iter, + size_t len, int flags, size_t *_offset) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + rxrpc_seq_t hard_ack, top, seq; + size_t remain; + bool last; + unsigned int rx_pkt_offset, rx_pkt_len; + int ix, copy, ret = -EAGAIN, ret2; + + rx_pkt_offset = call->rx_pkt_offset; + rx_pkt_len = call->rx_pkt_len; + + if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) { + seq = call->rx_hard_ack; + ret = 1; + goto done; + } + + /* Barriers against rxrpc_input_data(). */ + hard_ack = call->rx_hard_ack; + top = smp_load_acquire(&call->rx_top); + for (seq = hard_ack + 1; before_eq(seq, top); seq++) { + ix = seq & RXRPC_RXTX_BUFF_MASK; + skb = call->rxtx_buffer[ix]; + if (!skb) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_hole, seq, + rx_pkt_offset, rx_pkt_len, 0); + break; + } + smp_rmb(); + rxrpc_see_skb(skb, rxrpc_skb_rx_seen); + sp = rxrpc_skb(skb); + + if (!(flags & MSG_PEEK)) + trace_rxrpc_receive(call, rxrpc_receive_front, + sp->hdr.serial, seq); + + if (msg) + sock_recv_timestamp(msg, sock->sk, skb); + + if (rx_pkt_offset == 0) { + ret2 = rxrpc_locate_data(call, skb, + &call->rxtx_annotations[ix], + &rx_pkt_offset, &rx_pkt_len); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_next, seq, + rx_pkt_offset, rx_pkt_len, ret2); + if (ret2 < 0) { + ret = ret2; + goto out; + } + } else { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_cont, seq, + rx_pkt_offset, rx_pkt_len, 0); + } + + /* We have to handle short, empty and used-up DATA packets. */ + remain = len - *_offset; + copy = rx_pkt_len; + if (copy > remain) + copy = remain; + if (copy > 0) { + ret2 = skb_copy_datagram_iter(skb, rx_pkt_offset, iter, + copy); + if (ret2 < 0) { + ret = ret2; + goto out; + } + + /* handle piecemeal consumption of data packets */ + rx_pkt_offset += copy; + rx_pkt_len -= copy; + *_offset += copy; + } + + if (rx_pkt_len > 0) { + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_full, seq, + rx_pkt_offset, rx_pkt_len, 0); + ASSERTCMP(*_offset, ==, len); + ret = 0; + break; + } + + /* The whole packet has been transferred. */ + last = sp->hdr.flags & RXRPC_LAST_PACKET; + if (!(flags & MSG_PEEK)) + rxrpc_rotate_rx_window(call); + rx_pkt_offset = 0; + rx_pkt_len = 0; + + if (last) { + ASSERTCMP(seq, ==, READ_ONCE(call->rx_top)); + ret = 1; + goto out; + } + } + +out: + if (!(flags & MSG_PEEK)) { + call->rx_pkt_offset = rx_pkt_offset; + call->rx_pkt_len = rx_pkt_len; + } +done: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_data_return, seq, + rx_pkt_offset, rx_pkt_len, ret); + return ret; +} + +/* + * Receive a message from an RxRPC socket * - we need to be careful about two or more threads calling recvmsg * simultaneously */ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct rxrpc_skb_priv *sp; - struct rxrpc_call *call = NULL, *continue_call = NULL; + struct rxrpc_call *call; struct rxrpc_sock *rx = rxrpc_sk(sock->sk); - struct sk_buff *skb; + struct list_head *l; + size_t copied = 0; long timeo; - int copy, ret, ullen, offset, copied = 0; - u32 abort_code; + int ret; DEFINE_WAIT(wait); - _enter(",,,%zu,%d", len, flags); + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0, 0, 0, 0); if (flags & (MSG_OOB | MSG_TRUNC)) return -EOPNOTSUPP; - ullen = msg->msg_flags & MSG_CMSG_COMPAT ? 4 : sizeof(unsigned long); - timeo = sock_rcvtimeo(&rx->sk, flags & MSG_DONTWAIT); - msg->msg_flags |= MSG_MORE; +try_again: lock_sock(&rx->sk); - for (;;) { - /* return immediately if a client socket has no outstanding - * calls */ - if (RB_EMPTY_ROOT(&rx->calls)) { - if (copied) - goto out; - if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) { - release_sock(&rx->sk); - if (continue_call) - rxrpc_put_call(continue_call); - return -ENODATA; - } - } - - /* get the next message on the Rx queue */ - skb = skb_peek(&rx->sk.sk_receive_queue); - if (!skb) { - /* nothing remains on the queue */ - if (copied && - (flags & MSG_PEEK || timeo == 0)) - goto out; - - /* wait for a message to turn up */ - release_sock(&rx->sk); - prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, - TASK_INTERRUPTIBLE); - ret = sock_error(&rx->sk); - if (ret) - goto wait_error; - - if (skb_queue_empty(&rx->sk.sk_receive_queue)) { - if (signal_pending(current)) - goto wait_interrupted; - timeo = schedule_timeout(timeo); - } - finish_wait(sk_sleep(&rx->sk), &wait); - lock_sock(&rx->sk); - continue; - } - - peek_next_packet: - sp = rxrpc_skb(skb); - call = sp->call; - ASSERT(call != NULL); - - _debug("next pkt %s", rxrpc_pkts[sp->hdr.type]); - - /* make sure we wait for the state to be updated in this call */ - spin_lock_bh(&call->lock); - spin_unlock_bh(&call->lock); - - if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) { - _debug("packet from released call"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - continue; - } - - /* determine whether to continue last data receive */ - if (continue_call) { - _debug("maybe cont"); - if (call != continue_call || - skb->mark != RXRPC_SKB_MARK_DATA) { - release_sock(&rx->sk); - rxrpc_put_call(continue_call); - _leave(" = %d [noncont]", copied); - return copied; - } - } - - rxrpc_get_call(call); - - /* copy the peer address and timestamp */ - if (!continue_call) { - if (msg->msg_name) { - size_t len = - sizeof(call->conn->params.peer->srx); - memcpy(msg->msg_name, - &call->conn->params.peer->srx, len); - msg->msg_namelen = len; - } - sock_recv_timestamp(msg, &rx->sk, skb); - } - - /* receive the message */ - if (skb->mark != RXRPC_SKB_MARK_DATA) - goto receive_non_data_message; - - _debug("recvmsg DATA #%u { %d, %d }", - sp->hdr.seq, skb->len, sp->offset); - - if (!continue_call) { - /* only set the control data once per recvmsg() */ - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); - } - - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - call->rx_data_recv = sp->hdr.seq; - - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - - offset = sp->offset; - copy = skb->len - offset; - if (copy > len - copied) - copy = len - copied; - - ret = skb_copy_datagram_msg(skb, offset, msg, copy); - - if (ret < 0) - goto copy_error; - - /* handle piecemeal consumption of data packets */ - _debug("copied %d+%d", copy, copied); - - offset += copy; - copied += copy; - - if (!(flags & MSG_PEEK)) - sp->offset = offset; - - if (sp->offset < skb->len) { - _debug("buffer full"); - ASSERTCMP(copied, ==, len); - break; - } - - /* we transferred the whole data packet */ - if (!(flags & MSG_PEEK)) - rxrpc_kernel_data_consumed(call, skb); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - _debug("last"); - if (rxrpc_conn_is_client(call->conn)) { - /* last byte of reply received */ - ret = copied; - goto terminal_message; - } - - /* last bit of request received */ - if (!(flags & MSG_PEEK)) { - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != - skb) - BUG(); - rxrpc_free_skb(skb); - } - msg->msg_flags &= ~MSG_MORE; - break; - } - - /* move on to the next data message */ - _debug("next"); - if (!continue_call) - continue_call = sp->call; - else - rxrpc_put_call(call); - call = NULL; - - if (flags & MSG_PEEK) { - _debug("peek next"); - skb = skb->next; - if (skb == (struct sk_buff *) &rx->sk.sk_receive_queue) - break; - goto peek_next_packet; - } - - _debug("eat packet"); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); + /* Return immediately if a client socket has no outstanding calls */ + if (RB_EMPTY_ROOT(&rx->calls) && + list_empty(&rx->recvmsg_q) && + rx->sk.sk_state != RXRPC_SERVER_LISTENING) { + release_sock(&rx->sk); + return -ENODATA; } - /* end of non-terminal data packet reception for the moment */ - _debug("end rcv data"); -out: - release_sock(&rx->sk); - if (call) - rxrpc_put_call(call); - if (continue_call) - rxrpc_put_call(continue_call); - _leave(" = %d [data]", copied); - return copied; - - /* handle non-DATA messages such as aborts, incoming connections and - * final ACKs */ -receive_non_data_message: - _debug("non-data"); - - if (skb->mark == RXRPC_SKB_MARK_NEW_CALL) { - _debug("RECV NEW CALL"); - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NEW_CALL, 0, &abort_code); - if (ret < 0) - goto copy_error; - if (!(flags & MSG_PEEK)) { - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); + if (list_empty(&rx->recvmsg_q)) { + ret = -EWOULDBLOCK; + if (timeo == 0) { + call = NULL; + goto error_no_call; } - goto out; + + release_sock(&rx->sk); + + /* Wait for something to happen */ + prepare_to_wait_exclusive(sk_sleep(&rx->sk), &wait, + TASK_INTERRUPTIBLE); + ret = sock_error(&rx->sk); + if (ret) + goto wait_error; + + if (list_empty(&rx->recvmsg_q)) { + if (signal_pending(current)) + goto wait_interrupted; + trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, + 0, 0, 0, 0); + timeo = schedule_timeout(timeo); + } + finish_wait(sk_sleep(&rx->sk), &wait); + goto try_again; } - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, - ullen, &call->user_call_ID); - if (ret < 0) - goto copy_error; - ASSERT(test_bit(RXRPC_CALL_HAS_USERID, &call->flags)); + /* Find the next call and dequeue it if we're not just peeking. If we + * do dequeue it, that comes with a ref that we will need to release. + */ + write_lock_bh(&rx->recvmsg_lock); + l = rx->recvmsg_q.next; + call = list_entry(l, struct rxrpc_call, recvmsg_link); + if (!(flags & MSG_PEEK)) + list_del_init(&call->recvmsg_link); + else + rxrpc_get_call(call, rxrpc_call_got); + write_unlock_bh(&rx->recvmsg_lock); - switch (skb->mark) { - case RXRPC_SKB_MARK_DATA: + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0, 0, 0, 0); + + if (test_bit(RXRPC_CALL_RELEASED, &call->flags)) BUG(); - case RXRPC_SKB_MARK_FINAL_ACK: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ACK, 0, &abort_code); + + if (test_bit(RXRPC_CALL_HAS_USERID, &call->flags)) { + if (flags & MSG_CMSG_COMPAT) { + unsigned int id32 = call->user_call_ID; + + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned int), &id32); + } else { + ret = put_cmsg(msg, SOL_RXRPC, RXRPC_USER_CALL_ID, + sizeof(unsigned long), + &call->user_call_ID); + } + if (ret < 0) + goto error; + } + + if (msg->msg_name) { + size_t len = sizeof(call->conn->params.peer->srx); + memcpy(msg->msg_name, &call->conn->params.peer->srx, len); + msg->msg_namelen = len; + } + + switch (call->state) { + case RXRPC_CALL_SERVER_ACCEPTING: + ret = rxrpc_recvmsg_new_call(rx, call, msg, flags); break; - case RXRPC_SKB_MARK_BUSY: - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_BUSY, 0, &abort_code); - break; - case RXRPC_SKB_MARK_REMOTE_ABORT: - abort_code = call->remote_abort; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ABORT: - abort_code = call->local_abort; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_ABORT, 4, &abort_code); - break; - case RXRPC_SKB_MARK_NET_ERROR: - _debug("RECV NET ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_NET_ERROR, 4, &abort_code); - break; - case RXRPC_SKB_MARK_LOCAL_ERROR: - _debug("RECV LOCAL ERROR %d", sp->error); - abort_code = sp->error; - ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, - &abort_code); + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len, + flags, &copied); + if (ret == -EAGAIN) + ret = 0; + + if (after(call->rx_top, call->rx_hard_ack) && + call->rxtx_buffer[(call->rx_hard_ack + 1) & RXRPC_RXTX_BUFF_MASK]) + rxrpc_notify_socket(call); break; default: - pr_err("Unknown packet mark %u\n", skb->mark); - BUG(); + ret = 0; break; } if (ret < 0) - goto copy_error; + goto error; -terminal_message: - _debug("terminal"); - msg->msg_flags &= ~MSG_MORE; - msg->msg_flags |= MSG_EOR; - - if (!(flags & MSG_PEEK)) { - _net("free terminal skb %p", skb); - if (skb_dequeue(&rx->sk.sk_receive_queue) != skb) - BUG(); - rxrpc_free_skb(skb); - rxrpc_remove_user_ID(rx, call); + if (call->state == RXRPC_CALL_COMPLETE) { + ret = rxrpc_recvmsg_term(call, msg); + if (ret < 0) + goto error; + if (!(flags & MSG_PEEK)) + rxrpc_release_call(rx, call); + msg->msg_flags |= MSG_EOR; + ret = 1; } - release_sock(&rx->sk); - rxrpc_put_call(call); - if (continue_call) - rxrpc_put_call(continue_call); - _leave(" = %d", ret); - return ret; + if (ret == 0) + msg->msg_flags |= MSG_MORE; + else + msg->msg_flags &= ~MSG_MORE; + ret = copied; -copy_error: - _debug("copy error"); +error: + rxrpc_put_call(call, rxrpc_call_put); +error_no_call: release_sock(&rx->sk); - rxrpc_put_call(call); - if (continue_call) - rxrpc_put_call(continue_call); - _leave(" = %d", ret); + trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, 0, 0, 0, ret); return ret; wait_interrupted: ret = sock_intr_errno(timeo); wait_error: finish_wait(sk_sleep(&rx->sk), &wait); - if (continue_call) - rxrpc_put_call(continue_call); - if (copied) - copied = ret; - _leave(" = %d [waitfail %d]", copied, ret); - return copied; - + call = NULL; + goto error_no_call; } /** - * rxrpc_kernel_is_data_last - Determine if data message is last one - * @skb: Message holding data + * rxrpc_kernel_recv_data - Allow a kernel service to receive data/info + * @sock: The socket that the call exists on + * @call: The call to send data through + * @buf: The buffer to receive into + * @size: The size of the buffer, including data already read + * @_offset: The running offset into the buffer. + * @want_more: True if more data is expected to be read + * @_abort: Where the abort code is stored if -ECONNABORTED is returned * - * Determine if data message is last one for the parent call. - */ -bool rxrpc_kernel_is_data_last(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - ASSERTCMP(skb->mark, ==, RXRPC_SKB_MARK_DATA); - - return sp->hdr.flags & RXRPC_LAST_PACKET; -} - -EXPORT_SYMBOL(rxrpc_kernel_is_data_last); - -/** - * rxrpc_kernel_get_abort_code - Get the abort code from an RxRPC abort message - * @skb: Message indicating an abort + * Allow a kernel service to receive data and pick up information about the + * state of a call. Returns 0 if got what was asked for and there's more + * available, 1 if we got what was asked for and we're at the end of the data + * and -EAGAIN if we need more data. * - * Get the abort code from an RxRPC abort message. + * Note that we may return -EAGAIN to drain empty packets at the end of the + * data, even if we've already copied over the requested data. + * + * This function adds the amount it transfers to *_offset, so this should be + * precleared as appropriate. Note that the amount remaining in the buffer is + * taken to be size - *_offset. + * + * *_abort should also be initialised to 0. */ -u32 rxrpc_kernel_get_abort_code(struct sk_buff *skb) +int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, + void *buf, size_t size, size_t *_offset, + bool want_more, u32 *_abort) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + struct iov_iter iter; + struct kvec iov; + int ret; + + _enter("{%d,%s},%zu/%zu,%d", + call->debug_id, rxrpc_call_states[call->state], + *_offset, size, want_more); + + ASSERTCMP(*_offset, <=, size); + ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_ACCEPTING); + + iov.iov_base = buf + *_offset; + iov.iov_len = size - *_offset; + iov_iter_kvec(&iter, ITER_KVEC | READ, &iov, 1, size - *_offset); + + lock_sock(sock->sk); + + switch (call->state) { + case RXRPC_CALL_CLIENT_RECV_REPLY: + case RXRPC_CALL_SERVER_RECV_REQUEST: + case RXRPC_CALL_SERVER_ACK_REQUEST: + ret = rxrpc_recvmsg_data(sock, call, NULL, &iter, size, 0, + _offset); + if (ret < 0) + goto out; + + /* We can only reach here with a partially full buffer if we + * have reached the end of the data. We must otherwise have a + * full buffer or have been given -EAGAIN. + */ + if (ret == 1) { + if (*_offset < size) + goto short_data; + if (!want_more) + goto read_phase_complete; + ret = 0; + goto out; + } + + if (!want_more) + goto excess_data; + goto out; + + case RXRPC_CALL_COMPLETE: + goto call_complete; - switch (skb->mark) { - case RXRPC_SKB_MARK_REMOTE_ABORT: - return sp->call->remote_abort; - case RXRPC_SKB_MARK_LOCAL_ABORT: - return sp->call->local_abort; default: - BUG(); + ret = -EINPROGRESS; + goto out; } + +read_phase_complete: + ret = 1; +out: + release_sock(sock->sk); + _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); + return ret; + +short_data: + ret = -EBADMSG; + goto out; +excess_data: + ret = -EMSGSIZE; + goto out; +call_complete: + *_abort = call->abort_code; + ret = call->error; + if (call->completion == RXRPC_CALL_SUCCEEDED) { + ret = 1; + if (size > 0) + ret = -ECONNRESET; + } + goto out; } - -EXPORT_SYMBOL(rxrpc_kernel_get_abort_code); - -/** - * rxrpc_kernel_get_error - Get the error number from an RxRPC error message - * @skb: Message indicating an error - * - * Get the error number from an RxRPC error message. - */ -int rxrpc_kernel_get_error_number(struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - return sp->error; -} - -EXPORT_SYMBOL(rxrpc_kernel_get_error_number); +EXPORT_SYMBOL(rxrpc_kernel_recv_data); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 63afa9e9cc08..627abed5f999 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -80,12 +80,10 @@ static int rxkad_init_connection_security(struct rxrpc_connection *conn) case RXRPC_SECURITY_AUTH: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level1_hdr); - conn->header_size += sizeof(struct rxkad_level1_hdr); break; case RXRPC_SECURITY_ENCRYPT: conn->size_align = 8; conn->security_size = sizeof(struct rxkad_level2_hdr); - conn->header_size += sizeof(struct rxkad_level2_hdr); break; default: ret = -EKEYREJECTED; @@ -161,7 +159,7 @@ static int rxkad_secure_packet_auth(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; data_size |= (u32)check << 16; hdr.data_size = htonl(data_size); @@ -205,7 +203,7 @@ static int rxkad_secure_packet_encrypt(const struct rxrpc_call *call, _enter(""); - check = sp->hdr.seq ^ sp->hdr.callNumber; + check = sp->hdr.seq ^ call->call_id; rxkhdr.data_size = htonl(data_size | (u32)check << 16); rxkhdr.checksum = 0; @@ -275,9 +273,9 @@ static int rxkad_secure_packet(struct rxrpc_call *call, memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* calculate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); x |= sp->hdr.seq & 0x3fffffff; - call->crypto_buf[0] = htonl(sp->hdr.callNumber); + call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); sg_init_one(&sg, call->crypto_buf, 8); @@ -316,12 +314,11 @@ static int rxkad_secure_packet(struct rxrpc_call *call, /* * decrypt partial encryption on a packet (level 1 security) */ -static int rxkad_verify_packet_auth(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq) { struct rxkad_level1_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist sg[16]; @@ -332,15 +329,20 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, _enter(""); - sp = rxrpc_skb(skb); + if (len < 8) { + rxrpc_abort_call("V1H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0 || nsg > 16) goto nomem; sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, 8); + skb_to_sgvec(skb, sg, offset, 8); /* start the decryption afresh */ memset(&iv, 0, sizeof(iv)); @@ -351,35 +353,35 @@ static int rxkad_verify_packet_auth(const struct rxrpc_call *call, crypto_skcipher_decrypt(req); skcipher_request_zero(req); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV1", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V1C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) - skb->len = data_size; + if (data_size > len) { + rxrpc_abort_call("V1L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -391,13 +393,12 @@ nomem: /* * wholly decrypt a packet (level 2 security) */ -static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq) { const struct rxrpc_key_token *token; struct rxkad_level2_hdr sechdr; - struct rxrpc_skb_priv *sp; SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); struct rxrpc_crypt iv; struct scatterlist _sg[4], *sg; @@ -408,9 +409,14 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, _enter(",{%d}", skb->len); - sp = rxrpc_skb(skb); + if (len < 8) { + rxrpc_abort_call("V2H", call, seq, RXKADSEALEDINCON, EPROTO); + goto protocol_error; + } - /* we want to decrypt the skbuff in-place */ + /* Decrypt the skbuff in-place. TODO: We really want to decrypt + * directly into the target buffer. + */ nsg = skb_cow_data(skb, 0, &trailer); if (nsg < 0) goto nomem; @@ -423,7 +429,7 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, } sg_init_table(sg, nsg); - skb_to_sgvec(skb, sg, 0, skb->len); + skb_to_sgvec(skb, sg, offset, len); /* decrypt from the session key */ token = call->conn->params.key->payload.data[0]; @@ -431,41 +437,41 @@ static int rxkad_verify_packet_encrypt(const struct rxrpc_call *call, skcipher_request_set_tfm(req, call->conn->cipher); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, sg, sg, skb->len, iv.x); + skcipher_request_set_crypt(req, sg, sg, len, iv.x); crypto_skcipher_decrypt(req); skcipher_request_zero(req); if (sg != _sg) kfree(sg); - /* remove the decrypted packet length */ - if (skb_copy_bits(skb, 0, &sechdr, sizeof(sechdr)) < 0) - goto datalen_error; - if (!skb_pull(skb, sizeof(sechdr))) - BUG(); + /* Extract the decrypted packet length */ + if (skb_copy_bits(skb, offset, &sechdr, sizeof(sechdr)) < 0) { + rxrpc_abort_call("XV2", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } + offset += sizeof(sechdr); + len -= sizeof(sechdr); buf = ntohl(sechdr.data_size); data_size = buf & 0xffff; check = buf >> 16; - check ^= sp->hdr.seq ^ sp->hdr.callNumber; + check ^= seq ^ call->call_id; check &= 0xffff; if (check != 0) { - *_abort_code = RXKADSEALEDINCON; + rxrpc_abort_call("V2C", call, seq, RXKADSEALEDINCON, EPROTO); goto protocol_error; } - /* shorten the packet to remove the padding */ - if (data_size > skb->len) - goto datalen_error; - else if (data_size < skb->len) - skb->len = data_size; + if (data_size > len) { + rxrpc_abort_call("V2L", call, seq, RXKADDATALEN, EPROTO); + goto protocol_error; + } _leave(" = 0 [dlen=%x]", data_size); return 0; -datalen_error: - *_abort_code = RXKADDATALEN; protocol_error: + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO"); return -EPROTO; @@ -475,40 +481,31 @@ nomem: } /* - * verify the security on a received packet + * Verify the security on a received packet or subpacket (if part of a + * jumbo packet). */ -static int rxkad_verify_packet(struct rxrpc_call *call, - struct sk_buff *skb, - u32 *_abort_code) +static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int offset, unsigned int len, + rxrpc_seq_t seq, u16 expected_cksum) { SKCIPHER_REQUEST_ON_STACK(req, call->conn->cipher); - struct rxrpc_skb_priv *sp; struct rxrpc_crypt iv; struct scatterlist sg; u16 cksum; u32 x, y; - int ret; - - sp = rxrpc_skb(skb); _enter("{%d{%x}},{#%u}", - call->debug_id, key_serial(call->conn->params.key), sp->hdr.seq); + call->debug_id, key_serial(call->conn->params.key), seq); if (!call->conn->cipher) return 0; - if (sp->hdr.securityIndex != RXRPC_SECURITY_RXKAD) { - *_abort_code = RXKADINCONSISTENCY; - _leave(" = -EPROTO [not rxkad]"); - return -EPROTO; - } - /* continue encrypting from where we left off */ memcpy(&iv, call->conn->csum_iv.x, sizeof(iv)); /* validate the security checksum */ - x = call->channel << (32 - RXRPC_CIDSHIFT); - x |= sp->hdr.seq & 0x3fffffff; + x = (call->cid & RXRPC_CHANNELMASK) << (32 - RXRPC_CIDSHIFT); + x |= seq & 0x3fffffff; call->crypto_buf[0] = htonl(call->call_id); call->crypto_buf[1] = htonl(x); @@ -524,29 +521,69 @@ static int rxkad_verify_packet(struct rxrpc_call *call, if (cksum == 0) cksum = 1; /* zero checksums are not permitted */ - if (sp->hdr.cksum != cksum) { - *_abort_code = RXKADSEALEDINCON; + if (cksum != expected_cksum) { + rxrpc_abort_call("VCK", call, seq, RXKADSEALEDINCON, EPROTO); + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); _leave(" = -EPROTO [csum failed]"); return -EPROTO; } switch (call->conn->params.security_level) { case RXRPC_SECURITY_PLAIN: - ret = 0; - break; + return 0; case RXRPC_SECURITY_AUTH: - ret = rxkad_verify_packet_auth(call, skb, _abort_code); - break; + return rxkad_verify_packet_1(call, skb, offset, len, seq); case RXRPC_SECURITY_ENCRYPT: - ret = rxkad_verify_packet_encrypt(call, skb, _abort_code); - break; + return rxkad_verify_packet_2(call, skb, offset, len, seq); default: - ret = -ENOANO; - break; + return -ENOANO; } +} - _leave(" = %d", ret); - return ret; +/* + * Locate the data contained in a packet that was partially encrypted. + */ +static void rxkad_locate_data_1(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level1_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in a packet that was completely encrypted. + */ +static void rxkad_locate_data_2(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + struct rxkad_level2_hdr sechdr; + + if (skb_copy_bits(skb, *_offset, &sechdr, sizeof(sechdr)) < 0) + BUG(); + *_offset += sizeof(sechdr); + *_len = ntohl(sechdr.data_size) & 0xffff; +} + +/* + * Locate the data contained in an already decrypted packet. + */ +static void rxkad_locate_data(struct rxrpc_call *call, struct sk_buff *skb, + unsigned int *_offset, unsigned int *_len) +{ + switch (call->conn->params.security_level) { + case RXRPC_SECURITY_AUTH: + rxkad_locate_data_1(call, skb, _offset, _len); + return; + case RXRPC_SECURITY_ENCRYPT: + rxkad_locate_data_2(call, skb, _offset, _len); + return; + default: + return; + } } /* @@ -716,7 +753,7 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, struct rxkad_challenge challenge; struct rxkad_response resp __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); u32 version, nonce, min_level, abort_code; int ret; @@ -734,8 +771,8 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn, } abort_code = RXKADPACKETSHORT; - sp = rxrpc_skb(skb); - if (skb_copy_bits(skb, 0, &challenge, sizeof(challenge)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &challenge, sizeof(challenge)) < 0) goto protocol_error; version = ntohl(challenge.version); @@ -981,7 +1018,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, { struct rxkad_response response __attribute__((aligned(8))); /* must be aligned for crypto */ - struct rxrpc_skb_priv *sp; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; time_t expiry; void *ticket; @@ -992,7 +1029,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, _enter("{%d,%x}", conn->debug_id, key_serial(conn->server_key)); abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, &response, sizeof(response)) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + &response, sizeof(response)) < 0) goto protocol_error; if (!pskb_pull(skb, sizeof(response))) BUG(); @@ -1000,7 +1038,6 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, version = ntohl(response.version); ticket_len = ntohl(response.ticket_len); kvno = ntohl(response.kvno); - sp = rxrpc_skb(skb); _proto("Rx RESPONSE %%%u { v=%u kv=%u tl=%u }", sp->hdr.serial, version, kvno, ticket_len); @@ -1022,7 +1059,8 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, return -ENOMEM; abort_code = RXKADPACKETSHORT; - if (skb_copy_bits(skb, 0, ticket, ticket_len) < 0) + if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header), + ticket, ticket_len) < 0) goto protocol_error_free; ret = rxkad_decrypt_ticket(conn, ticket, ticket_len, &session_key, @@ -1147,6 +1185,7 @@ const struct rxrpc_security rxkad = { .prime_packet_security = rxkad_prime_packet_security, .secure_packet = rxkad_secure_packet, .verify_packet = rxkad_verify_packet, + .locate_data = rxkad_locate_data, .issue_challenge = rxkad_issue_challenge, .respond_to_challenge = rxkad_respond_to_challenge, .verify_response = rxkad_verify_response, diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c index 814d285ff802..7d921e56e715 100644 --- a/net/rxrpc/security.c +++ b/net/rxrpc/security.c @@ -130,20 +130,20 @@ int rxrpc_init_server_conn_security(struct rxrpc_connection *conn) } /* find the service */ - read_lock_bh(&local->services_lock); - list_for_each_entry(rx, &local->services, listen_link) { - if (rx->srx.srx_service == conn->params.service_id) - goto found_service; - } + read_lock(&local->services_lock); + rx = rcu_dereference_protected(local->service, + lockdep_is_held(&local->services_lock)); + if (rx && rx->srx.srx_service == conn->params.service_id) + goto found_service; /* the service appears to have died */ - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOENT"); return -ENOENT; found_service: if (!rx->securities) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = -ENOKEY"); return -ENOKEY; } @@ -152,13 +152,13 @@ found_service: kref = keyring_search(make_key_ref(rx->securities, 1UL), &key_type_rxrpc_s, kdesc); if (IS_ERR(kref)) { - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); _leave(" = %ld [search]", PTR_ERR(kref)); return PTR_ERR(kref); } key = key_ref_to_ptr(kref); - read_unlock_bh(&local->services_lock); + read_unlock(&local->services_lock); conn->server_key = key; conn->security = sec; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c new file mode 100644 index 000000000000..3322543d460a --- /dev/null +++ b/net/rxrpc/sendmsg.c @@ -0,0 +1,606 @@ +/* AF_RXRPC sendmsg() implementation. + * + * Copyright (C) 2007, 2016 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include "ar-internal.h" + +enum rxrpc_command { + RXRPC_CMD_SEND_DATA, /* send data message */ + RXRPC_CMD_SEND_ABORT, /* request abort generation */ + RXRPC_CMD_ACCEPT, /* [server] accept incoming call */ + RXRPC_CMD_REJECT_BUSY, /* [server] reject a call as busy */ +}; + +/* + * wait for space to appear in the transmit/ACK window + * - caller holds the socket locked + */ +static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + DECLARE_WAITQUEUE(myself, current); + int ret; + + _enter(",{%u,%u,%u}", + call->tx_hard_ack, call->tx_top, call->tx_winsize); + + add_wait_queue(&call->waitq, &myself); + + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + ret = 0; + if (call->tx_top - call->tx_hard_ack < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) + break; + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -call->error; + break; + } + if (signal_pending(current)) { + ret = sock_intr_errno(*timeo); + break; + } + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + release_sock(&rx->sk); + *timeo = schedule_timeout(*timeo); + lock_sock(&rx->sk); + } + + remove_wait_queue(&call->waitq, &myself); + set_current_state(TASK_RUNNING); + _leave(" = %d", ret); + return ret; +} + +/* + * Schedule an instant Tx resend. + */ +static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) +{ + spin_lock_bh(&call->lock); + + if (call->state < RXRPC_CALL_COMPLETE) { + call->rxtx_annotations[ix] = RXRPC_TX_ANNO_RETRANS; + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + } + + spin_unlock_bh(&call->lock); +} + +/* + * Queue a DATA packet for transmission, set the resend timeout and send the + * packet immediately + */ +static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, + bool last) +{ + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t seq = sp->hdr.seq; + int ret, ix; + u8 annotation = RXRPC_TX_ANNO_UNACK; + + _net("queue skb %p [%d]", skb, seq); + + ASSERTCMP(seq, ==, call->tx_top + 1); + + if (last) + annotation |= RXRPC_TX_ANNO_LAST; + + /* We have to set the timestamp before queueing as the retransmit + * algorithm can see the packet as soon as we queue it. + */ + skb->tstamp = ktime_get_real(); + + ix = seq & RXRPC_RXTX_BUFF_MASK; + rxrpc_get_skb(skb, rxrpc_skb_tx_got); + call->rxtx_annotations[ix] = annotation; + smp_wmb(); + call->rxtx_buffer[ix] = skb; + call->tx_top = seq; + if (last) + trace_rxrpc_transmit(call, rxrpc_transmit_queue_last); + else + trace_rxrpc_transmit(call, rxrpc_transmit_queue); + + if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) { + _debug("________awaiting reply/ACK__________"); + write_lock_bh(&call->state_lock); + switch (call->state) { + case RXRPC_CALL_CLIENT_SEND_REQUEST: + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + break; + case RXRPC_CALL_SERVER_ACK_REQUEST: + call->state = RXRPC_CALL_SERVER_SEND_REPLY; + if (!last) + break; + case RXRPC_CALL_SERVER_SEND_REPLY: + call->state = RXRPC_CALL_SERVER_AWAIT_ACK; + break; + default: + break; + } + write_unlock_bh(&call->state_lock); + } + + if (seq == 1 && rxrpc_is_client_call(call)) + rxrpc_expose_client_call(call); + + ret = rxrpc_send_data_packet(call, skb, false); + if (ret < 0) { + _debug("need instant resend %d", ret); + rxrpc_instant_resend(call, ix); + } else { + ktime_t now = ktime_get_real(), resend_at; + + resend_at = ktime_add_ms(now, rxrpc_resend_timeout); + + if (ktime_before(resend_at, call->resend_at)) { + call->resend_at = resend_at; + rxrpc_set_timer(call, rxrpc_timer_set_for_send, now); + } + } + + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + _leave(""); +} + +/* + * send data through a socket + * - must be called in process context + * - caller holds the socket locked + */ +static int rxrpc_send_data(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct msghdr *msg, size_t len) +{ + struct rxrpc_skb_priv *sp; + struct sk_buff *skb; + struct sock *sk = &rx->sk; + long timeo; + bool more; + int ret, copied; + + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); + + /* this should be in poll */ + sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); + + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + return -EPIPE; + + more = msg->msg_flags & MSG_MORE; + + skb = call->tx_pending; + call->tx_pending = NULL; + rxrpc_see_skb(skb, rxrpc_skb_tx_seen); + + copied = 0; + do { + /* Check to see if there's a ping ACK to reply to. */ + if (call->ackr_reason == RXRPC_ACK_PING_RESPONSE) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ACK); + + if (!skb) { + size_t size, chunk, max, space; + + _debug("alloc"); + + if (call->tx_top - call->tx_hard_ack >= + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) { + ret = -EAGAIN; + if (msg->msg_flags & MSG_DONTWAIT) + goto maybe_error; + ret = rxrpc_wait_for_tx_window(rx, call, + &timeo); + if (ret < 0) + goto maybe_error; + } + + max = RXRPC_JUMBO_DATALEN; + max -= call->conn->security_size; + max &= ~(call->conn->size_align - 1UL); + + chunk = max; + if (chunk > msg_data_left(msg) && !more) + chunk = msg_data_left(msg); + + space = chunk + call->conn->size_align; + space &= ~(call->conn->size_align - 1UL); + + size = space + call->conn->security_size; + + _debug("SIZE: %zu/%zu/%zu", chunk, space, size); + + /* create a buffer that we can retain until it's ACK'd */ + skb = sock_alloc_send_skb( + sk, size, msg->msg_flags & MSG_DONTWAIT, &ret); + if (!skb) + goto maybe_error; + + rxrpc_new_skb(skb, rxrpc_skb_tx_new); + + _debug("ALLOC SEND %p", skb); + + ASSERTCMP(skb->mark, ==, 0); + + _debug("HS: %u", call->conn->security_size); + skb_reserve(skb, call->conn->security_size); + skb->len += call->conn->security_size; + + sp = rxrpc_skb(skb); + sp->remain = chunk; + if (sp->remain > skb_tailroom(skb)) + sp->remain = skb_tailroom(skb); + + _net("skb: hr %d, tr %d, hl %d, rm %d", + skb_headroom(skb), + skb_tailroom(skb), + skb_headlen(skb), + sp->remain); + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + _debug("append"); + sp = rxrpc_skb(skb); + + /* append next segment of data to the current buffer */ + if (msg_data_left(msg) > 0) { + int copy = skb_tailroom(skb); + ASSERTCMP(copy, >, 0); + if (copy > msg_data_left(msg)) + copy = msg_data_left(msg); + if (copy > sp->remain) + copy = sp->remain; + + _debug("add"); + ret = skb_add_data(skb, &msg->msg_iter, copy); + _debug("added"); + if (ret < 0) + goto efault; + sp->remain -= copy; + skb->mark += copy; + copied += copy; + } + + /* check for the far side aborting the call or a network error + * occurring */ + if (call->state == RXRPC_CALL_COMPLETE) + goto call_terminated; + + /* add the packet to the send queue if it's now full */ + if (sp->remain <= 0 || + (msg_data_left(msg) == 0 && !more)) { + struct rxrpc_connection *conn = call->conn; + uint32_t seq; + size_t pad; + + /* pad out if we're using security */ + if (conn->security_ix) { + pad = conn->security_size + skb->mark; + pad = conn->size_align - pad; + pad &= conn->size_align - 1; + _debug("pad %zu", pad); + if (pad) + memset(skb_put(skb, pad), 0, pad); + } + + seq = call->tx_top + 1; + + sp->hdr.seq = seq; + sp->hdr._rsvd = 0; + sp->hdr.flags = conn->out_clientflag; + + if (msg_data_left(msg) == 0 && !more) + sp->hdr.flags |= RXRPC_LAST_PACKET; + else if (call->tx_top - call->tx_hard_ack < + call->tx_winsize) + sp->hdr.flags |= RXRPC_MORE_PACKETS; + + ret = conn->security->secure_packet( + call, skb, skb->mark, skb->head); + if (ret < 0) + goto out; + + rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); + skb = NULL; + } + } while (msg_data_left(msg) > 0); + +success: + ret = copied; +out: + call->tx_pending = skb; + _leave(" = %d", ret); + return ret; + +call_terminated: + rxrpc_free_skb(skb, rxrpc_skb_tx_freed); + _leave(" = %d", -call->error); + return -call->error; + +maybe_error: + if (copied) + goto success; + goto out; + +efault: + ret = -EFAULT; + goto out; +} + +/* + * extract control messages from the sendmsg() control buffer + */ +static int rxrpc_sendmsg_cmsg(struct msghdr *msg, + unsigned long *user_call_ID, + enum rxrpc_command *command, + u32 *abort_code, + bool *_exclusive) +{ + struct cmsghdr *cmsg; + bool got_user_ID = false; + int len; + + *command = RXRPC_CMD_SEND_DATA; + + if (msg->msg_controllen == 0) + return -EINVAL; + + for_each_cmsghdr(cmsg, msg) { + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + len = cmsg->cmsg_len - CMSG_ALIGN(sizeof(struct cmsghdr)); + _debug("CMSG %d, %d, %d", + cmsg->cmsg_level, cmsg->cmsg_type, len); + + if (cmsg->cmsg_level != SOL_RXRPC) + continue; + + switch (cmsg->cmsg_type) { + case RXRPC_USER_CALL_ID: + if (msg->msg_flags & MSG_CMSG_COMPAT) { + if (len != sizeof(u32)) + return -EINVAL; + *user_call_ID = *(u32 *) CMSG_DATA(cmsg); + } else { + if (len != sizeof(unsigned long)) + return -EINVAL; + *user_call_ID = *(unsigned long *) + CMSG_DATA(cmsg); + } + _debug("User Call ID %lx", *user_call_ID); + got_user_ID = true; + break; + + case RXRPC_ABORT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_SEND_ABORT; + if (len != sizeof(*abort_code)) + return -EINVAL; + *abort_code = *(unsigned int *) CMSG_DATA(cmsg); + _debug("Abort %x", *abort_code); + if (*abort_code == 0) + return -EINVAL; + break; + + case RXRPC_ACCEPT: + if (*command != RXRPC_CMD_SEND_DATA) + return -EINVAL; + *command = RXRPC_CMD_ACCEPT; + if (len != 0) + return -EINVAL; + break; + + case RXRPC_EXCLUSIVE_CALL: + *_exclusive = true; + if (len != 0) + return -EINVAL; + break; + default: + return -EINVAL; + } + } + + if (!got_user_ID) + return -EINVAL; + _leave(" = 0"); + return 0; +} + +/* + * Create a new client call for sendmsg(). + */ +static struct rxrpc_call * +rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, + unsigned long user_call_ID, bool exclusive) +{ + struct rxrpc_conn_parameters cp; + struct rxrpc_call *call; + struct key *key; + + DECLARE_SOCKADDR(struct sockaddr_rxrpc *, srx, msg->msg_name); + + _enter(""); + + if (!msg->msg_name) + return ERR_PTR(-EDESTADDRREQ); + + key = rx->key; + if (key && !rx->key->payload.data[0]) + key = NULL; + + memset(&cp, 0, sizeof(cp)); + cp.local = rx->local; + cp.key = rx->key; + cp.security_level = rx->min_sec_level; + cp.exclusive = rx->exclusive | exclusive; + cp.service_id = srx->srx_service; + call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, GFP_KERNEL); + + _leave(" = %p\n", call); + return call; +} + +/* + * send a message forming part of a client call through an RxRPC socket + * - caller holds the socket locked + * - the socket may be either a client socket or a server socket + */ +int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) +{ + enum rxrpc_command cmd; + struct rxrpc_call *call; + unsigned long user_call_ID = 0; + bool exclusive = false; + u32 abort_code = 0; + int ret; + + _enter(""); + + ret = rxrpc_sendmsg_cmsg(msg, &user_call_ID, &cmd, &abort_code, + &exclusive); + if (ret < 0) + return ret; + + if (cmd == RXRPC_CMD_ACCEPT) { + if (rx->sk.sk_state != RXRPC_SERVER_LISTENING) + return -EINVAL; + call = rxrpc_accept_call(rx, user_call_ID, NULL); + if (IS_ERR(call)) + return PTR_ERR(call); + rxrpc_put_call(call, rxrpc_call_put); + return 0; + } + + call = rxrpc_find_call_by_user_ID(rx, user_call_ID); + if (!call) { + if (cmd != RXRPC_CMD_SEND_DATA) + return -EBADSLT; + call = rxrpc_new_client_call_for_sendmsg(rx, msg, user_call_ID, + exclusive); + if (IS_ERR(call)) + return PTR_ERR(call); + } + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + /* it's too late for this call */ + ret = -ESHUTDOWN; + } else if (cmd == RXRPC_CMD_SEND_ABORT) { + ret = 0; + if (rxrpc_abort_call("CMD", call, 0, abort_code, ECONNABORTED)) + ret = rxrpc_send_call_packet(call, + RXRPC_PACKET_TYPE_ABORT); + } else if (cmd != RXRPC_CMD_SEND_DATA) { + ret = -EINVAL; + } else if (rxrpc_is_client_call(call) && + call->state != RXRPC_CALL_CLIENT_SEND_REQUEST) { + /* request phase complete for this client call */ + ret = -EPROTO; + } else if (rxrpc_is_service_call(call) && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + /* Reply phase not begun or not complete for service call. */ + ret = -EPROTO; + } else { + ret = rxrpc_send_data(rx, call, msg, len); + } + + rxrpc_put_call(call, rxrpc_call_put); + _leave(" = %d", ret); + return ret; +} + +/** + * rxrpc_kernel_send_data - Allow a kernel service to send data on a call + * @sock: The socket the call is on + * @call: The call to send data through + * @msg: The data to send + * @len: The amount of data to send + * + * Allow a kernel service to send data on a call. The call must be in an state + * appropriate to sending data. No control data should be supplied in @msg, + * nor should an address be supplied. MSG_MORE should be flagged if there's + * more data to come, otherwise this data will end the transmission phase. + */ +int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, + struct msghdr *msg, size_t len) +{ + int ret; + + _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]); + + ASSERTCMP(msg->msg_name, ==, NULL); + ASSERTCMP(msg->msg_control, ==, NULL); + + lock_sock(sock->sk); + + _debug("CALL %d USR %lx ST %d on CONN %p", + call->debug_id, call->user_call_ID, call->state, call->conn); + + if (call->state >= RXRPC_CALL_COMPLETE) { + ret = -ESHUTDOWN; /* it's too late for this call */ + } else if (call->state != RXRPC_CALL_CLIENT_SEND_REQUEST && + call->state != RXRPC_CALL_SERVER_ACK_REQUEST && + call->state != RXRPC_CALL_SERVER_SEND_REPLY) { + ret = -EPROTO; /* request phase complete for this client call */ + } else { + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); + } + + release_sock(sock->sk); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(rxrpc_kernel_send_data); + +/** + * rxrpc_kernel_abort_call - Allow a kernel service to abort a call + * @sock: The socket the call is on + * @call: The call to be aborted + * @abort_code: The abort code to stick into the ABORT packet + * @error: Local error value + * @why: 3-char string indicating why. + * + * Allow a kernel service to abort a call, if it's still in an abortable state. + */ +void rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call, + u32 abort_code, int error, const char *why) +{ + _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why); + + lock_sock(sock->sk); + + if (rxrpc_abort_call(why, call, 0, abort_code, error)) + rxrpc_send_call_packet(call, RXRPC_PACKET_TYPE_ABORT); + + release_sock(sock->sk); + _leave(""); +} + +EXPORT_SYMBOL(rxrpc_kernel_abort_call); diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 06c51d4b622d..67b02c45271b 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -18,148 +18,82 @@ #include #include "ar-internal.h" +#define select_skb_count(op) (op >= rxrpc_skb_tx_cleaned ? &rxrpc_n_tx_skbs : &rxrpc_n_rx_skbs) + /* - * set up for the ACK at the end of the receive phase when we discard the final - * receive phase data packet - * - called with softirqs disabled + * Note the allocation or reception of a socket buffer. */ -static void rxrpc_request_final_ACK(struct rxrpc_call *call) +void rxrpc_new_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - /* the call may be aborted before we have a chance to ACK it */ - write_lock(&call->state_lock); - - switch (call->state) { - case RXRPC_CALL_CLIENT_RECV_REPLY: - call->state = RXRPC_CALL_CLIENT_FINAL_ACK; - _debug("request final ACK"); - - /* get an extra ref on the call for the final-ACK generator to - * release */ - rxrpc_get_call(call); - set_bit(RXRPC_CALL_EV_ACK_FINAL, &call->events); - if (try_to_del_timer_sync(&call->ack_timer) >= 0) - rxrpc_queue_call(call); - break; - - case RXRPC_CALL_SERVER_RECV_REQUEST: - call->state = RXRPC_CALL_SERVER_ACK_REQUEST; - default: - break; - } - - write_unlock(&call->state_lock); + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } /* - * drop the bottom ACK off of the call ACK window and advance the window + * Note the re-emergence of a socket buffer from a queue or buffer. */ -static void rxrpc_hard_ACK_data(struct rxrpc_call *call, - struct rxrpc_skb_priv *sp) +void rxrpc_see_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - int loop; - u32 seq; - - spin_lock_bh(&call->lock); - - _debug("hard ACK #%u", sp->hdr.seq); - - for (loop = 0; loop < RXRPC_ACKR_WINDOW_ASZ; loop++) { - call->ackr_window[loop] >>= 1; - call->ackr_window[loop] |= - call->ackr_window[loop + 1] << (BITS_PER_LONG - 1); + const void *here = __builtin_return_address(0); + if (skb) { + int n = atomic_read(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); } - - seq = sp->hdr.seq; - ASSERTCMP(seq, ==, call->rx_data_eaten + 1); - call->rx_data_eaten = seq; - - if (call->ackr_win_top < UINT_MAX) - call->ackr_win_top++; - - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_post, >=, call->rx_data_recv); - ASSERTIFCMP(call->state <= RXRPC_CALL_COMPLETE, - call->rx_data_recv, >=, call->rx_data_eaten); - - if (sp->hdr.flags & RXRPC_LAST_PACKET) { - rxrpc_request_final_ACK(call); - } else if (atomic_dec_and_test(&call->ackr_not_idle) && - test_and_clear_bit(RXRPC_CALL_TX_SOFT_ACK, &call->flags)) { - /* We previously soft-ACK'd some received packets that have now - * been consumed, so send a hard-ACK if no more packets are - * immediately forthcoming to allow the transmitter to free up - * its Tx bufferage. - */ - _debug("send Rx idle ACK"); - __rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, sp->hdr.serial, - false); - } - - spin_unlock_bh(&call->lock); } -/** - * rxrpc_kernel_data_consumed - Record consumption of data message - * @call: The call to which the message pertains. - * @skb: Message holding data - * - * Record the consumption of a data message and generate an ACK if appropriate. - * The call state is shifted if this was the final packet. The caller must be - * in process context with no spinlocks held. - * - * TODO: Actually generate the ACK here rather than punting this to the - * workqueue. - */ -void rxrpc_kernel_data_consumed(struct rxrpc_call *call, struct sk_buff *skb) -{ - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - - _enter("%d,%p{%u}", call->debug_id, skb, sp->hdr.seq); - - ASSERTCMP(sp->call, ==, call); - ASSERTCMP(sp->hdr.type, ==, RXRPC_PACKET_TYPE_DATA); - - /* TODO: Fix the sequence number tracking */ - ASSERTCMP(sp->hdr.seq, >=, call->rx_data_recv); - ASSERTCMP(sp->hdr.seq, <=, call->rx_data_recv + 1); - ASSERTCMP(sp->hdr.seq, >, call->rx_data_eaten); - - call->rx_data_recv = sp->hdr.seq; - rxrpc_hard_ACK_data(call, sp); -} -EXPORT_SYMBOL(rxrpc_kernel_data_consumed); - /* - * Destroy a packet that has an RxRPC control buffer + * Note the addition of a ref on a socket buffer. */ -void rxrpc_packet_destructor(struct sk_buff *skb) +void rxrpc_get_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_call *call = sp->call; + const void *here = __builtin_return_address(0); + int n = atomic_inc_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + skb_get(skb); +} - _enter("%p{%p}", skb, call); - - if (call) { - if (atomic_dec_return(&call->skb_count) < 0) - BUG(); - rxrpc_put_call(call); - sp->call = NULL; +/* + * Note the destruction of a socket buffer. + */ +void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) +{ + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); } - - if (skb->sk) - sock_rfree(skb); - _leave(""); } -/** - * rxrpc_kernel_free_skb - Free an RxRPC socket buffer - * @skb: The socket buffer to be freed - * - * Let RxRPC free its own socket buffer, permitting it to maintain debug - * accounting. +/* + * Note the injected loss of a socket buffer. */ -void rxrpc_kernel_free_skb(struct sk_buff *skb) +void rxrpc_lose_skb(struct sk_buff *skb, enum rxrpc_skb_trace op) { - rxrpc_free_skb(skb); + const void *here = __builtin_return_address(0); + if (skb) { + int n; + CHECK_SLAB_OKAY(&skb->users); + n = atomic_dec_return(select_skb_count(op)); + trace_rxrpc_skb(skb, op, atomic_read(&skb->users), n, here); + kfree_skb(skb); + } +} + +/* + * Clear a queue of socket buffers. + */ +void rxrpc_purge_queue(struct sk_buff_head *list) +{ + const void *here = __builtin_return_address(0); + struct sk_buff *skb; + while ((skb = skb_dequeue((list))) != NULL) { + int n = atomic_dec_return(select_skb_count(rxrpc_skb_rx_purged)); + trace_rxrpc_skb(skb, rxrpc_skb_rx_purged, + atomic_read(&skb->users), n, here); + kfree_skb(skb); + } } -EXPORT_SYMBOL(rxrpc_kernel_free_skb); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 03ad08774d4e..34c706d2f79c 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -20,7 +20,7 @@ static const unsigned int one = 1; static const unsigned int four = 4; static const unsigned int thirtytwo = 32; static const unsigned int n_65535 = 65535; -static const unsigned int n_max_acks = RXRPC_MAXACKS; +static const unsigned int n_max_acks = RXRPC_RXTX_BUFF_SIZE - 1; /* * RxRPC operating parameters. @@ -35,7 +35,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_requested_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&zero, }, { @@ -43,7 +43,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_soft_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -51,7 +51,7 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_idle_ack_delay, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, { @@ -59,6 +59,22 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_resend_timeout, .maxlen = sizeof(unsigned int), .mode = 0644, + .proc_handler = proc_dointvec, + .extra1 = (void *)&one, + }, + { + .procname = "idle_conn_expiry", + .data = &rxrpc_conn_idle_client_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + .extra1 = (void *)&one, + }, + { + .procname = "idle_conn_fast_expiry", + .data = &rxrpc_conn_idle_client_fast_expiry, + .maxlen = sizeof(unsigned int), + .mode = 0644, .proc_handler = proc_dointvec_ms_jiffies, .extra1 = (void *)&one, }, @@ -69,29 +85,28 @@ static struct ctl_table rxrpc_sysctl_table[] = { .data = &rxrpc_max_call_lifetime, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - .extra1 = (void *)&one, - }, - { - .procname = "dead_call_expiry", - .data = &rxrpc_dead_call_expiry, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - .extra1 = (void *)&one, - }, - - /* Values measured in seconds */ - { - .procname = "connection_expiry", - .data = &rxrpc_connection_expiry, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_dointvec, .extra1 = (void *)&one, }, /* Non-time values */ + { + .procname = "max_client_conns", + .data = &rxrpc_max_client_connections, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&rxrpc_reap_client_connections, + }, + { + .procname = "reap_client_conns", + .data = &rxrpc_reap_client_connections, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&one, + .extra2 = (void *)&rxrpc_max_client_connections, + }, { .procname = "max_backlog", .data = &rxrpc_max_backlog, diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index b88914d53ca5..ff7af71c4b49 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -30,6 +30,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; return 0; +#ifdef CONFIG_AF_RXRPC_IPV6 case ETH_P_IPV6: srx->transport_type = SOCK_DGRAM; srx->transport_len = sizeof(srx->transport.sin6); @@ -37,6 +38,7 @@ int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) srx->transport.sin6.sin6_port = udp_hdr(skb)->source; srx->transport.sin6.sin6_addr = ipv6_hdr(skb)->saddr; return 0; +#endif default: pr_warn_ratelimited("AF_RXRPC: Unknown eth protocol %u\n", diff --git a/net/sched/Kconfig b/net/sched/Kconfig index ccf931b3b94c..87956a768d1b 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -749,6 +749,17 @@ config NET_ACT_CONNMARK To compile this code as a module, choose M here: the module will be called act_connmark. +config NET_ACT_SKBMOD + tristate "skb data modification action" + depends on NET_CLS_ACT + ---help--- + Say Y here to allow modification of skb data + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_skbmod. + config NET_ACT_IFE tristate "Inter-FE action based on IETF ForCES InterFE LFB" depends on NET_CLS_ACT @@ -761,6 +772,17 @@ config NET_ACT_IFE To compile this code as a module, choose M here: the module will be called act_ife. +config NET_ACT_TUNNEL_KEY + tristate "IP tunnel metadata manipulation" + depends on NET_CLS_ACT + ---help--- + Say Y here to set/release ip tunnel metadata. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_tunnel_key. + config NET_IFE_SKBMARK tristate "Support to encoding decoding skb mark on IFE action" depends on NET_ACT_IFE @@ -771,6 +793,11 @@ config NET_IFE_SKBPRIO depends on NET_ACT_IFE ---help--- +config NET_IFE_SKBTCINDEX + tristate "Support to encoding decoding skb tcindex on IFE action" + depends on NET_ACT_IFE + ---help--- + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index ae088a5a9d95..4bdda3634e0b 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -19,9 +19,12 @@ obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o +obj-$(CONFIG_NET_ACT_SKBMOD) += act_skbmod.o obj-$(CONFIG_NET_ACT_IFE) += act_ife.o obj-$(CONFIG_NET_IFE_SKBMARK) += act_meta_mark.o obj-$(CONFIG_NET_IFE_SKBPRIO) += act_meta_skbprio.o +obj-$(CONFIG_NET_IFE_SKBTCINDEX) += act_meta_skbtcindex.o +obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index d09d0687594b..c9102172ce3b 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -592,9 +592,19 @@ err_out: return ERR_PTR(err); } -int tcf_action_init(struct net *net, struct nlattr *nla, - struct nlattr *est, char *name, int ovr, - int bind, struct list_head *actions) +static void cleanup_a(struct list_head *actions, int ovr) +{ + struct tc_action *a; + + if (!ovr) + return; + + list_for_each_entry(a, actions, list) + a->tcfa_refcnt--; +} + +int tcf_action_init(struct net *net, struct nlattr *nla, struct nlattr *est, + char *name, int ovr, int bind, struct list_head *actions) { struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; struct tc_action *act; @@ -612,8 +622,15 @@ int tcf_action_init(struct net *net, struct nlattr *nla, goto err; } act->order = i; + if (ovr) + act->tcfa_refcnt++; list_add_tail(&act->list, actions); } + + /* Remove the temp refcnt which was necessary to protect against + * destroying an existing action which was being replaced + */ + cleanup_a(actions, ovr); return 0; err: @@ -883,6 +900,8 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } act->order = i; + if (event == RTM_GETACTION) + act->tcfa_refcnt++; list_add_tail(&act->list, &actions); } @@ -923,9 +942,8 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions, return err; } -static int -tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 portid, int ovr) +static int tcf_action_add(struct net *net, struct nlattr *nla, + struct nlmsghdr *n, u32 portid, int ovr) { int ret = 0; LIST_HEAD(actions); @@ -988,8 +1006,7 @@ replay: return ret; } -static struct nlattr * -find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(const struct nlmsghdr *n) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -1016,8 +1033,7 @@ find_dump_kind(const struct nlmsghdr *n) return kind; } -static int -tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) +static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); struct nlmsghdr *nlh; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index bfa870731e74..1d3960033f61 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -39,13 +39,10 @@ static struct tc_action_ops act_bpf_ops; static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, struct tcf_result *res) { + bool at_ingress = skb_at_tc_ingress(skb); struct tcf_bpf *prog = to_bpf(act); struct bpf_prog *filter; int action, filter_res; - bool at_ingress = G_TC_AT(skb->tc_verd) & AT_INGRESS; - - if (unlikely(!skb_mac_header_was_set(skb))) - return TC_ACT_UNSPEC; tcf_lastuse_update(&prog->tcf_tm); bstats_cpu_update(this_cpu_ptr(prog->common.cpu_bstats), skb); diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index b5dbf633a863..e0defcef376d 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -116,8 +116,8 @@ static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, return (void *)(skb_network_header(skb) + ihl); } -static int tcf_csum_ipv4_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmphdr *icmph; @@ -152,8 +152,8 @@ static int tcf_csum_ipv4_igmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_icmp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct icmp6hdr *icmp6h; const struct ipv6hdr *ip6h; @@ -174,8 +174,8 @@ static int tcf_csum_ipv6_icmp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct iphdr *iph; @@ -195,8 +195,8 @@ static int tcf_csum_ipv4_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv6_tcp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl) +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl) { struct tcphdr *tcph; const struct ipv6hdr *ip6h; @@ -217,8 +217,8 @@ static int tcf_csum_ipv6_tcp(struct sk_buff *skb, return 1; } -static int tcf_csum_ipv4_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct iphdr *iph; @@ -270,8 +270,8 @@ ignore_obscure_skb: return 1; } -static int tcf_csum_ipv6_udp(struct sk_buff *skb, - unsigned int ihl, unsigned int ipl, int udplite) +static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, + unsigned int ipl, int udplite) { struct udphdr *udph; const struct ipv6hdr *ip6h; @@ -380,8 +380,8 @@ fail: return 0; } -static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, - unsigned int ixhl, unsigned int *pl) +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, unsigned int ixhl, + unsigned int *pl) { int off, len, optlen; unsigned char *xh = (void *)ip6xh; @@ -494,8 +494,8 @@ fail: return 0; } -static int tcf_csum(struct sk_buff *skb, - const struct tc_action *a, struct tcf_result *res) +static int tcf_csum(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) { struct tcf_csum *p = to_tcf_csum(a); int action; @@ -531,8 +531,8 @@ drop: return TC_ACT_SHOT; } -static int tcf_csum_dump(struct sk_buff *skb, - struct tc_action *a, int bind, int ref) +static int tcf_csum_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_csum *p = to_tcf_csum(a); diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e24a4093d6f6..e0aa30f83c6c 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -156,7 +156,8 @@ static void tcf_gact_stats_update(struct tc_action *a, u64 bytes, u32 packets, int action = READ_ONCE(gact->tcf_action); struct tcf_t *tm = &gact->tcf_tm; - _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, packets); + _bstats_cpu_update(this_cpu_ptr(gact->common.cpu_bstats), bytes, + packets); if (action == TC_ACT_SHOT) this_cpu_ptr(gact->common.cpu_qstats)->drops += packets; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 4a60cd5e1875..95c463cbb9a6 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -63,6 +63,23 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) } EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); +int ife_encode_meta_u16(u16 metaval, void *skbdata, struct tcf_meta_info *mi) +{ + u16 edata = 0; + + if (mi->metaval) + edata = *(u16 *)mi->metaval; + else if (metaval) + edata = metaval; + + if (!edata) /* will not encode */ + return 0; + + edata = htons(edata); + return ife_tlv_meta_encode(skbdata, mi->metaid, 2, &edata); +} +EXPORT_SYMBOL_GPL(ife_encode_meta_u16); + int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi) { if (mi->metaval) @@ -81,6 +98,15 @@ int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_check_meta_u32); +int ife_check_meta_u16(u16 metaval, struct tcf_meta_info *mi) +{ + if (metaval || mi->metaval) + return 8; /* T+L+(V) == 2+2+(2+2bytepad) */ + + return 0; +} +EXPORT_SYMBOL_GPL(ife_check_meta_u16); + int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi) { u32 edata = metaval; diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c new file mode 100644 index 000000000000..3b35774ce890 --- /dev/null +++ b/net/sched/act_meta_skbtcindex.c @@ -0,0 +1,79 @@ +/* + * net/sched/act_meta_tc_index.c IFE skb->tc_index metadata module + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * copyright Jamal Hadi Salim (2016) + * +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static int skbtcindex_encode(struct sk_buff *skb, void *skbdata, + struct tcf_meta_info *e) +{ + u32 ifetc_index = skb->tc_index; + + return ife_encode_meta_u16(ifetc_index, skbdata, e); +} + +static int skbtcindex_decode(struct sk_buff *skb, void *data, u16 len) +{ + u16 ifetc_index = *(u16 *)data; + + skb->tc_index = ntohs(ifetc_index); + return 0; +} + +static int skbtcindex_check(struct sk_buff *skb, struct tcf_meta_info *e) +{ + return ife_check_meta_u16(skb->tc_index, e); +} + +static struct tcf_meta_ops ife_skbtcindex_ops = { + .metaid = IFE_META_TCINDEX, + .metatype = NLA_U16, + .name = "tc_index", + .synopsis = "skb tc_index 16 bit metadata", + .check_presence = skbtcindex_check, + .encode = skbtcindex_encode, + .decode = skbtcindex_decode, + .get = ife_get_meta_u16, + .alloc = ife_alloc_meta_u16, + .release = ife_release_meta_gen, + .validate = ife_validate_meta_u16, + .owner = THIS_MODULE, +}; + +static int __init ifetc_index_init_module(void) +{ + return register_ife_op(&ife_skbtcindex_ops); +} + +static void __exit ifetc_index_cleanup_module(void) +{ + unregister_ife_op(&ife_skbtcindex_ops); +} + +module_init(ifetc_index_init_module); +module_exit(ifetc_index_cleanup_module); + +MODULE_AUTHOR("Jamal Hadi Salim(2016)"); +MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 6038c85d92f5..667dc382df82 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -204,7 +204,15 @@ out: return retval; } -static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static void tcf_stats_update(struct tc_action *a, u64 bytes, u32 packets, + u64 lastuse) +{ + tcf_lastuse_update(&a->tcfa_tm); + _bstats_cpu_update(this_cpu_ptr(a->cpu_bstats), bytes, packets); +} + +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, + int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_mirred *m = to_mirred(a); @@ -280,6 +288,7 @@ static struct tc_action_ops act_mirred_ops = { .type = TCA_ACT_MIRRED, .owner = THIS_MODULE, .act = tcf_mirred, + .stats_update = tcf_stats_update, .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, diff --git a/net/sched/act_police.c b/net/sched/act_police.c index 8a3be1d99775..d1bd248fe146 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -249,6 +249,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, police->tcfp_t_c = now; police->tcfp_toks = toks; police->tcfp_ptoks = ptoks; + if (police->tcfp_result == TC_ACT_SHOT) + police->tcf_qstats.drops++; spin_unlock(&police->tcf_lock); return police->tcfp_result; } @@ -261,8 +263,8 @@ static int tcf_act_police(struct sk_buff *skb, const struct tc_action *a, return police->tcf_action; } -static int -tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_police *police = to_police(a); @@ -347,14 +349,12 @@ static struct pernet_operations police_net_ops = { .size = sizeof(struct tc_action_net), }; -static int __init -police_init_module(void) +static int __init police_init_module(void) { return tcf_register_action(&act_police_ops, &police_net_ops); } -static void __exit -police_cleanup_module(void) +static void __exit police_cleanup_module(void) { tcf_unregister_action(&act_police_ops, &police_net_ops); } diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c new file mode 100644 index 000000000000..e7d96381c908 --- /dev/null +++ b/net/sched/act_skbmod.c @@ -0,0 +1,301 @@ +/* + * net/sched/act_skbmod.c skb data modifier + * + * Copyright (c) 2016 Jamal Hadi Salim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define SKBMOD_TAB_MASK 15 + +static int skbmod_net_id; +static struct tc_action_ops act_skbmod_ops; + +#define MAX_EDIT_LEN ETH_HLEN +static int tcf_skbmod_run(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_skbmod *d = to_skbmod(a); + int action; + struct tcf_skbmod_params *p; + u64 flags; + int err; + + tcf_lastuse_update(&d->tcf_tm); + bstats_cpu_update(this_cpu_ptr(d->common.cpu_bstats), skb); + + /* XXX: if you are going to edit more fields beyond ethernet header + * (example when you add IP header replacement or vlan swap) + * then MAX_EDIT_LEN needs to change appropriately + */ + err = skb_ensure_writable(skb, MAX_EDIT_LEN); + if (unlikely(err)) { /* best policy is to drop on the floor */ + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + return TC_ACT_SHOT; + } + + rcu_read_lock(); + action = READ_ONCE(d->tcf_action); + if (unlikely(action == TC_ACT_SHOT)) { + qstats_overlimit_inc(this_cpu_ptr(d->common.cpu_qstats)); + rcu_read_unlock(); + return action; + } + + p = rcu_dereference(d->skbmod_p); + flags = p->flags; + if (flags & SKBMOD_F_DMAC) + ether_addr_copy(eth_hdr(skb)->h_dest, p->eth_dst); + if (flags & SKBMOD_F_SMAC) + ether_addr_copy(eth_hdr(skb)->h_source, p->eth_src); + if (flags & SKBMOD_F_ETYPE) + eth_hdr(skb)->h_proto = p->eth_type; + rcu_read_unlock(); + + if (flags & SKBMOD_F_SWAPMAC) { + u16 tmpaddr[ETH_ALEN / 2]; /* ether_addr_copy() requirement */ + /*XXX: I am sure we can come up with more efficient swapping*/ + ether_addr_copy((u8 *)tmpaddr, eth_hdr(skb)->h_dest); + ether_addr_copy(eth_hdr(skb)->h_dest, eth_hdr(skb)->h_source); + ether_addr_copy(eth_hdr(skb)->h_source, (u8 *)tmpaddr); + } + + return action; +} + +static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = { + [TCA_SKBMOD_PARMS] = { .len = sizeof(struct tc_skbmod) }, + [TCA_SKBMOD_DMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_SMAC] = { .len = ETH_ALEN }, + [TCA_SKBMOD_ETYPE] = { .type = NLA_U16 }, +}; + +static int tcf_skbmod_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct nlattr *tb[TCA_SKBMOD_MAX + 1]; + struct tcf_skbmod_params *p, *p_old; + struct tc_skbmod *parm; + struct tcf_skbmod *d; + bool exists = false; + u8 *daddr = NULL; + u8 *saddr = NULL; + u16 eth_type = 0; + u32 lflags = 0; + int ret = 0, err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_SKBMOD_MAX, nla, skbmod_policy); + if (err < 0) + return err; + + if (!tb[TCA_SKBMOD_PARMS]) + return -EINVAL; + + if (tb[TCA_SKBMOD_DMAC]) { + daddr = nla_data(tb[TCA_SKBMOD_DMAC]); + lflags |= SKBMOD_F_DMAC; + } + + if (tb[TCA_SKBMOD_SMAC]) { + saddr = nla_data(tb[TCA_SKBMOD_SMAC]); + lflags |= SKBMOD_F_SMAC; + } + + if (tb[TCA_SKBMOD_ETYPE]) { + eth_type = nla_get_u16(tb[TCA_SKBMOD_ETYPE]); + lflags |= SKBMOD_F_ETYPE; + } + + parm = nla_data(tb[TCA_SKBMOD_PARMS]); + if (parm->flags & SKBMOD_F_SWAPMAC) + lflags = SKBMOD_F_SWAPMAC; + + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + if (!lflags) + return -EINVAL; + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + d = to_skbmod(*a); + + ASSERT_RTNL(); + p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); + if (unlikely(!p)) { + if (ovr) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + p->flags = lflags; + d->tcf_action = parm->action; + + p_old = rtnl_dereference(d->skbmod_p); + + if (ovr) + spin_lock_bh(&d->tcf_lock); + + if (lflags & SKBMOD_F_DMAC) + ether_addr_copy(p->eth_dst, daddr); + if (lflags & SKBMOD_F_SMAC) + ether_addr_copy(p->eth_src, saddr); + if (lflags & SKBMOD_F_ETYPE) + p->eth_type = htons(eth_type); + + rcu_assign_pointer(d->skbmod_p, p); + if (ovr) + spin_unlock_bh(&d->tcf_lock); + + if (p_old) + kfree_rcu(p_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + return ret; +} + +static void tcf_skbmod_cleanup(struct tc_action *a, int bind) +{ + struct tcf_skbmod *d = to_skbmod(a); + struct tcf_skbmod_params *p; + + p = rcu_dereference_protected(d->skbmod_p, 1); + kfree_rcu(p, rcu); +} + +static int tcf_skbmod_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + struct tcf_skbmod *d = to_skbmod(a); + unsigned char *b = skb_tail_pointer(skb); + struct tcf_skbmod_params *p = rtnl_dereference(d->skbmod_p); + struct tc_skbmod opt = { + .index = d->tcf_index, + .refcnt = d->tcf_refcnt - ref, + .bindcnt = d->tcf_bindcnt - bind, + .action = d->tcf_action, + }; + struct tcf_t t; + + opt.flags = p->flags; + if (nla_put(skb, TCA_SKBMOD_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_DMAC) && + nla_put(skb, TCA_SKBMOD_DMAC, ETH_ALEN, p->eth_dst)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_SMAC) && + nla_put(skb, TCA_SKBMOD_SMAC, ETH_ALEN, p->eth_src)) + goto nla_put_failure; + if ((p->flags & SKBMOD_F_ETYPE) && + nla_put_u16(skb, TCA_SKBMOD_ETYPE, ntohs(p->eth_type))) + goto nla_put_failure; + + tcf_tm_dump(&t, &d->tcf_tm); + if (nla_put_64bit(skb, TCA_SKBMOD_TM, sizeof(t), &t, TCA_SKBMOD_PAD)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + rcu_read_unlock(); + nlmsg_trim(skb, b); + return -1; +} + +static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_skbmod_ops = { + .kind = "skbmod", + .type = TCA_ACT_SKBMOD, + .owner = THIS_MODULE, + .act = tcf_skbmod_run, + .dump = tcf_skbmod_dump, + .init = tcf_skbmod_init, + .cleanup = tcf_skbmod_cleanup, + .walk = tcf_skbmod_walker, + .lookup = tcf_skbmod_search, + .size = sizeof(struct tcf_skbmod), +}; + +static __net_init int skbmod_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); +} + +static void __net_exit skbmod_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, skbmod_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations skbmod_net_ops = { + .init = skbmod_init_net, + .exit = skbmod_exit_net, + .id = &skbmod_net_id, + .size = sizeof(struct tc_action_net), +}; + +MODULE_AUTHOR("Jamal Hadi Salim, "); +MODULE_DESCRIPTION("SKB data mod-ing"); +MODULE_LICENSE("GPL"); + +static int __init skbmod_init_module(void) +{ + return tcf_register_action(&act_skbmod_ops, &skbmod_net_ops); +} + +static void __exit skbmod_cleanup_module(void) +{ + tcf_unregister_action(&act_skbmod_ops, &skbmod_net_ops); +} + +module_init(skbmod_init_module); +module_exit(skbmod_cleanup_module); diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c new file mode 100644 index 000000000000..af47bdf2f483 --- /dev/null +++ b/net/sched/act_tunnel_key.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 2016, Amir Vadai + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define TUNNEL_KEY_TAB_MASK 15 + +static int tunnel_key_net_id; +static struct tc_action_ops act_tunnel_key_ops; + +static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + int action; + + rcu_read_lock(); + + params = rcu_dereference(t->params); + + tcf_lastuse_update(&t->tcf_tm); + bstats_cpu_update(this_cpu_ptr(t->common.cpu_bstats), skb); + action = params->action; + + switch (params->tcft_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + skb_dst_drop(skb); + break; + case TCA_TUNNEL_KEY_ACT_SET: + skb_dst_drop(skb); + skb_dst_set(skb, dst_clone(¶ms->tcft_enc_metadata->dst)); + break; + default: + WARN_ONCE(1, "Bad tunnel_key action %d.\n", + params->tcft_action); + break; + } + + rcu_read_unlock(); + + return action; +} + +static const struct nla_policy tunnel_key_policy[TCA_TUNNEL_KEY_MAX + 1] = { + [TCA_TUNNEL_KEY_PARMS] = { .len = sizeof(struct tc_tunnel_key) }, + [TCA_TUNNEL_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_TUNNEL_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_TUNNEL_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, +}; + +static int tunnel_key_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action **a, + int ovr, int bind) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; + struct tcf_tunnel_key_params *params_old; + struct tcf_tunnel_key_params *params_new; + struct metadata_dst *metadata = NULL; + struct tc_tunnel_key *parm; + struct tcf_tunnel_key *t; + bool exists = false; + __be64 key_id; + int ret = 0; + int err; + + if (!nla) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_TUNNEL_KEY_MAX, nla, tunnel_key_policy); + if (err < 0) + return err; + + if (!tb[TCA_TUNNEL_KEY_PARMS]) + return -EINVAL; + + parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); + exists = tcf_hash_check(tn, parm->index, a, bind); + if (exists && bind) + return 0; + + switch (parm->t_action) { + case TCA_TUNNEL_KEY_ACT_RELEASE: + break; + case TCA_TUNNEL_KEY_ACT_SET: + if (!tb[TCA_TUNNEL_KEY_ENC_KEY_ID]) { + ret = -EINVAL; + goto err_out; + } + + key_id = key32_to_tunnel_id(nla_get_be32(tb[TCA_TUNNEL_KEY_ENC_KEY_ID])); + + if (tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]) { + __be32 saddr; + __be32 daddr; + + saddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_SRC]); + daddr = nla_get_in_addr(tb[TCA_TUNNEL_KEY_ENC_IPV4_DST]); + + metadata = __ip_tun_set_dst(saddr, daddr, 0, 0, + TUNNEL_KEY, key_id, 0); + } else if (tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC] && + tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]) { + struct in6_addr saddr; + struct in6_addr daddr; + + saddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_SRC]); + daddr = nla_get_in6_addr(tb[TCA_TUNNEL_KEY_ENC_IPV6_DST]); + + metadata = __ipv6_tun_set_dst(&saddr, &daddr, 0, 0, 0, + TUNNEL_KEY, key_id, 0); + } + + if (!metadata) { + ret = -EINVAL; + goto err_out; + } + + metadata->u.tun_info.mode |= IP_TUNNEL_INFO_TX; + break; + default: + goto err_out; + } + + if (!exists) { + ret = tcf_hash_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); + if (ret) + return ret; + + ret = ACT_P_CREATED; + } else { + tcf_hash_release(*a, bind); + if (!ovr) + return -EEXIST; + } + + t = to_tunnel_key(*a); + + ASSERT_RTNL(); + params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); + if (unlikely(!params_new)) { + if (ret == ACT_P_CREATED) + tcf_hash_release(*a, bind); + return -ENOMEM; + } + + params_old = rtnl_dereference(t->params); + + params_new->action = parm->action; + params_new->tcft_action = parm->t_action; + params_new->tcft_enc_metadata = metadata; + + rcu_assign_pointer(t->params, params_new); + + if (params_old) + kfree_rcu(params_old, rcu); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(tn, *a); + + return ret; + +err_out: + if (exists) + tcf_hash_release(*a, bind); + return ret; +} + +static void tunnel_key_release(struct tc_action *a, int bind) +{ + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + + params = rcu_dereference_protected(t->params, 1); + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) + dst_release(¶ms->tcft_enc_metadata->dst); + + kfree_rcu(params, rcu); +} + +static int tunnel_key_dump_addresses(struct sk_buff *skb, + const struct ip_tunnel_info *info) +{ + unsigned short family = ip_tunnel_info_af(info); + + if (family == AF_INET) { + __be32 saddr = info->key.u.ipv4.src; + __be32 daddr = info->key.u.ipv4.dst; + + if (!nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_SRC, saddr) && + !nla_put_in_addr(skb, TCA_TUNNEL_KEY_ENC_IPV4_DST, daddr)) + return 0; + } + + if (family == AF_INET6) { + const struct in6_addr *saddr6 = &info->key.u.ipv6.src; + const struct in6_addr *daddr6 = &info->key.u.ipv6.dst; + + if (!nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_SRC, saddr6) && + !nla_put_in6_addr(skb, + TCA_TUNNEL_KEY_ENC_IPV6_DST, daddr6)) + return 0; + } + + return -EINVAL; +} + +static int tunnel_key_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_tunnel_key *t = to_tunnel_key(a); + struct tcf_tunnel_key_params *params; + struct tc_tunnel_key opt = { + .index = t->tcf_index, + .refcnt = t->tcf_refcnt - ref, + .bindcnt = t->tcf_bindcnt - bind, + }; + struct tcf_t tm; + + params = rtnl_dereference(t->params); + + opt.t_action = params->tcft_action; + opt.action = params->action; + + if (nla_put(skb, TCA_TUNNEL_KEY_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (params->tcft_action == TCA_TUNNEL_KEY_ACT_SET) { + struct ip_tunnel_key *key = + ¶ms->tcft_enc_metadata->u.tun_info.key; + __be32 key_id = tunnel_id_to_key32(key->tun_id); + + if (nla_put_be32(skb, TCA_TUNNEL_KEY_ENC_KEY_ID, key_id) || + tunnel_key_dump_addresses(skb, + ¶ms->tcft_enc_metadata->u.tun_info)) + goto nla_put_failure; + } + + tcf_tm_dump(&tm, &t->tcf_tm); + if (nla_put_64bit(skb, TCA_TUNNEL_KEY_TM, sizeof(tm), + &tm, TCA_TUNNEL_KEY_PAD)) + goto nla_put_failure; + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tunnel_key_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_generic_walker(tn, skb, cb, type, ops); +} + +static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tcf_hash_search(tn, a, index); +} + +static struct tc_action_ops act_tunnel_key_ops = { + .kind = "tunnel_key", + .type = TCA_ACT_TUNNEL_KEY, + .owner = THIS_MODULE, + .act = tunnel_key_act, + .dump = tunnel_key_dump, + .init = tunnel_key_init, + .cleanup = tunnel_key_release, + .walk = tunnel_key_walker, + .lookup = tunnel_key_search, + .size = sizeof(struct tcf_tunnel_key), +}; + +static __net_init int tunnel_key_init_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); +} + +static void __net_exit tunnel_key_exit_net(struct net *net) +{ + struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + + tc_action_net_exit(tn); +} + +static struct pernet_operations tunnel_key_net_ops = { + .init = tunnel_key_init_net, + .exit = tunnel_key_exit_net, + .id = &tunnel_key_net_id, + .size = sizeof(struct tc_action_net), +}; + +static int __init tunnel_key_init_module(void) +{ + return tcf_register_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +static void __exit tunnel_key_cleanup_module(void) +{ + tcf_unregister_action(&act_tunnel_key_ops, &tunnel_key_net_ops); +} + +module_init(tunnel_key_init_module); +module_exit(tunnel_key_cleanup_module); + +MODULE_AUTHOR("Amir Vadai "); +MODULE_DESCRIPTION("ip tunnel manipulation actions"); +MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 691409de3e1a..b57fcbcefea1 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -30,12 +30,19 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_vlan *v = to_vlan(a); int action; int err; + u16 tci; spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); bstats_update(&v->tcf_bstats, skb); action = v->tcf_action; + /* Ensure 'data' points at mac_header prior calling vlan manipulating + * functions. + */ + if (skb_at_tc_ingress(skb)) + skb_push_rcsum(skb, skb->mac_len); + switch (v->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); @@ -43,10 +50,35 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid); + err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | + (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; + case TCA_VLAN_ACT_MODIFY: + /* No-op if no vlan tag (either hw-accel or in-payload) */ + if (!skb_vlan_tagged(skb)) + goto unlock; + /* extract existing tag (and guarantee no hw-accel tag) */ + if (skb_vlan_tag_present(skb)) { + tci = skb_vlan_tag_get(skb); + skb->vlan_tci = 0; + } else { + /* in-payload vlan tag, pop it */ + err = __skb_vlan_pop(skb, &tci); + if (err) + goto drop; + } + /* replace the vid */ + tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + /* replace prio bits, if tcfv_push_prio specified */ + if (v->tcfv_push_prio) { + tci &= ~VLAN_PRIO_MASK; + tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + } + /* put updated tci as hwaccel tag */ + __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + break; default: BUG(); } @@ -57,6 +89,9 @@ drop: action = TC_ACT_SHOT; v->tcf_qstats.drops++; unlock: + if (skb_at_tc_ingress(skb)) + skb_pull_rcsum(skb, skb->mac_len); + spin_unlock(&v->tcf_lock); return action; } @@ -65,6 +100,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = { [TCA_VLAN_PARMS] = { .len = sizeof(struct tc_vlan) }, [TCA_VLAN_PUSH_VLAN_ID] = { .type = NLA_U16 }, [TCA_VLAN_PUSH_VLAN_PROTOCOL] = { .type = NLA_U16 }, + [TCA_VLAN_PUSH_VLAN_PRIORITY] = { .type = NLA_U8 }, }; static int tcf_vlan_init(struct net *net, struct nlattr *nla, @@ -78,6 +114,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, int action; __be16 push_vid = 0; __be16 push_proto = 0; + u8 push_prio = 0; bool exists = false; int ret = 0, err; @@ -99,6 +136,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_POP: break; case TCA_VLAN_ACT_PUSH: + case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) tcf_hash_release(*a, bind); @@ -123,6 +161,9 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, } else { push_proto = htons(ETH_P_8021Q); } + + if (tb[TCA_VLAN_PUSH_VLAN_PRIORITY]) + push_prio = nla_get_u8(tb[TCA_VLAN_PUSH_VLAN_PRIORITY]); break; default: if (exists) @@ -150,6 +191,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, v->tcfv_action = action; v->tcfv_push_vid = push_vid; + v->tcfv_push_prio = push_prio; v->tcfv_push_proto = push_proto; v->tcf_action = parm->action; @@ -178,10 +220,13 @@ static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if (v->tcfv_action == TCA_VLAN_ACT_PUSH && + if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || + v->tcfv_action == TCA_VLAN_ACT_MODIFY) && (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto))) + v->tcfv_push_proto) || + (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, + v->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7c5645373af..11da7da0b7c4 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -344,13 +344,15 @@ replay: if (err == 0) { struct tcf_proto *next = rtnl_dereference(tp->next); - tfilter_notify(net, skb, n, tp, fh, RTM_DELTFILTER); + tfilter_notify(net, skb, n, tp, fh, + RTM_DELTFILTER); if (tcf_destroy(tp, false)) RCU_INIT_POINTER(*back, next); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER); + err = tfilter_notify(net, skb, n, tp, fh, + RTM_NEWTFILTER); goto errout; default: err = -EINVAL; @@ -448,7 +450,8 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, struct net *net = sock_net(a->skb->sk); return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTFILTER); } /* called with RTNL */ @@ -552,7 +555,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) EXPORT_SYMBOL(tcf_exts_destroy); int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, - struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) + struct nlattr *rate_tlv, struct tcf_exts *exts, bool ovr) { #ifdef CONFIG_NET_CLS_ACT { @@ -560,8 +563,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, if (exts->police && tb[exts->police]) { act = tcf_action_init_1(net, tb[exts->police], rate_tlv, - "police", ovr, - TCA_ACT_BIND); + "police", ovr, TCA_ACT_BIND); if (IS_ERR(act)) return PTR_ERR(act); @@ -573,8 +575,8 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, int err, i = 0; err = tcf_action_init(net, tb[exts->action], rate_tlv, - NULL, ovr, - TCA_ACT_BIND, &actions); + NULL, ovr, TCA_ACT_BIND, + &actions); if (err) return err; list_for_each_entry(act, &actions, list) diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 0b8c3ace671f..eb219b78cd49 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -138,10 +138,12 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; struct tcf_ematch_tree t; - tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t); if (err < 0) @@ -189,7 +191,10 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_BASIC_ACT, TCA_BASIC_POLICE); + if (err < 0) + goto errout; + err = -EINVAL; if (handle) { fnew->handle = handle; @@ -226,6 +231,7 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c3002c2c68bb..bb1d5a487081 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -27,6 +27,8 @@ MODULE_AUTHOR("Daniel Borkmann "); MODULE_DESCRIPTION("TC BPF based classifier"); #define CLS_BPF_NAME_LEN 256 +#define CLS_BPF_SUPPORTED_GEN_FLAGS \ + (TCA_CLS_FLAGS_SKIP_HW | TCA_CLS_FLAGS_SKIP_SW) struct cls_bpf_head { struct list_head plist; @@ -39,6 +41,8 @@ struct cls_bpf_prog { struct list_head link; struct tcf_result res; bool exts_integrated; + bool offloaded; + u32 gen_flags; struct tcf_exts exts; u32 handle; union { @@ -54,8 +58,10 @@ struct cls_bpf_prog { static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { [TCA_BPF_CLASSID] = { .type = NLA_U32 }, [TCA_BPF_FLAGS] = { .type = NLA_U32 }, + [TCA_BPF_FLAGS_GEN] = { .type = NLA_U32 }, [TCA_BPF_FD] = { .type = NLA_U32 }, - [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, .len = CLS_BPF_NAME_LEN }, + [TCA_BPF_NAME] = { .type = NLA_NUL_STRING, + .len = CLS_BPF_NAME_LEN }, [TCA_BPF_OPS_LEN] = { .type = NLA_U16 }, [TCA_BPF_OPS] = { .type = NLA_BINARY, .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, @@ -83,9 +89,6 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_bpf_prog *prog; int ret = -1; - if (unlikely(!skb_mac_header_was_set(skb))) - return -1; - /* Needed here for accessing maps. */ rcu_read_lock(); list_for_each_entry_rcu(prog, &head->plist, link) { @@ -93,7 +96,9 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, qdisc_skb_cb(skb)->tc_classid = prog->res.classid; - if (at_ingress) { + if (tc_skip_sw(prog->gen_flags)) { + filter_res = prog->exts_integrated ? TC_ACT_UNSPEC : 0; + } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); bpf_compute_data_end(skb); @@ -140,6 +145,91 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) return !prog->bpf_ops; } +static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, + enum tc_clsbpf_command cmd) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct tc_cls_bpf_offload bpf_offload = {}; + struct tc_to_netdev offload; + + offload.type = TC_SETUP_CLSBPF; + offload.cls_bpf = &bpf_offload; + + bpf_offload.command = cmd; + bpf_offload.exts = &prog->exts; + bpf_offload.prog = prog->filter; + bpf_offload.name = prog->bpf_name; + bpf_offload.exts_integrated = prog->exts_integrated; + bpf_offload.gen_flags = prog->gen_flags; + + return dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); +} + +static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, + struct cls_bpf_prog *oldprog) +{ + struct net_device *dev = tp->q->dev_queue->dev; + struct cls_bpf_prog *obj = prog; + enum tc_clsbpf_command cmd; + bool skip_sw; + int ret; + + skip_sw = tc_skip_sw(prog->gen_flags) || + (oldprog && tc_skip_sw(oldprog->gen_flags)); + + if (oldprog && oldprog->offloaded) { + if (tc_should_offload(dev, tp, prog->gen_flags)) { + cmd = TC_CLSBPF_REPLACE; + } else if (!tc_skip_sw(prog->gen_flags)) { + obj = oldprog; + cmd = TC_CLSBPF_DESTROY; + } else { + return -EINVAL; + } + } else { + if (!tc_should_offload(dev, tp, prog->gen_flags)) + return skip_sw ? -EINVAL : 0; + cmd = TC_CLSBPF_ADD; + } + + ret = cls_bpf_offload_cmd(tp, obj, cmd); + if (ret) + return skip_sw ? ret : 0; + + obj->offloaded = true; + if (oldprog) + oldprog->offloaded = false; + + return 0; +} + +static void cls_bpf_stop_offload(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + int err; + + if (!prog->offloaded) + return; + + err = cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + if (err) { + pr_err("Stopping hardware offload failed: %d\n", err); + return; + } + + prog->offloaded = false; +} + +static void cls_bpf_offload_update_stats(struct tcf_proto *tp, + struct cls_bpf_prog *prog) +{ + if (!prog->offloaded) + return; + + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_STATS); +} + static int cls_bpf_init(struct tcf_proto *tp) { struct cls_bpf_head *head; @@ -179,6 +269,7 @@ static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg) { struct cls_bpf_prog *prog = (struct cls_bpf_prog *) arg; + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -195,6 +286,7 @@ static bool cls_bpf_destroy(struct tcf_proto *tp, bool force) return false; list_for_each_entry_safe(prog, tmp, &head->plist, link) { + cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); call_rcu(&prog->rcu, __cls_bpf_delete_prog); @@ -304,6 +396,7 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, { bool is_bpf, is_ebpf, have_exts = false; struct tcf_exts exts; + u32 gen_flags = 0; int ret; is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS]; @@ -311,30 +404,39 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); if (ret < 0) return ret; + ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); + if (ret < 0) + goto errout; if (tb[TCA_BPF_FLAGS]) { u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { - tcf_exts_destroy(&exts); - return -EINVAL; + ret = -EINVAL; + goto errout; } have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } + if (tb[TCA_BPF_FLAGS_GEN]) { + gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); + if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || + !tc_flags_valid(gen_flags)) { + ret = -EINVAL; + goto errout; + } + } prog->exts_integrated = have_exts; + prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : cls_bpf_prog_from_efd(tb, prog, tp); - if (ret < 0) { - tcf_exts_destroy(&exts); - return ret; - } + if (ret < 0) + goto errout; if (tb[TCA_BPF_CLASSID]) { prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); @@ -343,6 +445,10 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, tcf_exts_change(tp, &prog->exts, &exts); return 0; + +errout: + tcf_exts_destroy(&exts); + return ret; } static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, @@ -388,7 +494,9 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, if (!prog) return -ENOBUFS; - tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_init(&prog->exts, TCA_BPF_ACT, TCA_BPF_POLICE); + if (ret < 0) + goto errout; if (oldprog) { if (handle && oldprog->handle != handle) { @@ -406,10 +514,17 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], ovr); + ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], + ovr); if (ret < 0) goto errout; + ret = cls_bpf_offload(tp, prog, oldprog); + if (ret) { + cls_bpf_delete_prog(tp, prog); + return ret; + } + if (oldprog) { list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); @@ -420,9 +535,10 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, *arg = (unsigned long) prog; return 0; -errout: - kfree(prog); +errout: + tcf_exts_destroy(&prog->exts); + kfree(prog); return ret; } @@ -470,6 +586,8 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, tm->tcm_handle = prog->handle; + cls_bpf_offload_update_stats(tp, prog); + nest = nla_nest_start(skb, TCA_OPTIONS); if (nest == NULL) goto nla_put_failure; @@ -492,6 +610,9 @@ static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, bpf_flags |= TCA_BPF_FLAG_ACT_DIRECT; if (bpf_flags && nla_put_u32(skb, TCA_BPF_FLAGS, bpf_flags)) goto nla_put_failure; + if (prog->gen_flags && + nla_put_u32(skb, TCA_BPF_FLAGS_GEN, prog->gen_flags)) + goto nla_put_failure; nla_nest_end(skb, nest); diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 4c85bd3a750c..85233c470035 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -93,7 +93,9 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (!new) return -ENOBUFS; - tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_init(&new->exts, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + if (err < 0) + goto errout; new->handle = handle; new->tp = tp; err = nla_parse_nested(tb, TCA_CGROUP_MAX, tca[TCA_OPTIONS], @@ -101,10 +103,14 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); if (err < 0) goto errout; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) { + tcf_exts_destroy(&e); + goto errout; + } err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); if (err < 0) { @@ -120,6 +126,7 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, call_rcu(&head->rcu, cls_cgroup_destroy_rcu); return 0; errout: + tcf_exts_destroy(&new->exts); kfree(new); return err; } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index fbfec6a18839..e39672394c7b 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -29,7 +29,7 @@ #include #include -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #include #endif @@ -87,12 +87,14 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } -static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto(const struct sk_buff *skb, + const struct flow_keys *flow) { return flow->basic.ip_proto; } -static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.src); @@ -100,7 +102,8 @@ static u32 flow_get_proto_src(const struct sk_buff *skb, const struct flow_keys return addr_fold(skb->sk); } -static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { if (flow->ports.ports) return ntohs(flow->ports.dst); @@ -125,14 +128,14 @@ static u32 flow_get_mark(const struct sk_buff *skb) static u32 flow_get_nfct(const struct sk_buff *skb) { -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) return addr_fold(skb->nfct); #else return 0; #endif } -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK) #define CTTUPLE(skb, member) \ ({ \ enum ip_conntrack_info ctinfo; \ @@ -149,7 +152,8 @@ static u32 flow_get_nfct(const struct sk_buff *skb) }) #endif -static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_src(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -161,7 +165,8 @@ fallback: return flow_get_src(skb, flow); } -static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): @@ -173,14 +178,16 @@ fallback: return flow_get_dst(skb, flow); } -static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_src(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, src.u.all)); fallback: return flow_get_proto_src(skb, flow); } -static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, const struct flow_keys *flow) +static u32 flow_get_nfct_proto_dst(const struct sk_buff *skb, + const struct flow_keys *flow) { return ntohs(CTTUPLE(skb, dst.u.all)); fallback: @@ -418,10 +425,12 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err1; err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); if (err < 0) - return err; + goto err1; err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); if (err < 0) @@ -432,13 +441,15 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (!fnew) goto err2; - tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) + goto err3; fold = (struct flow_filter *)*arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) - goto err2; + goto err3; /* Copy fold into fnew */ fnew->tp = fold->tp; @@ -458,31 +469,31 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) - goto err2; + goto err3; if (!tb[TCA_FLOW_KEYS]) - goto err2; + goto err3; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err2; + goto err3; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err2; + goto err3; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } @@ -542,6 +553,8 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, call_rcu(&fold->rcu, flow_destroy_filter); return 0; +err3: + tcf_exts_destroy(&fnew->exts); err2: tcf_em_tree_destroy(&t); kfree(fnew); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 5060801a2f6d..f6f40fba599b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -23,17 +23,26 @@ #include #include +#include +#include + struct fl_flow_key { int indev_ifindex; struct flow_dissector_key_control control; + struct flow_dissector_key_control enc_control; struct flow_dissector_key_basic basic; struct flow_dissector_key_eth_addrs eth; - struct flow_dissector_key_addrs ipaddrs; + struct flow_dissector_key_vlan vlan; union { struct flow_dissector_key_ipv4_addrs ipv4; struct flow_dissector_key_ipv6_addrs ipv6; }; struct flow_dissector_key_ports tp; + struct flow_dissector_key_keyid enc_key_id; + union { + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; + }; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -123,11 +132,31 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; + struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); + + info = skb_tunnel_info(skb); + if (info) { + struct ip_tunnel_key *key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_key.enc_ipv4.src = key->u.ipv4.src; + skb_key.enc_ipv4.dst = key->u.ipv4.dst; + break; + case AF_INET6: + skb_key.enc_ipv6.src = key->u.ipv6.src; + skb_key.enc_ipv6.dst = key->u.ipv6.dst; + break; + } + + skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); + } + skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. @@ -212,7 +241,8 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, tc.type = TC_SETUP_CLSFLOWER; tc.cls_flower = &offload; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, &tc); + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->protocol, + &tc); if (tc_skip_sw(flags)) return err; @@ -293,6 +323,22 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_TCP_DST] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_SRC] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_UDP_DST] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_VLAN_ID] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_VLAN_PRIO] = { .type = NLA_U8 }, + [TCA_FLOWER_KEY_VLAN_ETH_TYPE] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_ENC_KEY_ID] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV4_DST_MASK] = { .type = NLA_U32 }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_ENC_IPV6_DST_MASK] = { .len = sizeof(struct in6_addr) }, + [TCA_FLOWER_KEY_TCP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_TCP_DST_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_SRC_MASK] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_UDP_DST_MASK] = { .type = NLA_U16 }, }; static void fl_set_key_val(struct nlattr **tb, @@ -308,9 +354,29 @@ static void fl_set_key_val(struct nlattr **tb, memcpy(mask, nla_data(tb[mask_type]), len); } +static void fl_set_key_vlan(struct nlattr **tb, + struct flow_dissector_key_vlan *key_val, + struct flow_dissector_key_vlan *key_mask) +{ +#define VLAN_PRIORITY_MASK 0x7 + + if (tb[TCA_FLOWER_KEY_VLAN_ID]) { + key_val->vlan_id = + nla_get_u16(tb[TCA_FLOWER_KEY_VLAN_ID]) & VLAN_VID_MASK; + key_mask->vlan_id = VLAN_VID_MASK; + } + if (tb[TCA_FLOWER_KEY_VLAN_PRIO]) { + key_val->vlan_priority = + nla_get_u8(tb[TCA_FLOWER_KEY_VLAN_PRIO]) & + VLAN_PRIORITY_MASK; + key_mask->vlan_priority = VLAN_PRIORITY_MASK; + } +} + static int fl_set_key(struct net *net, struct nlattr **tb, struct fl_flow_key *key, struct fl_flow_key *mask) { + __be16 ethertype; #ifdef CONFIG_NET_CLS_IND if (tb[TCA_FLOWER_INDEV]) { int err = tcf_change_indev(net, tb[TCA_FLOWER_INDEV]); @@ -328,9 +394,20 @@ static int fl_set_key(struct net *net, struct nlattr **tb, mask->eth.src, TCA_FLOWER_KEY_ETH_SRC_MASK, sizeof(key->eth.src)); - fl_set_key_val(tb, &key->basic.n_proto, TCA_FLOWER_KEY_ETH_TYPE, - &mask->basic.n_proto, TCA_FLOWER_UNSPEC, - sizeof(key->basic.n_proto)); + if (tb[TCA_FLOWER_KEY_ETH_TYPE]) { + ethertype = nla_get_be16(tb[TCA_FLOWER_KEY_ETH_TYPE]); + + if (ethertype == htons(ETH_P_8021Q)) { + fl_set_key_vlan(tb, &key->vlan, &mask->vlan); + fl_set_key_val(tb, &key->basic.n_proto, + TCA_FLOWER_KEY_VLAN_ETH_TYPE, + &mask->basic.n_proto, TCA_FLOWER_UNSPEC, + sizeof(key->basic.n_proto)); + } else { + key->basic.n_proto = ethertype; + mask->basic.n_proto = cpu_to_be16(~0); + } + } if (key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) { @@ -359,20 +436,54 @@ static int fl_set_key(struct net *net, struct nlattr **tb, if (key->basic.ip_proto == IPPROTO_TCP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)); } else if (key->basic.ip_proto == IPPROTO_UDP) { fl_set_key_val(tb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)); fl_set_key_val(tb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)); } + if (tb[TCA_FLOWER_KEY_ENC_IPV4_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV4_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + fl_set_key_val(tb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, + &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)); + fl_set_key_val(tb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, + &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)); + } + + if (tb[TCA_FLOWER_KEY_ENC_IPV6_SRC] || + tb[TCA_FLOWER_KEY_ENC_IPV6_DST]) { + key->enc_control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + fl_set_key_val(tb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, + &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)); + fl_set_key_val(tb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)); + } + + fl_set_key_val(tb, &key->enc_key_id.keyid, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id.keyid, TCA_FLOWER_UNSPEC, + sizeof(key->enc_key_id.keyid)); + return 0; } @@ -404,12 +515,10 @@ static int fl_init_hashtable(struct cls_fl_head *head, #define FL_KEY_MEMBER_OFFSET(member) offsetof(struct fl_flow_key, member) #define FL_KEY_MEMBER_SIZE(member) (sizeof(((struct fl_flow_key *) 0)->member)) -#define FL_KEY_MEMBER_END_OFFSET(member) \ - (FL_KEY_MEMBER_OFFSET(member) + FL_KEY_MEMBER_SIZE(member)) -#define FL_KEY_IN_RANGE(mask, member) \ - (FL_KEY_MEMBER_OFFSET(member) <= (mask)->range.end && \ - FL_KEY_MEMBER_END_OFFSET(member) >= (mask)->range.start) +#define FL_KEY_IS_MASKED(mask, member) \ + memchr_inv(((char *)mask) + FL_KEY_MEMBER_OFFSET(member), \ + 0, FL_KEY_MEMBER_SIZE(member)) \ #define FL_KEY_SET(keys, cnt, id, member) \ do { \ @@ -418,9 +527,9 @@ static int fl_init_hashtable(struct cls_fl_head *head, cnt++; \ } while(0); -#define FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, id, member) \ +#define FL_KEY_SET_IF_MASKED(mask, keys, cnt, id, member) \ do { \ - if (FL_KEY_IN_RANGE(mask, member)) \ + if (FL_KEY_IS_MASKED(mask, member)) \ FL_KEY_SET(keys, cnt, id, member); \ } while(0); @@ -432,14 +541,16 @@ static void fl_init_dissector(struct cls_fl_head *head, FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_CONTROL, control); FL_KEY_SET(keys, cnt, FLOW_DISSECTOR_KEY_BASIC, basic); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); - FL_KEY_SET_IF_IN_RANGE(mask, keys, cnt, - FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_ETH_ADDRS, eth); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IPV4_ADDRS, ipv4); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_IPV6_ADDRS, ipv6); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_PORTS, tp); + FL_KEY_SET_IF_MASKED(&mask->key, keys, cnt, + FLOW_DISSECTOR_KEY_VLAN, vlan); skb_flow_dissector_init(&head->dissector, keys, cnt); } @@ -478,10 +589,12 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, struct tcf_exts e; int err; - tcf_exts_init(&e, TCA_FLOWER_ACT, 0); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); @@ -550,7 +663,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, if (!fnew) return -ENOBUFS; - tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + err = tcf_exts_init(&fnew->exts, TCA_FLOWER_ACT, 0); + if (err < 0) + goto errout; if (!handle) { handle = fl_grab_new_handle(tp, head); @@ -614,6 +729,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } @@ -668,6 +784,29 @@ static int fl_dump_key_val(struct sk_buff *skb, return 0; } +static int fl_dump_key_vlan(struct sk_buff *skb, + struct flow_dissector_key_vlan *vlan_key, + struct flow_dissector_key_vlan *vlan_mask) +{ + int err; + + if (!memchr_inv(vlan_mask, 0, sizeof(*vlan_mask))) + return 0; + if (vlan_mask->vlan_id) { + err = nla_put_u16(skb, TCA_FLOWER_KEY_VLAN_ID, + vlan_key->vlan_id); + if (err) + return err; + } + if (vlan_mask->vlan_priority) { + err = nla_put_u8(skb, TCA_FLOWER_KEY_VLAN_PRIO, + vlan_key->vlan_priority); + if (err) + return err; + } + return 0; +} + static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { @@ -712,6 +851,10 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, &mask->basic.n_proto, TCA_FLOWER_UNSPEC, sizeof(key->basic.n_proto))) goto nla_put_failure; + + if (fl_dump_key_vlan(skb, &key->vlan, &mask->vlan)) + goto nla_put_failure; + if ((key->basic.n_proto == htons(ETH_P_IP) || key->basic.n_proto == htons(ETH_P_IPV6)) && fl_dump_key_val(skb, &key->basic.ip_proto, TCA_FLOWER_KEY_IP_PROTO, @@ -738,21 +881,48 @@ static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, if (key->basic.ip_proto == IPPROTO_TCP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_TCP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_TCP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_TCP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_TCP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; else if (key->basic.ip_proto == IPPROTO_UDP && (fl_dump_key_val(skb, &key->tp.src, TCA_FLOWER_KEY_UDP_SRC, - &mask->tp.src, TCA_FLOWER_UNSPEC, + &mask->tp.src, TCA_FLOWER_KEY_UDP_SRC_MASK, sizeof(key->tp.src)) || fl_dump_key_val(skb, &key->tp.dst, TCA_FLOWER_KEY_UDP_DST, - &mask->tp.dst, TCA_FLOWER_UNSPEC, + &mask->tp.dst, TCA_FLOWER_KEY_UDP_DST_MASK, sizeof(key->tp.dst)))) goto nla_put_failure; + if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC, &mask->enc_ipv4.src, + TCA_FLOWER_KEY_ENC_IPV4_SRC_MASK, + sizeof(key->enc_ipv4.src)) || + fl_dump_key_val(skb, &key->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST, &mask->enc_ipv4.dst, + TCA_FLOWER_KEY_ENC_IPV4_DST_MASK, + sizeof(key->enc_ipv4.dst)))) + goto nla_put_failure; + else if (key->enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS && + (fl_dump_key_val(skb, &key->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC, &mask->enc_ipv6.src, + TCA_FLOWER_KEY_ENC_IPV6_SRC_MASK, + sizeof(key->enc_ipv6.src)) || + fl_dump_key_val(skb, &key->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST, + &mask->enc_ipv6.dst, + TCA_FLOWER_KEY_ENC_IPV6_DST_MASK, + sizeof(key->enc_ipv6.dst)))) + goto nla_put_failure; + + if (fl_dump_key_val(skb, &key->enc_key_id, TCA_FLOWER_KEY_ENC_KEY_ID, + &mask->enc_key_id, TCA_FLOWER_UNSPEC, + sizeof(key->enc_key_id))) + goto nla_put_failure; + nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags); if (tcf_exts_dump(skb, &f->exts)) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index f23a3b68bba6..9dc63d54e167 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -57,7 +57,7 @@ static u32 fw_hash(u32 handle) } static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, - struct tcf_result *res) + struct tcf_result *res) { struct fw_head *head = rcu_dereference_bh(tp->root); struct fw_filter *f; @@ -188,17 +188,20 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { static int fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, bool ovr) + struct nlattr **tb, struct nlattr **tca, unsigned long base, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct tcf_exts e; u32 mask; int err; - tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); @@ -235,9 +238,8 @@ errout: static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + u32 handle, struct nlattr **tca, unsigned long *arg, + bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f = (struct fw_filter *) *arg; @@ -270,10 +272,15 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, #endif /* CONFIG_NET_CLS_IND */ fnew->tp = f->tp; - tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&fnew->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) { + kfree(fnew); + return err; + } err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); if (err < 0) { + tcf_exts_destroy(&fnew->exts); kfree(fnew); return err; } @@ -313,7 +320,9 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) return -ENOBUFS; - tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_init(&f->exts, TCA_FW_ACT, TCA_FW_POLICE); + if (err < 0) + goto errout; f->id = handle; f->tp = tp; @@ -328,6 +337,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 08a3b0a6f5ab..455fc8f83d0a 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -268,8 +268,7 @@ static int route4_init(struct tcf_proto *tp) return 0; } -static void -route4_delete_filter(struct rcu_head *head) +static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = container_of(head, struct route4_filter, rcu); @@ -383,17 +382,19 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr **tb, struct nlattr *est, int new, bool ovr) { - int err; u32 id = 0, to = 0, nhandle = 0x8000; struct route4_filter *fp; unsigned int h1; struct route4_bucket *b; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_ROUTE4_TO]) { @@ -472,10 +473,8 @@ errout: } static int route4_change(struct net *net, struct sk_buff *in_skb, - struct tcf_proto *tp, unsigned long base, - u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct tcf_proto *tp, unsigned long base, u32 handle, + struct nlattr **tca, unsigned long *arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -503,7 +502,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (!f) goto errout; - tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_init(&f->exts, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + if (err < 0) + goto errout; + if (fold) { f->id = fold->id; f->iif = fold->iif; @@ -557,6 +559,8 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, return 0; errout: + if (f) + tcf_exts_destroy(&f->exts); kfree(f); return err; } diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index f9c9fc075fe6..4f05a19fb073 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -487,10 +487,12 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + err = tcf_exts_init(&e, TCA_RSVP_ACT, TCA_RSVP_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); + if (err < 0) + goto errout2; f = (struct rsvp_filter *)*arg; if (f) { @@ -506,7 +508,11 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, goto errout2; } - tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&n->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) { + kfree(n); + goto errout2; + } if (tb[TCA_RSVP_CLASSID]) { n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]); @@ -530,7 +536,9 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (f == NULL) goto errout2; - tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + err = tcf_exts_init(&f->exts, TCA_RSVP_ACT, TCA_RSVP_POLICE); + if (err < 0) + goto errout; h2 = 16; if (tb[TCA_RSVP_SRC]) { memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src)); @@ -627,6 +635,7 @@ insert: goto insert; errout: + tcf_exts_destroy(&f->exts); kfree(f); errout2: tcf_exts_destroy(&e); diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 944c8ff45055..96144bdf30db 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -50,14 +50,13 @@ struct tcindex_data { struct rcu_head rcu; }; -static inline int -tcindex_filter_is_set(struct tcindex_filter_result *r) +static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { return tcf_exts_is_predicative(&r->exts) || r->res.classid; } -static struct tcindex_filter_result * -tcindex_lookup(struct tcindex_data *p, u16 key) +static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, + u16 key) { if (p->perfect) { struct tcindex_filter_result *f = p->perfect + key; @@ -144,7 +143,8 @@ static void tcindex_destroy_rexts(struct rcu_head *head) static void tcindex_destroy_fexts(struct rcu_head *head) { - struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); + struct tcindex_filter *f = container_of(head, struct tcindex_filter, + rcu); tcf_exts_destroy(&f->result.exts); kfree(f); @@ -219,10 +219,10 @@ static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = { [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 }, }; -static void tcindex_filter_result_init(struct tcindex_filter_result *r) +static int tcindex_filter_result_init(struct tcindex_filter_result *r) { memset(r, 0, sizeof(*r)); - tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + return tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } static void __tcindex_partial_destroy(struct rcu_head *head) @@ -233,23 +233,57 @@ static void __tcindex_partial_destroy(struct rcu_head *head) kfree(p); } +static void tcindex_free_perfect_hash(struct tcindex_data *cp) +{ + int i; + + for (i = 0; i < cp->hash; i++) + tcf_exts_destroy(&cp->perfect[i].exts); + kfree(cp->perfect); +} + +static int tcindex_alloc_perfect_hash(struct tcindex_data *cp) +{ + int i, err = 0; + + cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result), + GFP_KERNEL); + if (!cp->perfect) + return -ENOMEM; + + for (i = 0; i < cp->hash; i++) { + err = tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + if (err < 0) + goto errout; + } + + return 0; + +errout: + tcindex_free_perfect_hash(cp); + return err; +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, struct tcindex_filter_result *r, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data *cp, *oldp; + struct tcindex_data *cp = NULL, *oldp; struct tcindex_filter *f = NULL; /* make gcc behave */ + int err, balloc = 0; struct tcf_exts e; - tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -ENOMEM; /* tcindex_data attributes must look atomic to classifier/lookup so @@ -270,19 +304,20 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (p->perfect) { int i; - cp->perfect = kmemdup(p->perfect, - sizeof(*r) * cp->hash, GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout; for (i = 0; i < cp->hash; i++) - tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); + cp->perfect[i].res = p->perfect[i].res; balloc = 1; } cp->h = p->h; - tcindex_filter_result_init(&new_filter_result); - tcindex_filter_result_init(&cr); + err = tcindex_filter_result_init(&new_filter_result); + if (err < 0) + goto errout1; + err = tcindex_filter_result_init(&cr); + if (err < 0) + goto errout1; if (old_r) cr.res = r->res; @@ -338,15 +373,8 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, err = -ENOMEM; if (!cp->perfect && !cp->h) { if (valid_perfect_hash(cp)) { - int i; - - cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); - if (!cp->perfect) + if (tcindex_alloc_perfect_hash(cp) < 0) goto errout_alloc; - for (i = 0; i < cp->hash; i++) - tcf_exts_init(&cp->perfect[i].exts, - TCA_TCINDEX_ACT, - TCA_TCINDEX_POLICE); balloc = 1; } else { struct tcindex_filter __rcu **hash; @@ -373,8 +401,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (!f) goto errout_alloc; f->key = handle; - tcindex_filter_result_init(&f->result); f->next = NULL; + err = tcindex_filter_result_init(&f->result); + if (err < 0) { + kfree(f); + goto errout_alloc; + } } if (tb[TCA_TCINDEX_CLASSID]) { @@ -387,8 +419,13 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - if (old_r && old_r != r) - tcindex_filter_result_init(old_r); + if (old_r && old_r != r) { + err = tcindex_filter_result_init(old_r); + if (err < 0) { + kfree(f); + goto errout_alloc; + } + } oldp = p; r->res = cr.res; @@ -415,9 +452,12 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, errout_alloc: if (balloc == 1) - kfree(cp->perfect); + tcindex_free_perfect_hash(cp); else if (balloc == 2) kfree(cp->h); +errout1: + tcf_exts_destroy(&cr.exts); + tcf_exts_destroy(&new_filter_result.exts); errout: kfree(cp); tcf_exts_destroy(&e); @@ -510,7 +550,7 @@ static bool tcindex_destroy(struct tcf_proto *tp, bool force) static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index ffe593efe930..ae83c3aec308 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -104,7 +104,8 @@ static inline unsigned int u32_hash_fold(__be32 key, return h; } -static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) +static int u32_classify(struct sk_buff *skb, const struct tcf_proto *tp, + struct tcf_result *res) { struct { struct tc_u_knode *knode; @@ -256,8 +257,7 @@ deadloop: return -1; } -static struct tc_u_hnode * -u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) +static struct tc_u_hnode *u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) { struct tc_u_hnode *ht; @@ -270,8 +270,7 @@ u32_lookup_ht(struct tc_u_common *tp_c, u32 handle) return ht; } -static struct tc_u_knode * -u32_lookup_key(struct tc_u_hnode *ht, u32 handle) +static struct tc_u_knode *u32_lookup_key(struct tc_u_hnode *ht, u32 handle) { unsigned int sel; struct tc_u_knode *n = NULL; @@ -360,8 +359,7 @@ static int u32_init(struct tcf_proto *tp) return 0; } -static int u32_destroy_key(struct tcf_proto *tp, - struct tc_u_knode *n, +static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); @@ -448,9 +446,8 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) } } -static int u32_replace_hw_hnode(struct tcf_proto *tp, - struct tc_u_hnode *h, - u32 flags) +static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -496,9 +493,8 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) } } -static int u32_replace_hw_knode(struct tcf_proto *tp, - struct tc_u_knode *n, - u32 flags) +static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, + u32 flags) { struct net_device *dev = tp->q->dev_queue->dev; struct tc_cls_u32_offload u32_offload = {0}; @@ -709,13 +705,15 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr) { - int err; struct tcf_exts e; + int err; - tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); if (err < 0) return err; + err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + if (err < 0) + goto errout; err = -EINVAL; if (tb[TCA_U32_LINK]) { @@ -761,8 +759,7 @@ errout: return err; } -static void u32_replace_knode(struct tcf_proto *tp, - struct tc_u_common *tp_c, +static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, struct tc_u_knode *n) { struct tc_u_knode __rcu **ins; @@ -833,15 +830,17 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, new->tp = tp; memcpy(&new->sel, s, sizeof(*s) + s->nkeys*sizeof(struct tc_u32_key)); - tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (tcf_exts_init(&new->exts, TCA_U32_ACT, TCA_U32_POLICE)) { + kfree(new); + return NULL; + } return new; } static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, - unsigned long *arg, bool ovr) + struct nlattr **tca, unsigned long *arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -985,9 +984,12 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; n->flags = flags; - tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); n->tp = tp; + err = tcf_exts_init(&n->exts, TCA_U32_ACT, TCA_U32_POLICE); + if (err < 0) + goto errout; + #ifdef CONFIG_CLS_U32_MARK n->pcpu_success = alloc_percpu(u32); if (!n->pcpu_success) { @@ -1028,9 +1030,10 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, errhw: #ifdef CONFIG_CLS_U32_MARK free_percpu(n->pcpu_success); -errout: #endif +errout: + tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF free_percpu(n->pf); #endif @@ -1079,7 +1082,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, - struct sk_buff *skb, struct tcmsg *t) + struct sk_buff *skb, struct tcmsg *t) { struct tc_u_knode *n = (struct tc_u_knode *)fh; struct tc_u_hnode *ht_up, *ht_down; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 12ebde845523..206dc24add3a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include @@ -259,37 +260,40 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) { struct Qdisc *q; + if (!qdisc_dev(root)) + return (root->handle == handle ? root : NULL); + if (!(root->flags & TCQ_F_BUILTIN) && root->handle == handle) return root; - list_for_each_entry_rcu(q, &root->list, list) { + hash_for_each_possible_rcu(qdisc_dev(root)->qdisc_hash, q, hash, handle) { if (q->handle == handle) return q; } return NULL; } -void qdisc_list_add(struct Qdisc *q) +void qdisc_hash_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { struct Qdisc *root = qdisc_dev(q)->qdisc; WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); - list_add_tail_rcu(&q->list, &root->list); + hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); } } -EXPORT_SYMBOL(qdisc_list_add); +EXPORT_SYMBOL(qdisc_hash_add); -void qdisc_list_del(struct Qdisc *q) +void qdisc_hash_del(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { ASSERT_RTNL(); - list_del_rcu(&q->list); + hash_del_rcu(&q->hash); } } -EXPORT_SYMBOL(qdisc_list_del); +EXPORT_SYMBOL(qdisc_hash_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { @@ -385,7 +389,8 @@ static __u8 __detect_linklayer(struct tc_ratespec *r, __u32 *rtab) static struct qdisc_rate_table *qdisc_rtab_list; -struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab) +struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, + struct nlattr *tab) { struct qdisc_rate_table *rtab; @@ -537,7 +542,8 @@ nla_put_failure: return -1; } -void __qdisc_calculate_pkt_len(struct sk_buff *skb, const struct qdisc_size_table *stab) +void __qdisc_calculate_pkt_len(struct sk_buff *skb, + const struct qdisc_size_table *stab) { int pkt_len, slot; @@ -884,10 +890,10 @@ static struct lock_class_key qdisc_rx_lock; Parameters are passed via opt. */ -static struct Qdisc * -qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - struct Qdisc *p, u32 parent, u32 handle, - struct nlattr **tca, int *errp) +static struct Qdisc *qdisc_create(struct net_device *dev, + struct netdev_queue *dev_queue, + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -998,7 +1004,7 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, goto err_out4; } - qdisc_list_add(sch); + qdisc_hash_add(sch); return sch; } @@ -1069,7 +1075,8 @@ struct check_loop_arg { int depth; }; -static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w); +static int check_loop_fn(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *w); static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth) { @@ -1431,10 +1438,11 @@ err_out: static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, - int *q_idx_p, int s_q_idx) + int *q_idx_p, int s_q_idx, bool recur) { int ret = 0, q_idx = *q_idx_p; struct Qdisc *q; + int b; if (!root) return 0; @@ -1445,18 +1453,30 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, } else { if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } - list_for_each_entry(q, &root->list, list) { + + /* If dumping singletons, there is no qdisc_dev(root) and the singleton + * itself has already been dumped. + * + * If we've already dumped the top-level (ingress) qdisc above and the global + * qdisc hashtable, we don't want to hit it again + */ + if (!qdisc_dev(root) || !recur) + goto out; + + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (q_idx < s_q_idx) { q_idx++; continue; } if (!tc_qdisc_dump_ignore(q) && tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) + cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWQDISC) <= 0) goto done; q_idx++; } @@ -1490,13 +1510,14 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx, + true) < 0) goto done; dev_queue = dev_ingress_queue(dev); if (dev_queue && tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, - &q_idx, s_q_idx) < 0) + &q_idx, s_q_idx, false) < 0) goto done; cont: @@ -1625,7 +1646,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n) if (cops->delete) err = cops->delete(q, cl); if (err == 0) - tclass_notify(net, skb, n, q, cl, RTM_DELTCLASS); + tclass_notify(net, skb, n, q, cl, + RTM_DELTCLASS); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1723,12 +1745,14 @@ struct qdisc_dump_args { struct netlink_callback *cb; }; -static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg) +static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, + struct qdisc_walker *arg) { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, - a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); + a->cb->nlh->nlmsg_seq, NLM_F_MULTI, + RTM_NEWTCLASS); } static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb, @@ -1765,6 +1789,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, int *t_p, int s_t) { struct Qdisc *q; + int b; if (!root) return 0; @@ -1772,7 +1797,10 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb, if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0) return -1; - list_for_each_entry(q, &root->list, list) { + if (!qdisc_dev(root)) + return 0; + + hash_for_each(qdisc_dev(root)->qdisc_hash, b, q, hash) { if (tc_dump_tclass_qdisc(q, skb, tcm, cb, t_p, s_t) < 0) return -1; } @@ -1957,10 +1985,12 @@ static int __init pktsched_init(void) rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, NULL); + rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, + NULL); rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, NULL); + rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, + NULL); return 0; } diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index 4002df3c7d9f..5bfa79ee657c 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -69,7 +69,7 @@ struct codel_sched_data { static struct sk_buff *dequeue_func(struct codel_vars *vars, void *ctx) { struct Qdisc *sch = ctx; - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); if (skb) sch->qstats.backlog -= qdisc_pkt_len(skb); @@ -172,7 +172,7 @@ static int codel_change(struct Qdisc *sch, struct nlattr *opt) qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index baeed6a78d28..1e37247656f8 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -31,7 +31,7 @@ static int bfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); return qdisc_drop(skb, sch, to_free); @@ -42,7 +42,7 @@ static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch, { unsigned int prev_backlog; - if (likely(skb_queue_len(&sch->q) < sch->limit)) + if (likely(sch->q.qlen < sch->limit)) return qdisc_enqueue_tail(skb, sch); prev_backlog = sch->qstats.backlog; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index e5458b99e09c..18e752439f6f 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -86,6 +86,7 @@ struct fq_sched_data { struct rb_root delayed; /* for rate limited flows */ u64 time_next_delayed_flow; + unsigned long unthrottle_latency_ns; struct fq_flow internal; /* for non classified or high prio packets */ u32 quantum; @@ -94,6 +95,7 @@ struct fq_sched_data { u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ u32 orphan_mask; /* mask for orphaned skb */ + u32 low_rate_threshold; struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -407,11 +409,19 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, static void fq_check_throttled(struct fq_sched_data *q, u64 now) { + unsigned long sample; struct rb_node *p; if (q->time_next_delayed_flow > now) return; + /* Update unthrottle latency EWMA. + * This is cheap and can help diagnosing timer/latency problems. + */ + sample = (unsigned long)(now - q->time_next_delayed_flow); + q->unthrottle_latency_ns -= q->unthrottle_latency_ns >> 3; + q->unthrottle_latency_ns += sample >> 3; + q->time_next_delayed_flow = ~0ULL; while ((p = rb_first(&q->delayed)) != NULL) { struct fq_flow *f = container_of(p, struct fq_flow, rate_node); @@ -433,7 +443,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch) struct fq_flow_head *head; struct sk_buff *skb; struct fq_flow *f; - u32 rate; + u32 rate, plen; skb = fq_dequeue_head(sch, &q->internal); if (skb) @@ -482,7 +492,7 @@ begin: prefetch(&skb->end); f->credit -= qdisc_pkt_len(skb); - if (f->credit > 0 || !q->rate_enable) + if (!q->rate_enable) goto out; /* Do not pace locally generated ack packets */ @@ -493,8 +503,15 @@ begin: if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); + if (rate <= q->low_rate_threshold) { + f->credit = 0; + plen = qdisc_pkt_len(skb); + } else { + plen = max(qdisc_pkt_len(skb), q->quantum); + if (f->credit > 0) + goto out; + } if (rate != ~0U) { - u32 plen = max(qdisc_pkt_len(skb), q->quantum); u64 len = (u64)plen * NSEC_PER_SEC; if (likely(rate)) @@ -507,7 +524,12 @@ begin: len = NSEC_PER_SEC; q->stat_pkts_too_long++; } - + /* Account for schedule/timers drifts. + * f->time_next_packet was set when prior packet was sent, + * and current time (@now) can be too late by tens of us. + */ + if (f->time_next_packet) + len -= min(len/2, now - f->time_next_packet); f->time_next_packet = now + len; } out: @@ -662,6 +684,7 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = { [TCA_FQ_FLOW_MAX_RATE] = { .type = NLA_U32 }, [TCA_FQ_BUCKETS_LOG] = { .type = NLA_U32 }, [TCA_FQ_FLOW_REFILL_DELAY] = { .type = NLA_U32 }, + [TCA_FQ_LOW_RATE_THRESHOLD] = { .type = NLA_U32 }, }; static int fq_change(struct Qdisc *sch, struct nlattr *opt) @@ -716,6 +739,10 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) if (tb[TCA_FQ_FLOW_MAX_RATE]) q->flow_max_rate = nla_get_u32(tb[TCA_FQ_FLOW_MAX_RATE]); + if (tb[TCA_FQ_LOW_RATE_THRESHOLD]) + q->low_rate_threshold = + nla_get_u32(tb[TCA_FQ_LOW_RATE_THRESHOLD]); + if (tb[TCA_FQ_RATE_ENABLE]) { u32 enable = nla_get_u32(tb[TCA_FQ_RATE_ENABLE]); @@ -774,6 +801,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->initial_quantum = 10 * psched_mtu(qdisc_dev(sch)); q->flow_refill_delay = msecs_to_jiffies(40); q->flow_max_rate = ~0U; + q->time_next_delayed_flow = ~0ULL; q->rate_enable = 1; q->new_flows.first = NULL; q->old_flows.first = NULL; @@ -781,6 +809,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->fq_root = NULL; q->fq_trees_log = ilog2(1024); q->orphan_mask = 1024 - 1; + q->low_rate_threshold = 550000 / 8; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -811,6 +840,8 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || + nla_put_u32(skb, TCA_FQ_LOW_RATE_THRESHOLD, + q->low_rate_threshold) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; @@ -823,20 +854,24 @@ nla_put_failure: static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct fq_sched_data *q = qdisc_priv(sch); - u64 now = ktime_get_ns(); - struct tc_fq_qd_stats st = { - .gc_flows = q->stat_gc_flows, - .highprio_packets = q->stat_internal_packets, - .tcp_retrans = q->stat_tcp_retrans, - .throttled = q->stat_throttled, - .flows_plimit = q->stat_flows_plimit, - .pkts_too_long = q->stat_pkts_too_long, - .allocation_errors = q->stat_allocation_errors, - .flows = q->flows, - .inactive_flows = q->inactive_flows, - .throttled_flows = q->throttled_flows, - .time_next_delayed_flow = q->time_next_delayed_flow - now, - }; + struct tc_fq_qd_stats st; + + sch_tree_lock(sch); + + st.gc_flows = q->stat_gc_flows; + st.highprio_packets = q->stat_internal_packets; + st.tcp_retrans = q->stat_tcp_retrans; + st.throttled = q->stat_throttled; + st.flows_plimit = q->stat_flows_plimit; + st.pkts_too_long = q->stat_pkts_too_long; + st.allocation_errors = q->stat_allocation_errors; + st.time_next_delayed_flow = q->time_next_delayed_flow - ktime_get_ns(); + st.flows = q->flows; + st.inactive_flows = q->inactive_flows; + st.throttled_flows = q->throttled_flows; + st.unthrottle_latency_ns = min_t(unsigned long, + q->unthrottle_latency_ns, ~0U); + sch_tree_unlock(sch); return gnet_stats_copy_app(d, &st, sizeof(st)); } diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 657c13362b19..6cfb6e9038c2 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -423,7 +423,6 @@ struct Qdisc noop_qdisc = { .dequeue = noop_dequeue, .flags = TCQ_F_BUILTIN, .ops = &noop_qdisc_ops, - .list = LIST_HEAD_INIT(noop_qdisc.list), .q.lock = __SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock), .dev_queue = &noop_netdev_queue, .running = SEQCNT_ZERO(noop_qdisc.running), @@ -467,7 +466,7 @@ static const u8 prio2band[TC_PRIO_MAX + 1] = { */ struct pfifo_fast_priv { u32 bitmap; - struct sk_buff_head q[PFIFO_FAST_BANDS]; + struct qdisc_skb_head q[PFIFO_FAST_BANDS]; }; /* @@ -478,7 +477,7 @@ struct pfifo_fast_priv { */ static const int bitmap2band[] = {-1, 0, 1, 0, 2, 0, 1, 0}; -static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, +static inline struct qdisc_skb_head *band2list(struct pfifo_fast_priv *priv, int band) { return priv->q + band; @@ -487,10 +486,10 @@ static inline struct sk_buff_head *band2list(struct pfifo_fast_priv *priv, static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, struct sk_buff **to_free) { - if (skb_queue_len(&qdisc->q) < qdisc_dev(qdisc)->tx_queue_len) { + if (qdisc->q.qlen < qdisc_dev(qdisc)->tx_queue_len) { int band = prio2band[skb->priority & TC_PRIO_MAX]; struct pfifo_fast_priv *priv = qdisc_priv(qdisc); - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *list = band2list(priv, band); priv->bitmap |= (1 << band); qdisc->q.qlen++; @@ -506,11 +505,16 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (likely(band >= 0)) { - struct sk_buff_head *list = band2list(priv, band); - struct sk_buff *skb = __qdisc_dequeue_head(qdisc, list); + struct qdisc_skb_head *qh = band2list(priv, band); + struct sk_buff *skb = __qdisc_dequeue_head(qh); + + if (likely(skb != NULL)) { + qdisc_qstats_backlog_dec(qdisc, skb); + qdisc_bstats_update(qdisc, skb); + } qdisc->q.qlen--; - if (skb_queue_empty(list)) + if (qh->qlen == 0) priv->bitmap &= ~(1 << band); return skb; @@ -525,9 +529,9 @@ static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc) int band = bitmap2band[priv->bitmap]; if (band >= 0) { - struct sk_buff_head *list = band2list(priv, band); + struct qdisc_skb_head *qh = band2list(priv, band); - return skb_peek(list); + return qh->head; } return NULL; @@ -565,7 +569,7 @@ static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt) struct pfifo_fast_priv *priv = qdisc_priv(qdisc); for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) - __skb_queue_head_init(band2list(priv, prio)); + qdisc_skb_head_init(band2list(priv, prio)); /* Can by-pass the queue discipline */ qdisc->flags |= TCQ_F_CAN_BYPASS; @@ -613,8 +617,8 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p); sch->padded = (char *) sch - (char *) p; } - INIT_LIST_HEAD(&sch->list); - skb_queue_head_init(&sch->q); + qdisc_skb_head_init(&sch->q); + spin_lock_init(&sch->q.lock); spin_lock_init(&sch->busylock); lockdep_set_class(&sch->busylock, @@ -701,7 +705,7 @@ void qdisc_destroy(struct Qdisc *qdisc) return; #ifdef CONFIG_NET_SCHED - qdisc_list_del(qdisc); + qdisc_hash_del(qdisc); qdisc_put_stab(rtnl_dereference(qdisc->stab)); #endif @@ -789,6 +793,10 @@ static void attach_default_qdiscs(struct net_device *dev) qdisc->ops->attach(qdisc); } } +#ifdef CONFIG_NET_SCHED + if (dev->qdisc) + qdisc_hash_add(dev->qdisc); +#endif } static void transition_one_qdisc(struct net_device *dev, diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 3ddc7bd74ecb..000f1d36128e 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -142,8 +142,6 @@ struct hfsc_class { link-sharing, max(myf, cfmin) */ u64 cl_myf; /* my fit-time (calculated from this class's own upperlimit curve) */ - u64 cl_myfadj; /* my fit-time adjustment (to cancel - history dependence) */ u64 cl_cfmin; /* earliest children's fit-time (used with cl_myf to obtain cl_f) */ u64 cl_cvtmin; /* minimal virtual time among the @@ -151,11 +149,8 @@ struct hfsc_class { (monotonic within a period) */ u64 cl_vtadj; /* intra-period cumulative vt adjustment */ - u64 cl_vtoff; /* inter-period cumulative vt offset */ - u64 cl_cvtmax; /* max child's vt in the last period */ - u64 cl_cvtoff; /* cumulative cvtmax of all periods */ - u64 cl_pcvtoff; /* parent's cvtoff at initialization - time */ + u64 cl_cvtoff; /* largest virtual time seen among + the children */ struct internal_sc cl_rsc; /* internal real-time service curve */ struct internal_sc cl_fsc; /* internal fair service curve */ @@ -701,28 +696,16 @@ init_vf(struct hfsc_class *cl, unsigned int len) } else { /* * first child for a new parent backlog period. - * add parent's cvtmax to cvtoff to make a new - * vt (vtoff + vt) larger than the vt in the - * last period for all children. + * initialize cl_vt to the highest value seen + * among the siblings. this is analogous to + * what cur_time would provide in realtime case. */ - vt = cl->cl_parent->cl_cvtmax; - cl->cl_parent->cl_cvtoff += vt; - cl->cl_parent->cl_cvtmax = 0; + cl->cl_vt = cl->cl_parent->cl_cvtoff; cl->cl_parent->cl_cvtmin = 0; - cl->cl_vt = 0; } - cl->cl_vtoff = cl->cl_parent->cl_cvtoff - - cl->cl_pcvtoff; - /* update the virtual curve */ - vt = cl->cl_vt + cl->cl_vtoff; - rtsc_min(&cl->cl_virtual, &cl->cl_fsc, vt, - cl->cl_total); - if (cl->cl_virtual.x == vt) { - cl->cl_virtual.x -= cl->cl_vtoff; - cl->cl_vtoff = 0; - } + rtsc_min(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_vtadj = 0; cl->cl_vtperiod++; /* increment vt period */ @@ -745,7 +728,6 @@ init_vf(struct hfsc_class *cl, unsigned int len) /* compute myf */ cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); - cl->cl_myfadj = 0; } } @@ -779,8 +761,7 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) go_passive = 0; /* update vt */ - cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) - - cl->cl_vtoff + cl->cl_vtadj; + cl->cl_vt = rtsc_y2x(&cl->cl_virtual, cl->cl_total) + cl->cl_vtadj; /* * if vt of the class is smaller than cvtmin, @@ -795,9 +776,9 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) if (go_passive) { /* no more active child, going passive */ - /* update cvtmax of the parent class */ - if (cl->cl_vt > cl->cl_parent->cl_cvtmax) - cl->cl_parent->cl_cvtmax = cl->cl_vt; + /* update cvtoff of the parent class */ + if (cl->cl_vt > cl->cl_parent->cl_cvtoff) + cl->cl_parent->cl_cvtoff = cl->cl_vt; /* remove this class from the vt tree */ vttree_remove(cl); @@ -813,9 +794,10 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) /* update f */ if (cl->cl_flags & HFSC_USC) { + cl->cl_myf = rtsc_y2x(&cl->cl_ulimit, cl->cl_total); +#if 0 cl->cl_myf = cl->cl_myfadj + rtsc_y2x(&cl->cl_ulimit, cl->cl_total); -#if 0 /* * This code causes classes to stay way under their * limit when multiple classes are used at gigabit @@ -940,7 +922,7 @@ static void hfsc_change_fsc(struct hfsc_class *cl, struct tc_service_curve *fsc) { sc2isc(fsc, &cl->cl_fsc); - rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vtoff + cl->cl_vt, cl->cl_total); + rtsc_init(&cl->cl_virtual, &cl->cl_fsc, cl->cl_vt, cl->cl_total); cl->cl_flags |= HFSC_FSC; } @@ -1094,7 +1076,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (parent->level == 0) hfsc_purge_queue(sch, parent); hfsc_adjust_levels(parent); - cl->cl_pcvtoff = parent->cl_cvtoff; sch_tree_unlock(sch); qdisc_class_hash_grow(sch, &q->clhash); @@ -1482,16 +1463,12 @@ hfsc_reset_class(struct hfsc_class *cl) cl->cl_e = 0; cl->cl_vt = 0; cl->cl_vtadj = 0; - cl->cl_vtoff = 0; cl->cl_cvtmin = 0; - cl->cl_cvtmax = 0; cl->cl_cvtoff = 0; - cl->cl_pcvtoff = 0; cl->cl_vtperiod = 0; cl->cl_parentperiod = 0; cl->cl_f = 0; cl->cl_myf = 0; - cl->cl_myfadj = 0; cl->cl_cfmin = 0; cl->cl_nactive = 0; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 53dbfa187870..c798d0de8a9d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -162,7 +162,7 @@ struct htb_sched { struct work_struct work; /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; + struct qdisc_skb_head direct_queue; long direct_pkts; struct qdisc_watchdog watchdog; @@ -570,6 +570,22 @@ static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) list_del_init(&cl->un.leaf.drop_list); } +static void htb_enqueue_tail(struct sk_buff *skb, struct Qdisc *sch, + struct qdisc_skb_head *qh) +{ + struct sk_buff *last = qh->tail; + + if (last) { + skb->next = NULL; + last->next = skb; + qh->tail = skb; + } else { + qh->tail = skb; + qh->head = skb; + } + qh->qlen++; +} + static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -580,7 +596,7 @@ static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == HTB_DIRECT) { /* enqueue to helper queue */ if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); + htb_enqueue_tail(skb, sch, &q->direct_queue); q->direct_pkts++; } else { return qdisc_drop(skb, sch, to_free); @@ -888,7 +904,7 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) unsigned long start_at; /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); + skb = __qdisc_dequeue_head(&q->direct_queue); if (skb != NULL) { ok: qdisc_bstats_update(sch, skb); @@ -1019,7 +1035,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); INIT_WORK(&q->work, htb_work_func); - __skb_queue_head_init(&q->direct_queue); + qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) q->direct_qlen = nla_get_u32(tb[TCA_HTB_DIRECT_QLEN]); diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index b9439827c172..2bc8d7f8df16 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -88,7 +88,7 @@ static void mq_attach(struct Qdisc *sch) qdisc_destroy(old); #ifdef CONFIG_NET_SCHED if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); #endif } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 549c66359924..b5c502c78143 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -182,7 +182,7 @@ static void mqprio_attach(struct Qdisc *sch) if (old) qdisc_destroy(old); if (ntx < dev->real_num_tx_queues) - qdisc_list_add(qdisc); + qdisc_hash_add(qdisc); } kfree(priv->qdiscs); priv->qdiscs = NULL; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index aaaf02175338..9f7b380cf0a3 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -413,6 +413,16 @@ static struct sk_buff *netem_segment(struct sk_buff *skb, struct Qdisc *sch, return segs; } +static void netem_enqueue_skb_head(struct qdisc_skb_head *qh, struct sk_buff *skb) +{ + skb->next = qh->head; + + if (!qh->head) + qh->tail = skb; + qh->head = skb; + qh->qlen++; +} + /* * Insert one skb into qdisc. * Note: parent depends on return value to account for queue length. @@ -502,7 +512,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, 1<<(prandom_u32() % 8); } - if (unlikely(skb_queue_len(&sch->q) >= sch->limit)) + if (unlikely(sch->q.qlen >= sch->limit)) return qdisc_drop(skb, sch, to_free); qdisc_qstats_backlog_inc(sch, skb); @@ -522,8 +532,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->rate) { struct sk_buff *last; - if (!skb_queue_empty(&sch->q)) - last = skb_peek_tail(&sch->q); + if (sch->q.qlen) + last = sch->q.tail; else last = netem_rb_to_skb(rb_last(&q->t_root)); if (last) { @@ -552,7 +562,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, cb->time_to_send = psched_get_time(); q->counter = 0; - __skb_queue_head(&sch->q, skb); + netem_enqueue_skb_head(&sch->q, skb); sch->qstats.requeues++; } @@ -587,7 +597,7 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) struct rb_node *p; tfifo_dequeue: - skb = __skb_dequeue(&sch->q); + skb = __qdisc_dequeue_head(&sch->q); if (skb) { qdisc_qstats_backlog_dec(sch, skb); deliver: diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index a570b0bb254c..5c3a99d6aa82 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -231,7 +231,7 @@ static int pie_change(struct Qdisc *sch, struct nlattr *opt) /* Drop excess packets if new limit is lower */ qlen = sch->q.qlen; while (sch->q.qlen > sch->limit) { - struct sk_buff *skb = __skb_dequeue(&sch->q); + struct sk_buff *skb = __qdisc_dequeue_head(&sch->q); dropped += qdisc_pkt_len(skb); qdisc_qstats_backlog_dec(sch, skb); @@ -511,7 +511,7 @@ static int pie_dump_stats(struct Qdisc *sch, struct gnet_dump *d) static struct sk_buff *pie_qdisc_dequeue(struct Qdisc *sch) { struct sk_buff *skb; - skb = __qdisc_dequeue_head(sch, &sch->q); + skb = qdisc_dequeue_head(sch); if (!skb) return NULL; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 1c23060c41a6..f10d3397f917 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -1408,7 +1408,7 @@ void sctp_assoc_sync_pmtu(struct sock *sk, struct sctp_association *asoc) transports) { if (t->pmtu_pending && t->dst) { sctp_transport_update_pmtu(sk, t, - WORD_TRUNC(dst_mtu(t->dst))); + SCTP_TRUNC4(dst_mtu(t->dst))); t->pmtu_pending = 0; } if (!pmtu || (t->pathmtu < pmtu)) diff --git a/net/sctp/auth.c b/net/sctp/auth.c index 912eb1685a5d..f99d4855d3de 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -48,7 +48,7 @@ static struct sctp_hmac sctp_hmac_list[SCTP_AUTH_NUM_HMACS] = { /* id 2 is reserved as well */ .hmac_id = SCTP_AUTH_HMAC_ID_RESERVED_2, }, -#if defined (CONFIG_CRYPTO_SHA256) || defined (CONFIG_CRYPTO_SHA256_MODULE) +#if IS_ENABLED(CONFIG_CRYPTO_SHA256) { .hmac_id = SCTP_AUTH_HMAC_ID_SHA256, .hmac_name = "hmac(sha256)", diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 0a3dbec0a8fb..7a1cdf43e49d 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -70,6 +70,19 @@ static struct sctp_datamsg *sctp_datamsg_new(gfp_t gfp) return msg; } +void sctp_datamsg_free(struct sctp_datamsg *msg) +{ + struct sctp_chunk *chunk; + + /* This doesn't have to be a _safe vairant because + * sctp_chunk_free() only drops the refs. + */ + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_chunk_free(chunk); + + sctp_datamsg_put(msg); +} + /* Final destructruction of datamsg memory. */ static void sctp_datamsg_destroy(struct sctp_datamsg *msg) { @@ -187,9 +200,10 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, /* This is the biggest possible DATA chunk that can fit into * the packet */ - max_data = (asoc->pathmtu - - sctp_sk(asoc->base.sk)->pf->af->net_header_len - - sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk)) & ~3; + max_data = asoc->pathmtu - + sctp_sk(asoc->base.sk)->pf->af->net_header_len - + sizeof(struct sctphdr) - sizeof(struct sctp_data_chunk); + max_data = SCTP_TRUNC4(max_data); max = asoc->frag_point; /* If the the peer requested that we authenticate DATA chunks @@ -200,8 +214,8 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max_data -= WORD_ROUND(sizeof(sctp_auth_chunk_t) + - hmac_desc->hmac_len); + max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + + hmac_desc->hmac_len); } /* Now, check if we need to reduce our max */ @@ -221,7 +235,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max) - max_data -= WORD_ROUND(sizeof(sctp_sack_chunk_t)); + max_data -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) diff --git a/net/sctp/input.c b/net/sctp/input.c index 1555fb8c68e0..a2ea1d1cc06a 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -605,7 +605,7 @@ void sctp_v4_err(struct sk_buff *skb, __u32 info) /* PMTU discovery (RFC1191) */ if (ICMP_FRAG_NEEDED == code) { sctp_icmp_frag_needed(sk, asoc, transport, - WORD_TRUNC(info)); + SCTP_TRUNC4(info)); goto out_unlock; } else { if (ICMP_PROT_UNREACH == code) { @@ -673,7 +673,7 @@ static int sctp_rcv_ootb(struct sk_buff *skb) if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = offset + WORD_ROUND(ntohs(ch->length)); + ch_end = offset + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb->len) break; @@ -1128,7 +1128,7 @@ static struct sctp_association *__sctp_rcv_walk_lookup(struct net *net, if (ntohs(ch->length) < sizeof(sctp_chunkhdr_t)) break; - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) break; @@ -1197,7 +1197,7 @@ static struct sctp_association *__sctp_rcv_lookup_harder(struct net *net, * that the chunk length doesn't cause overflow. Otherwise, we'll * walk off the end. */ - if (WORD_ROUND(ntohs(ch->length)) > skb->len) + if (SCTP_PAD4(ntohs(ch->length)) > skb->len) return NULL; /* If this is INIT/INIT-ACK look inside the chunk too. */ diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 6437aa97cfd7..f731de3e8428 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -213,7 +213,7 @@ new_skb: } chunk->chunk_hdr = ch; - chunk->chunk_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + chunk->chunk_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); skb_pull(chunk->skb, sizeof(sctp_chunkhdr_t)); chunk->subh.v = NULL; /* Subheader is no longer valid. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index 31b7bc35895d..2a5c1896d18f 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -180,7 +180,6 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, int one_packet, gfp_t gfp) { sctp_xmit_t retval; - int error = 0; pr_debug("%s: packet:%p size:%Zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); @@ -188,6 +187,8 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, switch ((retval = (sctp_packet_append_chunk(packet, chunk)))) { case SCTP_XMIT_PMTU_FULL: if (!packet->has_cookie_echo) { + int error = 0; + error = sctp_packet_transmit(packet, gfp); if (error < 0) chunk->skb->sk->sk_err = -error; @@ -296,7 +297,7 @@ static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, struct sctp_chunk *chunk) { sctp_xmit_t retval = SCTP_XMIT_OK; - __u16 chunk_len = WORD_ROUND(ntohs(chunk->chunk_hdr->length)); + __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); @@ -441,14 +442,14 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * time. Application may notice this error. */ pr_err_once("Trying to GSO but underlying device doesn't support it."); - goto nomem; + goto err; } } else { pkt_size = packet->size; } head = alloc_skb(pkt_size + MAX_HEADER, gfp); if (!head) - goto nomem; + goto err; if (gso) { NAPI_GRO_CB(head)->last = head; skb_shinfo(head)->gso_type = sk->sk_gso_type; @@ -469,8 +470,12 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } } dst = dst_clone(tp->dst); - if (!dst) - goto no_route; + if (!dst) { + if (asoc) + IP_INC_STATS(sock_net(asoc->base.sk), + IPSTATS_MIB_OUTNOROUTES); + goto nodst; + } skb_dst_set(head, dst); /* Build the SCTP header. */ @@ -503,7 +508,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (gso) { pkt_size = packet->overhead; list_for_each_entry(chunk, &packet->chunk_list, list) { - int padded = WORD_ROUND(chunk->skb->len); + int padded = SCTP_PAD4(chunk->skb->len); if (pkt_size + padded > tp->pathmtu) break; @@ -533,7 +538,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * included in the chunk length field. The sender should * never pad with more than 3 bytes. * - * [This whole comment explains WORD_ROUND() below.] + * [This whole comment explains SCTP_PAD4() below.] */ pkt_size -= packet->overhead; @@ -555,7 +560,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) has_data = 1; } - padding = WORD_ROUND(chunk->skb->len) - chunk->skb->len; + padding = SCTP_PAD4(chunk->skb->len) - chunk->skb->len; if (padding) memset(skb_put(chunk->skb, padding), 0, padding); @@ -582,7 +587,7 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) * acknowledged or have failed. * Re-queue auth chunks if needed. */ - pkt_size -= WORD_ROUND(chunk->skb->len); + pkt_size -= SCTP_PAD4(chunk->skb->len); if (!sctp_chunk_is_data(chunk) && chunk != packet->auth) sctp_chunk_free(chunk); @@ -621,8 +626,10 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) if (!gso) break; - if (skb_gro_receive(&head, nskb)) + if (skb_gro_receive(&head, nskb)) { + kfree_skb(nskb); goto nomem; + } nskb = NULL; if (WARN_ON_ONCE(skb_shinfo(head)->gso_segs >= sk->sk_gso_max_segs)) @@ -716,18 +723,13 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp) } head->ignore_df = packet->ipfragok; tp->af_specific->sctp_xmit(head, tp); + goto out; -out: - sctp_packet_reset(packet); - return err; -no_route: - kfree_skb(head); - if (nskb != head) - kfree_skb(nskb); - - if (asoc) - IP_INC_STATS(sock_net(asoc->base.sk), IPSTATS_MIB_OUTNOROUTES); +nomem: + if (packet->auth && list_empty(&packet->auth->list)) + sctp_chunk_free(packet->auth); +nodst: /* FIXME: Returning the 'err' will effect all the associations * associated with a socket, although only one of the paths of the * association is unreachable. @@ -736,22 +738,18 @@ no_route: * required. */ /* err = -EHOSTUNREACH; */ -err: - /* Control chunks are unreliable so just drop them. DATA chunks - * will get resent or dropped later. - */ + kfree_skb(head); +err: list_for_each_entry_safe(chunk, tmp, &packet->chunk_list, list) { list_del_init(&chunk->list); if (!sctp_chunk_is_data(chunk)) sctp_chunk_free(chunk); } - goto out; -nomem: - if (packet->auth && list_empty(&packet->auth->list)) - sctp_chunk_free(packet->auth); - err = -ENOMEM; - goto err; + +out: + sctp_packet_reset(packet); + return err; } /******************************************************************** @@ -913,7 +911,7 @@ static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, */ maxsize = pmtu - packet->overhead; if (packet->auth) - maxsize -= WORD_ROUND(packet->auth->skb->len); + maxsize -= SCTP_PAD4(packet->auth->skb->len); if (chunk_len > maxsize) retval = SCTP_XMIT_PMTU_FULL; diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 107233da5cc9..582585393d35 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -68,7 +68,7 @@ static void sctp_mark_missing(struct sctp_outq *q, static void sctp_generate_fwdtsn(struct sctp_outq *q, __u32 sack_ctsn); -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, @@ -285,10 +285,9 @@ void sctp_outq_free(struct sctp_outq *q) } /* Put a new chunk in an sctp_outq. */ -int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) +void sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) { struct net *net = sock_net(q->asoc->base.sk); - int error = 0; pr_debug("%s: outq:%p, chunk:%p[%s]\n", __func__, q, chunk, chunk && chunk->chunk_hdr ? @@ -299,54 +298,26 @@ int sctp_outq_tail(struct sctp_outq *q, struct sctp_chunk *chunk, gfp_t gfp) * immediately. */ if (sctp_chunk_is_data(chunk)) { - /* Is it OK to queue data chunks? */ - /* From 9. Termination of Association - * - * When either endpoint performs a shutdown, the - * association on each peer will stop accepting new - * data from its user and only deliver data in queue - * at the time of sending or receiving the SHUTDOWN - * chunk. - */ - switch (q->asoc->state) { - case SCTP_STATE_CLOSED: - case SCTP_STATE_SHUTDOWN_PENDING: - case SCTP_STATE_SHUTDOWN_SENT: - case SCTP_STATE_SHUTDOWN_RECEIVED: - case SCTP_STATE_SHUTDOWN_ACK_SENT: - /* Cannot send after transport endpoint shutdown */ - error = -ESHUTDOWN; - break; + pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", + __func__, q, chunk, chunk && chunk->chunk_hdr ? + sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : + "illegal chunk"); - default: - pr_debug("%s: outqueueing: outq:%p, chunk:%p[%s])\n", - __func__, q, chunk, chunk && chunk->chunk_hdr ? - sctp_cname(SCTP_ST_CHUNK(chunk->chunk_hdr->type)) : - "illegal chunk"); - - sctp_chunk_hold(chunk); - sctp_outq_tail_data(q, chunk); - if (chunk->asoc->peer.prsctp_capable && - SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) - chunk->asoc->sent_cnt_removable++; - if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) - SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); - else - SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); - break; - } + sctp_outq_tail_data(q, chunk); + if (chunk->asoc->peer.prsctp_capable && + SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) + chunk->asoc->sent_cnt_removable++; + if (chunk->chunk_hdr->flags & SCTP_DATA_UNORDERED) + SCTP_INC_STATS(net, SCTP_MIB_OUTUNORDERCHUNKS); + else + SCTP_INC_STATS(net, SCTP_MIB_OUTORDERCHUNKS); } else { list_add_tail(&chunk->list, &q->control_chunk_list); SCTP_INC_STATS(net, SCTP_MIB_OUTCTRLCHUNKS); } - if (error < 0) - return error; - if (!q->cork) - error = sctp_outq_flush(q, 0, gfp); - - return error; + sctp_outq_flush(q, 0, gfp); } /* Insert a chunk into the sorted list based on the TSNs. The retransmit list @@ -559,7 +530,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_retransmit_reason_t reason) { struct net *net = sock_net(q->asoc->base.sk); - int error = 0; switch (reason) { case SCTP_RTXR_T3_RTX: @@ -603,10 +573,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, * will be flushed at the end. */ if (reason != SCTP_RTXR_FAST_RTX) - error = sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC); - - if (error) - q->asoc->base.sk->sk_err = -error; + sctp_outq_flush(q, /* rtx_timeout */ 1, GFP_ATOMIC); } /* @@ -778,12 +745,12 @@ redo: } /* Cork the outqueue so queued chunks are really queued. */ -int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) +void sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) { if (q->cork) q->cork = 0; - return sctp_outq_flush(q, 0, gfp); + sctp_outq_flush(q, 0, gfp); } @@ -796,7 +763,7 @@ int sctp_outq_uncork(struct sctp_outq *q, gfp_t gfp) * locking concerns must be made. Today we use the sock lock to protect * this function. */ -static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) +static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) { struct sctp_packet *packet; struct sctp_packet singleton; @@ -919,8 +886,10 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) sctp_packet_config(&singleton, vtag, 0); sctp_packet_append_chunk(&singleton, chunk); error = sctp_packet_transmit(&singleton, gfp); - if (error < 0) - return error; + if (error < 0) { + asoc->base.sk->sk_err = -error; + return; + } break; case SCTP_CID_ABORT: @@ -1018,6 +987,8 @@ static int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) retran: error = sctp_outq_flush_rtx(q, packet, rtx_timeout, &start_timer); + if (error < 0) + asoc->base.sk->sk_err = -error; if (start_timer) { sctp_transport_reset_t3_rtx(transport); @@ -1192,14 +1163,15 @@ sctp_flush_out: struct sctp_transport, send_ready); packet = &t->packet; - if (!sctp_packet_empty(packet)) + if (!sctp_packet_empty(packet)) { error = sctp_packet_transmit(packet, gfp); + if (error < 0) + asoc->base.sk->sk_err = -error; + } /* Clear the burst limited state, if any */ sctp_transport_burst_reset(t); } - - return error; } /* Update unack_data based on the incoming SACK chunk */ @@ -1747,7 +1719,7 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { int i; sctp_sack_variable_t *frags; - __u16 gap; + __u16 tsn_offset, blocks; __u32 ctsn = ntohl(sack->cum_tsn_ack); if (TSN_lte(tsn, ctsn)) @@ -1766,10 +1738,11 @@ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) */ frags = sack->variable; - gap = tsn - ctsn; - for (i = 0; i < ntohs(sack->num_gap_ack_blocks); ++i) { - if (TSN_lte(ntohs(frags[i].gab.start), gap) && - TSN_lte(gap, ntohs(frags[i].gab.end))) + blocks = ntohs(sack->num_gap_ack_blocks); + tsn_offset = tsn - ctsn; + for (i = 0; i < blocks; ++i) { + if (tsn_offset >= ntohs(frags[i].gab.start) && + tsn_offset <= ntohs(frags[i].gab.end)) goto pass; } diff --git a/net/sctp/proc.c b/net/sctp/proc.c index ef8ba77a5bea..206377fe91ec 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -73,13 +73,17 @@ static const struct snmp_mib sctp_snmp_list[] = { /* Display sctp snmp mib statistics(/proc/net/sctp/snmp). */ static int sctp_snmp_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[SCTP_MIB_MAX]; struct net *net = seq->private; int i; - for (i = 0; sctp_snmp_list[i].name != NULL; i++) + memset(buff, 0, sizeof(unsigned long) * SCTP_MIB_MAX); + + snmp_get_cpu_field_batch(buff, sctp_snmp_list, + net->sctp.sctp_statistics); + for (i = 0; sctp_snmp_list[i].name; i++) seq_printf(seq, "%-32s\t%ld\n", sctp_snmp_list[i].name, - snmp_fold_field(net->sctp.sctp_statistics, - sctp_snmp_list[i].entry)); + buff[i]); return 0; } diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index cef0cee182d4..048954eee984 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -106,7 +106,8 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, const struct inet_diag_req_v2 *req, struct user_namespace *user_ns, int portid, u32 seq, u16 nlmsg_flags, - const struct nlmsghdr *unlh) + const struct nlmsghdr *unlh, + bool net_admin) { struct sctp_endpoint *ep = sctp_sk(sk)->ep; struct list_head *addr_list; @@ -133,7 +134,7 @@ static int inet_sctp_diag_fill(struct sock *sk, struct sctp_association *asoc, r->idiag_retrans = 0; } - if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns)) + if (inet_diag_msg_attrs_fill(sk, skb, r, ext, user_ns, net_admin)) goto errout; if (ext & (1 << (INET_DIAG_SKMEMINFO - 1))) { @@ -203,6 +204,7 @@ struct sctp_comm_param { struct netlink_callback *cb; const struct inet_diag_req_v2 *r; const struct nlmsghdr *nlh; + bool net_admin; }; static size_t inet_assoc_attr_size(struct sctp_association *asoc) @@ -219,6 +221,7 @@ static size_t inet_assoc_attr_size(struct sctp_association *asoc) + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + nla_total_size(1) /* INET_DIAG_TOS */ + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(4) /* INET_DIAG_MARK */ + nla_total_size(addrlen * asoc->peer.transport_count) + nla_total_size(addrlen * addrcnt) + nla_total_size(sizeof(struct inet_diag_meminfo)) @@ -256,7 +259,8 @@ static int sctp_tsp_dump_one(struct sctp_transport *tsp, void *p) err = inet_sctp_diag_fill(sk, assoc, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh); + nlh->nlmsg_seq, 0, nlh, + commp->net_admin); release_sock(sk); if (err < 0) { WARN_ON(err == -EMSGSIZE); @@ -299,7 +303,8 @@ static int sctp_sock_dump(struct sock *sk, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - NLM_F_MULTI, cb->nlh) < 0) { + NLM_F_MULTI, cb->nlh, + commp->net_admin) < 0) { cb->args[3] = 1; err = 1; goto release; @@ -309,7 +314,8 @@ static int sctp_sock_dump(struct sock *sk, void *p) if (inet_sctp_diag_fill(sk, assoc, skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, 0, cb->nlh) < 0) { + cb->nlh->nlmsg_seq, 0, cb->nlh, + commp->net_admin) < 0) { err = 1; goto release; } @@ -389,7 +395,7 @@ static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - cb->nlh) < 0) { + cb->nlh, commp->net_admin) < 0) { err = 2; goto out; } @@ -426,6 +432,7 @@ static int sctp_diag_dump_one(struct sk_buff *in_skb, .skb = in_skb, .r = req, .nlh = nlh, + .net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN), }; if (req->sdiag_family == AF_INET) { @@ -461,6 +468,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, .skb = skb, .cb = cb, .r = r, + .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN), }; /* eps hashtable dumps diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 46ffecc57214..9e9690b7afe1 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -253,7 +253,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, num_types = sp->pf->supported_addrs(sp, types); chunksize = sizeof(init) + addrs_len; - chunksize += WORD_ROUND(SCTP_SAT_LEN(num_types)); + chunksize += SCTP_PAD4(SCTP_SAT_LEN(num_types)); chunksize += sizeof(ecap_param); if (asoc->prsctp_enable) @@ -283,14 +283,14 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* Add HMACS parameter length if any were defined */ auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; /* Add CHUNKS parameter length */ auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -300,8 +300,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -443,13 +443,13 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, auth_hmacs = (sctp_paramhdr_t *)asoc->c.auth_hmacs; if (auth_hmacs->length) - chunksize += WORD_ROUND(ntohs(auth_hmacs->length)); + chunksize += SCTP_PAD4(ntohs(auth_hmacs->length)); else auth_hmacs = NULL; auth_chunks = (sctp_paramhdr_t *)asoc->c.auth_chunks; if (auth_chunks->length) - chunksize += WORD_ROUND(ntohs(auth_chunks->length)); + chunksize += SCTP_PAD4(ntohs(auth_chunks->length)); else auth_chunks = NULL; @@ -458,8 +458,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += WORD_ROUND(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -1375,7 +1375,7 @@ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, struct sock *sk; /* No need to allocate LL here, as this is only a chunk. */ - skb = alloc_skb(WORD_ROUND(sizeof(sctp_chunkhdr_t) + paylen), gfp); + skb = alloc_skb(SCTP_PAD4(sizeof(sctp_chunkhdr_t) + paylen), gfp); if (!skb) goto nodata; @@ -1467,7 +1467,7 @@ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) void *target; void *padding; int chunklen = ntohs(chunk->chunk_hdr->length); - int padlen = WORD_ROUND(chunklen) - chunklen; + int padlen = SCTP_PAD4(chunklen) - chunklen; padding = skb_put(chunk->skb, padlen); target = skb_put(chunk->skb, len); @@ -1885,7 +1885,7 @@ static int sctp_process_missing_param(const struct sctp_association *asoc, struct __sctp_missing report; __u16 len; - len = WORD_ROUND(sizeof(report)); + len = SCTP_PAD4(sizeof(report)); /* Make an ERROR chunk, preparing enough room for * returning multiple unknown parameters. @@ -2083,9 +2083,9 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, if (*errp) { if (!sctp_init_cause_fixed(*errp, SCTP_ERROR_UNKNOWN_PARAM, - WORD_ROUND(ntohs(param.p->length)))) + SCTP_PAD4(ntohs(param.p->length)))) sctp_addto_chunk_fixed(*errp, - WORD_ROUND(ntohs(param.p->length)), + SCTP_PAD4(ntohs(param.p->length)), param.v); } else { /* If there is no memory for generating the ERROR diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 12d45193357c..c345bf153bed 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1020,19 +1020,13 @@ static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, * This way the whole message is queued up and bundling if * encouraged for small fragments. */ -static int sctp_cmd_send_msg(struct sctp_association *asoc, - struct sctp_datamsg *msg, gfp_t gfp) +static void sctp_cmd_send_msg(struct sctp_association *asoc, + struct sctp_datamsg *msg, gfp_t gfp) { struct sctp_chunk *chunk; - int error = 0; - list_for_each_entry(chunk, &msg->chunks, frag_list) { - error = sctp_outq_tail(&asoc->outqueue, chunk, gfp); - if (error) - break; - } - - return error; + list_for_each_entry(chunk, &msg->chunks, frag_list) + sctp_outq_tail(&asoc->outqueue, chunk, gfp); } @@ -1427,8 +1421,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, local_cork = 1; } /* Send a chunk to our peer. */ - error = sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, - gfp); + sctp_outq_tail(&asoc->outqueue, cmd->obj.chunk, gfp); break; case SCTP_CMD_SEND_PKT: @@ -1682,7 +1675,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_FORCE_PRIM_RETRAN: t = asoc->peer.retran_path; asoc->peer.retran_path = asoc->peer.primary_path; - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); local_cork = 0; asoc->peer.retran_path = t; break; @@ -1709,7 +1702,7 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_outq_cork(&asoc->outqueue); local_cork = 1; } - error = sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); + sctp_cmd_send_msg(asoc, cmd->obj.msg, gfp); break; case SCTP_CMD_SEND_NEXT_ASCONF: sctp_cmd_send_asconf(asoc); @@ -1739,9 +1732,9 @@ out: */ if (asoc && SCTP_EVENT_T_CHUNK == event_type && chunk) { if (chunk->end_of_packet || chunk->singleton) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); } else if (local_cork) - error = sctp_outq_uncork(&asoc->outqueue, gfp); + sctp_outq_uncork(&asoc->outqueue, gfp); if (sp->data_ready_signalled) sp->data_ready_signalled = 0; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d88bb2b0b699..026e3bca4a94 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -3454,7 +3454,7 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, } /* Report violation if chunk len overflows */ - ch_end = ((__u8 *)ch) + WORD_ROUND(ntohs(ch->length)); + ch_end = ((__u8 *)ch) + SCTP_PAD4(ntohs(ch->length)); if (ch_end > skb_tail_pointer(skb)) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -4185,7 +4185,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -4203,7 +4203,7 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, hdr = unk_chunk->chunk_hdr; err_chunk = sctp_make_op_error(asoc, unk_chunk, SCTP_ERROR_UNKNOWN_CHUNK, hdr, - WORD_ROUND(ntohs(hdr->length)), + SCTP_PAD4(ntohs(hdr->length)), 0); if (err_chunk) { sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 8ed2d99bde6d..fb02c7033307 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -1958,6 +1958,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) /* Now send the (possibly) fragmented message. */ list_for_each_entry(chunk, &datamsg->chunks, frag_list) { + sctp_chunk_hold(chunk); + /* Do accounting for the write space. */ sctp_set_owner_w(chunk); @@ -1970,13 +1972,15 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) * breaks. */ err = sctp_primitive_SEND(net, asoc, datamsg); - sctp_datamsg_put(datamsg); /* Did the lower layer accept the chunk? */ - if (err) + if (err) { + sctp_datamsg_free(datamsg); goto out_free; + } pr_debug("%s: we sent primitively\n", __func__); + sctp_datamsg_put(datamsg); err = msg_len; if (unlikely(wait_connect)) { diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 81b86678be4d..ce54dce13ddb 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -233,7 +233,7 @@ void sctp_transport_pmtu(struct sctp_transport *transport, struct sock *sk) } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); } else transport->pathmtu = SCTP_DEFAULT_MAXSEGMENT; } @@ -287,7 +287,7 @@ void sctp_transport_route(struct sctp_transport *transport, return; } if (transport->dst) { - transport->pathmtu = WORD_TRUNC(dst_mtu(transport->dst)); + transport->pathmtu = SCTP_TRUNC4(dst_mtu(transport->dst)); /* Initialize sk->sk_rcv_saddr, if the transport is the * association's active path for getsockname(). diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index d85b803da11d..bea00058ce35 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -383,7 +383,7 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, ch = (sctp_errhdr_t *)(chunk->skb->data); cause = ch->cause; - elen = WORD_ROUND(ntohs(ch->length)) - sizeof(sctp_errhdr_t); + elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t); /* Pull off the ERROR header. */ skb_pull(chunk->skb, sizeof(sctp_errhdr_t)); @@ -688,7 +688,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, * MUST ignore the padding bytes. */ len = ntohs(chunk->chunk_hdr->length); - padding = WORD_ROUND(len) - len; + padding = SCTP_PAD4(len) - len; /* Fixup cloned skb with just this chunks data. */ skb_trim(skb, chunk->chunk_end - padding - skb->data); diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 877e55066f89..84d0fdaf7de9 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -140,11 +140,8 @@ int sctp_clear_pd(struct sock *sk, struct sctp_association *asoc) * we can go ahead and clear out the lobby in one shot */ if (!skb_queue_empty(&sp->pd_lobby)) { - struct list_head *list; skb_queue_splice_tail_init(&sp->pd_lobby, &sk->sk_receive_queue); - list = (struct list_head *)&sctp_sk(sk)->pd_lobby; - INIT_LIST_HEAD(list); return 1; } } else { diff --git a/net/strparser/Kconfig b/net/strparser/Kconfig new file mode 100644 index 000000000000..6cff3f6d0c3a --- /dev/null +++ b/net/strparser/Kconfig @@ -0,0 +1,4 @@ + +config STREAM_PARSER + tristate + default n diff --git a/net/strparser/Makefile b/net/strparser/Makefile new file mode 100644 index 000000000000..858a126ebaa0 --- /dev/null +++ b/net/strparser/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_STREAM_PARSER) += strparser.o diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c new file mode 100644 index 000000000000..5c7549b5b92c --- /dev/null +++ b/net/strparser/strparser.c @@ -0,0 +1,510 @@ +/* + * Stream Parser + * + * Copyright (c) 2016 Tom Herbert + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct workqueue_struct *strp_wq; + +struct _strp_rx_msg { + /* Internal cb structure. struct strp_rx_msg must be first for passing + * to upper layer. + */ + struct strp_rx_msg strp; + int accum_len; + int early_eaten; +}; + +static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb) +{ + return (struct _strp_rx_msg *)((void *)skb->cb + + offsetof(struct qdisc_skb_cb, data)); +} + +/* Lower lock held */ +static void strp_abort_rx_strp(struct strparser *strp, int err) +{ + struct sock *csk = strp->sk; + + /* Unrecoverable error in receive */ + + del_timer(&strp->rx_msg_timer); + + if (strp->rx_stopped) + return; + + strp->rx_stopped = 1; + + /* Report an error on the lower socket */ + csk->sk_err = err; + csk->sk_error_report(csk); +} + +static void strp_start_rx_timer(struct strparser *strp) +{ + if (strp->sk->sk_rcvtimeo) + mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo); +} + +/* Lower lock held */ +static void strp_parser_err(struct strparser *strp, int err, + read_descriptor_t *desc) +{ + desc->error = err; + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + strp->cb.abort_parser(strp, err); +} + +static inline int strp_peek_len(struct strparser *strp) +{ + struct socket *sock = strp->sk->sk_socket; + + return sock->ops->peek_len(sock); +} + +/* Lower socket lock held */ +static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) +{ + struct strparser *strp = (struct strparser *)desc->arg.data; + struct _strp_rx_msg *rxm; + struct sk_buff *head, *skb; + size_t eaten = 0, cand_len; + ssize_t extra; + int err; + bool cloned_orig = false; + + if (strp->rx_paused) + return 0; + + head = strp->rx_skb_head; + if (head) { + /* Message already in progress */ + + rxm = _strp_rx_msg(head); + if (unlikely(rxm->early_eaten)) { + /* Already some number of bytes on the receive sock + * data saved in rx_skb_head, just indicate they + * are consumed. + */ + eaten = orig_len <= rxm->early_eaten ? + orig_len : rxm->early_eaten; + rxm->early_eaten -= eaten; + + return eaten; + } + + if (unlikely(orig_offset)) { + /* Getting data with a non-zero offset when a message is + * in progress is not expected. If it does happen, we + * need to clone and pull since we can't deal with + * offsets in the skbs for a message expect in the head. + */ + orig_skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!orig_skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + if (!pskb_pull(orig_skb, orig_offset)) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + kfree_skb(orig_skb); + desc->error = -ENOMEM; + return 0; + } + cloned_orig = true; + orig_offset = 0; + } + + if (!strp->rx_skb_nextp) { + /* We are going to append to the frags_list of head. + * Need to unshare the frag_list. + */ + err = skb_unclone(head, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + return 0; + } + + if (unlikely(skb_shinfo(head)->frag_list)) { + /* We can't append to an sk_buff that already + * has a frag_list. We create a new head, point + * the frag_list of that to the old head, and + * then are able to use the old head->next for + * appending to the message. + */ + if (WARN_ON(head->next)) { + desc->error = -EINVAL; + return 0; + } + + skb = alloc_skb(0, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + return 0; + } + skb->len = head->len; + skb->data_len = head->len; + skb->truesize = head->truesize; + *_strp_rx_msg(skb) = *_strp_rx_msg(head); + strp->rx_skb_nextp = &head->next; + skb_shinfo(skb)->frag_list = head; + strp->rx_skb_head = skb; + head = skb; + } else { + strp->rx_skb_nextp = + &skb_shinfo(head)->frag_list; + } + } + } + + while (eaten < orig_len) { + /* Always clone since we will consume something */ + skb = skb_clone(orig_skb, GFP_ATOMIC); + if (!skb) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = -ENOMEM; + break; + } + + cand_len = orig_len - eaten; + + head = strp->rx_skb_head; + if (!head) { + head = skb; + strp->rx_skb_head = head; + /* Will set rx_skb_nextp on next packet if needed */ + strp->rx_skb_nextp = NULL; + rxm = _strp_rx_msg(head); + memset(rxm, 0, sizeof(*rxm)); + rxm->strp.offset = orig_offset + eaten; + } else { + /* Unclone since we may be appending to an skb that we + * already share a frag_list with. + */ + err = skb_unclone(skb, GFP_ATOMIC); + if (err) { + STRP_STATS_INCR(strp->stats.rx_mem_fail); + desc->error = err; + break; + } + + rxm = _strp_rx_msg(head); + *strp->rx_skb_nextp = skb; + strp->rx_skb_nextp = &skb->next; + head->data_len += skb->len; + head->len += skb->len; + head->truesize += skb->truesize; + } + + if (!rxm->strp.full_len) { + ssize_t len; + + len = (*strp->cb.parse_msg)(strp, head); + + if (!len) { + /* Need more header to determine length */ + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + rxm->accum_len += cand_len; + eaten += cand_len; + STRP_STATS_INCR(strp->stats.rx_need_more_hdr); + WARN_ON(eaten != orig_len); + break; + } else if (len < 0) { + if (len == -ESTRPIPE && rxm->accum_len) { + len = -ENODATA; + strp->rx_unrecov_intr = 1; + } else { + strp->rx_interrupted = 1; + } + strp_parser_err(strp, err, desc); + break; + } else if (len > strp->sk->sk_rcvbuf) { + /* Message length exceeds maximum allowed */ + STRP_STATS_INCR(strp->stats.rx_msg_too_big); + strp_parser_err(strp, -EMSGSIZE, desc); + break; + } else if (len <= (ssize_t)head->len - + skb->len - rxm->strp.offset) { + /* Length must be into new skb (and also + * greater than zero) + */ + STRP_STATS_INCR(strp->stats.rx_bad_hdr_len); + strp_parser_err(strp, -EPROTO, desc); + break; + } + + rxm->strp.full_len = len; + } + + extra = (ssize_t)(rxm->accum_len + cand_len) - + rxm->strp.full_len; + + if (extra < 0) { + /* Message not complete yet. */ + if (rxm->strp.full_len - rxm->accum_len > + strp_peek_len(strp)) { + /* Don't have the whole messages in the socket + * buffer. Set strp->rx_need_bytes to wait for + * the rest of the message. Also, set "early + * eaten" since we've already buffered the skb + * but don't consume yet per strp_read_sock. + */ + + if (!rxm->accum_len) { + /* Start RX timer for new message */ + strp_start_rx_timer(strp); + } + + strp->rx_need_bytes = rxm->strp.full_len - + rxm->accum_len; + rxm->accum_len += cand_len; + rxm->early_eaten = cand_len; + STRP_STATS_ADD(strp->stats.rx_bytes, cand_len); + desc->count = 0; /* Stop reading socket */ + break; + } + rxm->accum_len += cand_len; + eaten += cand_len; + WARN_ON(eaten != orig_len); + break; + } + + /* Positive extra indicates ore bytes than needed for the + * message + */ + + WARN_ON(extra > cand_len); + + eaten += (cand_len - extra); + + /* Hurray, we have a new message! */ + del_timer(&strp->rx_msg_timer); + strp->rx_skb_head = NULL; + STRP_STATS_INCR(strp->stats.rx_msgs); + + /* Give skb to upper layer */ + strp->cb.rcv_msg(strp, head); + + if (unlikely(strp->rx_paused)) { + /* Upper layer paused strp */ + break; + } + } + + if (cloned_orig) + kfree_skb(orig_skb); + + STRP_STATS_ADD(strp->stats.rx_bytes, eaten); + + return eaten; +} + +static int default_read_sock_done(struct strparser *strp, int err) +{ + return err; +} + +/* Called with lock held on lower socket */ +static int strp_read_sock(struct strparser *strp) +{ + struct socket *sock = strp->sk->sk_socket; + read_descriptor_t desc; + + desc.arg.data = strp; + desc.error = 0; + desc.count = 1; /* give more than one skb per call */ + + /* sk should be locked here, so okay to do read_sock */ + sock->ops->read_sock(strp->sk, &desc, strp_recv); + + desc.error = strp->cb.read_sock_done(strp, desc.error); + + return desc.error; +} + +/* Lower sock lock held */ +void strp_data_ready(struct strparser *strp) +{ + if (unlikely(strp->rx_stopped)) + return; + + /* This check is needed to synchronize with do_strp_rx_work. + * do_strp_rx_work acquires a process lock (lock_sock) whereas + * the lock held here is bh_lock_sock. The two locks can be + * held by different threads at the same time, but bh_lock_sock + * allows a thread in BH context to safely check if the process + * lock is held. In this case, if the lock is held, queue work. + */ + if (sock_owned_by_user(strp->sk)) { + queue_work(strp_wq, &strp->rx_work); + return; + } + + if (strp->rx_paused) + return; + + if (strp->rx_need_bytes) { + if (strp_peek_len(strp) >= strp->rx_need_bytes) + strp->rx_need_bytes = 0; + else + return; + } + + if (strp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_data_ready); + +static void do_strp_rx_work(struct strparser *strp) +{ + read_descriptor_t rd_desc; + struct sock *csk = strp->sk; + + /* We need the read lock to synchronize with strp_data_ready. We + * need the socket lock for calling strp_read_sock. + */ + lock_sock(csk); + + if (unlikely(strp->rx_stopped)) + goto out; + + if (strp->rx_paused) + goto out; + + rd_desc.arg.data = strp; + + if (strp_read_sock(strp) == -ENOMEM) + queue_work(strp_wq, &strp->rx_work); + +out: + release_sock(csk); +} + +static void strp_rx_work(struct work_struct *w) +{ + do_strp_rx_work(container_of(w, struct strparser, rx_work)); +} + +static void strp_rx_msg_timeout(unsigned long arg) +{ + struct strparser *strp = (struct strparser *)arg; + + /* Message assembly timed out */ + STRP_STATS_INCR(strp->stats.rx_msg_timeouts); + lock_sock(strp->sk); + strp->cb.abort_parser(strp, ETIMEDOUT); + release_sock(strp->sk); +} + +int strp_init(struct strparser *strp, struct sock *csk, + struct strp_callbacks *cb) +{ + struct socket *sock = csk->sk_socket; + + if (!cb || !cb->rcv_msg || !cb->parse_msg) + return -EINVAL; + + if (!sock->ops->read_sock || !sock->ops->peek_len) + return -EAFNOSUPPORT; + + memset(strp, 0, sizeof(*strp)); + + strp->sk = csk; + + setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout, + (unsigned long)strp); + + INIT_WORK(&strp->rx_work, strp_rx_work); + + strp->cb.rcv_msg = cb->rcv_msg; + strp->cb.parse_msg = cb->parse_msg; + strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; + strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp; + + return 0; +} +EXPORT_SYMBOL_GPL(strp_init); + +void strp_unpause(struct strparser *strp) +{ + strp->rx_paused = 0; + + /* Sync setting rx_paused with RX work */ + smp_mb(); + + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_unpause); + +/* strp must already be stopped so that strp_recv will no longer be called. + * Note that strp_done is not called with the lower socket held. + */ +void strp_done(struct strparser *strp) +{ + WARN_ON(!strp->rx_stopped); + + del_timer_sync(&strp->rx_msg_timer); + cancel_work_sync(&strp->rx_work); + + if (strp->rx_skb_head) { + kfree_skb(strp->rx_skb_head); + strp->rx_skb_head = NULL; + } +} +EXPORT_SYMBOL_GPL(strp_done); + +void strp_stop(struct strparser *strp) +{ + strp->rx_stopped = 1; +} +EXPORT_SYMBOL_GPL(strp_stop); + +void strp_check_rcv(struct strparser *strp) +{ + queue_work(strp_wq, &strp->rx_work); +} +EXPORT_SYMBOL_GPL(strp_check_rcv); + +static int __init strp_mod_init(void) +{ + strp_wq = create_singlethread_workqueue("kstrp"); + + return 0; +} + +static void __exit strp_mod_exit(void) +{ +} +module_init(strp_mod_init); +module_exit(strp_mod_exit); +MODULE_LICENSE("GPL"); diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index a5fc9dd24aa9..02beb35f577f 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -21,7 +21,6 @@ #include #include #include -#include #include /** @@ -344,8 +343,6 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: return sizeof(struct switchdev_obj_port_vlan); - case SWITCHDEV_OBJ_ID_IPV4_FIB: - return sizeof(struct switchdev_obj_ipv4_fib); case SWITCHDEV_OBJ_ID_PORT_FDB: return sizeof(struct switchdev_obj_port_fdb); case SWITCHDEV_OBJ_ID_PORT_MDB: @@ -1042,7 +1039,7 @@ static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) struct nlmsghdr *nlh; struct ndmsg *ndm; - if (dump->idx < dump->cb->args[0]) + if (dump->idx < dump->cb->args[2]) goto skip; nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, @@ -1089,7 +1086,7 @@ nla_put_failure: */ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, struct net_device *dev, - struct net_device *filter_dev, int idx) + struct net_device *filter_dev, int *idx) { struct switchdev_fdb_dump dump = { .fdb.obj.orig_dev = dev, @@ -1097,207 +1094,27 @@ int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, .dev = dev, .skb = skb, .cb = cb, - .idx = idx, + .idx = *idx, }; int err; err = switchdev_port_obj_dump(dev, &dump.fdb.obj, switchdev_port_fdb_dump_cb); - cb->args[1] = err; - return dump.idx; + *idx = dump.idx; + return err; } EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); -static struct net_device *switchdev_get_lowest_dev(struct net_device *dev) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct net_device *port_dev; - struct list_head *iter; - - /* Recusively search down until we find a sw port dev. - * (A sw port dev supports switchdev_port_attr_get). - */ - - if (ops && ops->switchdev_port_attr_get) - return dev; - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - port_dev = switchdev_get_lowest_dev(lower_dev); - if (port_dev) - return port_dev; - } - - return NULL; -} - -static struct net_device *switchdev_get_dev_by_nhs(struct fib_info *fi) -{ - struct switchdev_attr attr = { - .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - }; - struct switchdev_attr prev_attr; - struct net_device *dev = NULL; - int nhsel; - - ASSERT_RTNL(); - - /* For this route, all nexthop devs must be on the same switch. */ - - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (!nh->nh_dev) - return NULL; - - dev = switchdev_get_lowest_dev(nh->nh_dev); - if (!dev) - return NULL; - - attr.orig_dev = dev; - if (switchdev_port_attr_get(dev, &attr)) - return NULL; - - if (nhsel > 0 && - !netdev_phys_item_id_same(&prev_attr.u.ppid, &attr.u.ppid)) - return NULL; - - prev_attr = attr; - } - - return dev; -} - -/** - * switchdev_fib_ipv4_add - Add/modify switch IPv4 route entry - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @nlflags: netlink flags passed in (NLM_F_*) - * @tb_id: route table ID - * - * Add/modify switch IPv4 route entry. - */ -int switchdev_fib_ipv4_add(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 nlflags, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = nlflags, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - /* Don't offload route if using custom ip rules or if - * IPv4 FIB offloading has been disabled completely. - */ - -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (fi->fib_net->ipv4.fib_has_custom_rules) - return 0; -#endif - - if (fi->fib_net->ipv4.fib_offload_disabled) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_add(dev, &ipv4_fib.obj); - if (!err) - fi->fib_flags |= RTNH_F_OFFLOAD; - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_add); - -/** - * switchdev_fib_ipv4_del - Delete IPv4 route entry from switch - * - * @dst: route's IPv4 destination address - * @dst_len: destination address length (prefix length) - * @fi: route FIB info structure - * @tos: route TOS - * @type: route type - * @tb_id: route table ID - * - * Delete IPv4 route entry from switch device. - */ -int switchdev_fib_ipv4_del(u32 dst, int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) -{ - struct switchdev_obj_ipv4_fib ipv4_fib = { - .obj.id = SWITCHDEV_OBJ_ID_IPV4_FIB, - .dst = dst, - .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .nlflags = 0, - .tb_id = tb_id, - }; - struct net_device *dev; - int err = 0; - - if (!(fi->fib_flags & RTNH_F_OFFLOAD)) - return 0; - - dev = switchdev_get_dev_by_nhs(fi); - if (!dev) - return 0; - - ipv4_fib.obj.orig_dev = dev; - err = switchdev_port_obj_del(dev, &ipv4_fib.obj); - if (!err) - fi->fib_flags &= ~RTNH_F_OFFLOAD; - - return err == -EOPNOTSUPP ? 0 : err; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_del); - -/** - * switchdev_fib_ipv4_abort - Abort an IPv4 FIB operation - * - * @fi: route FIB info structure - */ -void switchdev_fib_ipv4_abort(struct fib_info *fi) -{ - /* There was a problem installing this route to the offload - * device. For now, until we come up with more refined - * policy handling, abruptly end IPv4 fib offloading for - * for entire net by flushing offload device(s) of all - * IPv4 routes, and mark IPv4 fib offloading broken from - * this point forward. - */ - - fib_flush_external(fi->fib_net); - fi->fib_net->ipv4.fib_offload_disabled = true; -} -EXPORT_SYMBOL_GPL(switchdev_fib_ipv4_abort); - bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { struct switchdev_attr a_attr = { .orig_dev = a, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; struct switchdev_attr b_attr = { .orig_dev = b, .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, - .flags = SWITCHDEV_F_NO_RECURSE, }; if (switchdev_port_attr_get(a, &a_attr) || @@ -1306,89 +1123,4 @@ bool switchdev_port_same_parent_id(struct net_device *a, return netdev_phys_item_id_same(&a_attr.u.ppid, &b_attr.u.ppid); } - -static u32 switchdev_port_fwd_mark_get(struct net_device *dev, - struct net_device *group_dev) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev == dev) - continue; - if (switchdev_port_same_parent_id(dev, lower_dev)) - return lower_dev->offload_fwd_mark; - return switchdev_port_fwd_mark_get(dev, lower_dev); - } - - return dev->ifindex; -} EXPORT_SYMBOL_GPL(switchdev_port_same_parent_id); - -static void switchdev_port_fwd_mark_reset(struct net_device *group_dev, - u32 old_mark, u32 *reset_mark) -{ - struct net_device *lower_dev; - struct list_head *iter; - - netdev_for_each_lower_dev(group_dev, lower_dev, iter) { - if (lower_dev->offload_fwd_mark == old_mark) { - if (!*reset_mark) - *reset_mark = lower_dev->ifindex; - lower_dev->offload_fwd_mark = *reset_mark; - } - switchdev_port_fwd_mark_reset(lower_dev, old_mark, reset_mark); - } -} - -/** - * switchdev_port_fwd_mark_set - Set port offload forwarding mark - * - * @dev: port device - * @group_dev: containing device - * @joining: true if dev is joining group; false if leaving group - * - * An ungrouped port's offload mark is just its ifindex. A grouped - * port's (member of a bridge, for example) offload mark is the ifindex - * of one of the ports in the group with the same parent (switch) ID. - * Ports on the same device in the same group will have the same mark. - * - * Example: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=5 - * sw2p2 ifindex=5 mark=5 - * - * If sw2p2 leaves the bridge, we'll have: - * - * br0 ifindex=9 - * sw1p1 ifindex=2 mark=2 - * sw1p2 ifindex=3 mark=2 - * sw2p1 ifindex=4 mark=4 - * sw2p2 ifindex=5 mark=5 - */ -void switchdev_port_fwd_mark_set(struct net_device *dev, - struct net_device *group_dev, - bool joining) -{ - u32 mark = dev->ifindex; - u32 reset_mark = 0; - - if (group_dev) { - ASSERT_RTNL(); - if (joining) - mark = switchdev_port_fwd_mark_get(dev, group_dev); - else if (dev->offload_fwd_mark == mark) - /* Ohoh, this port was the mark reference port, - * but it's leaving the group, so reset the - * mark for the remaining ports in the group. - */ - switchdev_port_fwd_mark_reset(group_dev, mark, - &reset_mark); - } - - dev->offload_fwd_mark = mark; -} -EXPORT_SYMBOL_GPL(switchdev_port_fwd_mark_set); diff --git a/net/sysctl_net.c b/net/sysctl_net.c index 46a71c701e7c..e0c71bd8f7cf 100644 --- a/net/sysctl_net.c +++ b/net/sysctl_net.c @@ -42,26 +42,37 @@ static int net_ctl_permissions(struct ctl_table_header *head, struct ctl_table *table) { struct net *net = container_of(head->set, struct net, sysctls); - kuid_t root_uid = make_kuid(net->user_ns, 0); - kgid_t root_gid = make_kgid(net->user_ns, 0); /* Allow network administrator to have same access as root. */ - if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN) || - uid_eq(root_uid, current_euid())) { + if (ns_capable_noaudit(net->user_ns, CAP_NET_ADMIN)) { int mode = (table->mode >> 6) & 7; return (mode << 6) | (mode << 3) | mode; } - /* Allow netns root group to have the same access as the root group */ - if (in_egroup_p(root_gid)) { - int mode = (table->mode >> 3) & 7; - return (mode << 3) | mode; - } + return table->mode; } +static void net_ctl_set_ownership(struct ctl_table_header *head, + struct ctl_table *table, + kuid_t *uid, kgid_t *gid) +{ + struct net *net = container_of(head->set, struct net, sysctls); + kuid_t ns_root_uid; + kgid_t ns_root_gid; + + ns_root_uid = make_kuid(net->user_ns, 0); + if (uid_valid(ns_root_uid)) + *uid = ns_root_uid; + + ns_root_gid = make_kgid(net->user_ns, 0); + if (gid_valid(ns_root_gid)) + *gid = ns_root_gid; +} + static struct ctl_table_root net_sysctl_root = { .lookup = net_ctl_header_lookup, .permissions = net_ctl_permissions, + .set_ownership = net_ctl_set_ownership, }; static int __net_init sysctl_net_init(struct net *net) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ae469b37d852..753f774cb46f 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -269,18 +269,19 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked) * * RCU is locked, no other locks set */ -void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr) +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr) { struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq; struct sk_buff_head xmitq; + int rc = 0; __skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (msg_type(hdr) == STATE_MSG) { tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq); - tipc_link_bc_sync_rcv(l, hdr, &xmitq); + rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq); } else { tipc_link_bc_init_rcv(l, hdr); } @@ -291,6 +292,7 @@ void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, /* Any socket wakeup messages ? */ if (!skb_queue_empty(inputq)) tipc_sk_rcv(net, inputq); + return rc; } /* tipc_bcast_add_peer - add a peer node to broadcast link and bearer diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index d5e79b3767fd..5ffe34472ccd 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -56,8 +56,8 @@ int tipc_bcast_get_mtu(struct net *net); int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list); int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb); void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l, u32 acked); -void tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, - struct tipc_msg *hdr); +int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l, + struct tipc_msg *hdr); int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg); int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]); int tipc_bclink_reset_stats(struct net *net); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 65b1bbf133bd..975dbeb60ab0 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -42,6 +42,7 @@ #include "monitor.h" #include "bcast.h" #include "netlink.h" +#include "udp_media.h" #define MAX_ADDR_STR 60 @@ -56,6 +57,13 @@ static struct tipc_media * const media_info_array[] = { NULL }; +static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + + return rcu_dereference_rtnl(tn->bearer_list[bearer_id]); +} + static void bearer_disable(struct net *net, struct tipc_bearer *b); /** @@ -323,6 +331,7 @@ restart: b->domain = disc_domain; b->net_plane = bearer_id + 'A'; b->priority = priority; + test_and_set_bit_lock(0, &b->up); res = tipc_disc_create(net, b, &b->bcast_addr, &skb); if (res) { @@ -360,15 +369,24 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) */ void tipc_bearer_reset_all(struct net *net) { - struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; int i; for (i = 0; i < MAX_BEARERS; i++) { - b = rcu_dereference_rtnl(tn->bearer_list[i]); + b = bearer_get(net, i); + if (b) + clear_bit_unlock(0, &b->up); + } + for (i = 0; i < MAX_BEARERS; i++) { + b = bearer_get(net, i); if (b) tipc_reset_bearer(net, b); } + for (i = 0; i < MAX_BEARERS; i++) { + b = bearer_get(net, i); + if (b) + test_and_set_bit_lock(0, &b->up); + } } /** @@ -382,8 +400,9 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b) int bearer_id = b->identity; pr_info("Disabling bearer <%s>\n", b->name); - b->media->disable_media(b); + clear_bit_unlock(0, &b->up); tipc_node_delete_links(net, bearer_id); + b->media->disable_media(b); RCU_INIT_POINTER(b->media_ptr, NULL); if (b->link_req) tipc_disc_delete(b->link_req); @@ -440,22 +459,16 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, { struct net_device *dev; int delta; - void *tipc_ptr; dev = (struct net_device *)rcu_dereference_rtnl(b->media_ptr); if (!dev) return 0; - /* Send RESET message even if bearer is detached from device */ - tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); - if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) - goto drop; - - delta = dev->hard_header_len - skb_headroom(skb); - if ((delta > 0) && - pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) - goto drop; - + delta = SKB_DATA_ALIGN(dev->hard_header_len - skb_headroom(skb)); + if ((delta > 0) && pskb_expand_head(skb, delta, 0, GFP_ATOMIC)) { + kfree_skb(skb); + return 0; + } skb_reset_network_header(skb); skb->dev = dev; skb->protocol = htons(ETH_P_TIPC); @@ -463,9 +476,6 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, dev->dev_addr, skb->len); dev_queue_xmit(skb); return 0; -drop: - kfree_skb(skb); - return 0; } int tipc_bearer_mtu(struct net *net, u32 bearer_id) @@ -487,12 +497,12 @@ void tipc_bearer_xmit_skb(struct net *net, u32 bearer_id, struct sk_buff *skb, struct tipc_media_addr *dest) { - struct tipc_net *tn = tipc_net(net); + struct tipc_msg *hdr = buf_msg(skb); struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (likely(b)) + b = bearer_get(net, bearer_id); + if (likely(b && (test_bit(0, &b->up) || msg_is_reset(hdr)))) b->media->send_msg(net, skb, b, dest); else kfree_skb(skb); @@ -505,7 +515,6 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, struct sk_buff_head *xmitq, struct tipc_media_addr *dst) { - struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b; struct sk_buff *skb, *tmp; @@ -513,12 +522,15 @@ void tipc_bearer_xmit(struct net *net, u32 bearer_id, return; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); + b = bearer_get(net, bearer_id); if (unlikely(!b)) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { __skb_dequeue(xmitq); - b->media->send_msg(net, skb, b, dst); + if (likely(test_bit(0, &b->up) || msg_is_reset(buf_msg(skb)))) + b->media->send_msg(net, skb, b, dst); + else + kfree_skb(skb); } rcu_read_unlock(); } @@ -535,8 +547,8 @@ void tipc_bearer_bc_xmit(struct net *net, u32 bearer_id, struct tipc_msg *hdr; rcu_read_lock(); - b = rcu_dereference_rtnl(tn->bearer_list[bearer_id]); - if (unlikely(!b)) + b = bearer_get(net, bearer_id); + if (unlikely(!b || !test_bit(0, &b->up))) __skb_queue_purge(xmitq); skb_queue_walk_safe(xmitq, skb, tmp) { hdr = buf_msg(skb); @@ -566,7 +578,8 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, rcu_read_lock(); b = rcu_dereference_rtnl(dev->tipc_ptr); - if (likely(b && (skb->pkt_type <= PACKET_BROADCAST))) { + if (likely(b && test_bit(0, &b->up) && + (skb->pkt_type <= PACKET_BROADCAST))) { skb->next = NULL; tipc_rcv(dev_net(dev), skb, b); rcu_read_unlock(); @@ -591,18 +604,9 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); - struct tipc_net *tn = tipc_net(net); struct tipc_bearer *b; - int i; b = rtnl_dereference(dev->tipc_ptr); - if (!b) { - for (i = 0; i < MAX_BEARERS; b = NULL, i++) { - b = rtnl_dereference(tn->bearer_list[i]); - if (b && (b->media_ptr == dev)) - break; - } - } if (!b) return NOTIFY_DONE; @@ -613,11 +617,10 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, if (netif_carrier_ok(dev)) break; case NETDEV_UP: - rcu_assign_pointer(dev->tipc_ptr, b); + test_and_set_bit_lock(0, &b->up); break; case NETDEV_GOING_DOWN: - RCU_INIT_POINTER(dev->tipc_ptr, NULL); - synchronize_net(); + clear_bit_unlock(0, &b->up); tipc_reset_bearer(net, b); break; case NETDEV_CHANGEMTU: @@ -709,6 +712,14 @@ static int __tipc_nl_add_bearer(struct tipc_nl_msg *msg, goto prop_msg_full; nla_nest_end(msg->skb, prop); + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (bearer->media->type_id == TIPC_MEDIA_TYPE_UDP) { + if (tipc_udp_nl_add_bearer_data(msg, bearer)) + goto attr_msg_full; + } +#endif + nla_nest_end(msg->skb, attrs); genlmsg_end(msg->skb, hdr); @@ -895,6 +906,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) return 0; } +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) +{ + int err; + char *name; + struct tipc_bearer *b; + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + + if (!info->attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, + info->attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, name); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + +#ifdef CONFIG_TIPC_MEDIA_UDP + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { + err = tipc_udp_nl_bearer_add(b, + attrs[TIPC_NLA_BEARER_UDP_OPTS]); + if (err) { + rtnl_unlock(); + return err; + } + } +#endif + rtnl_unlock(); + + return 0; +} + int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) { int err; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 43757f1f9cb3..78892e2f53e3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -150,6 +150,7 @@ struct tipc_bearer { u32 identity; struct tipc_link_req *link_req; char net_plane; + unsigned long up; }; struct tipc_bearer_names { @@ -180,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/link.c b/net/tipc/link.c index 877d94f34814..b36e16cdc945 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -181,7 +181,10 @@ struct tipc_link { u16 acked; struct tipc_link *bc_rcvlink; struct tipc_link *bc_sndlink; - int nack_state; + unsigned long prev_retr; + u16 prev_from; + u16 prev_to; + u8 nack_state; bool bc_peer_is_up; /* Statistics */ @@ -202,6 +205,8 @@ enum { BC_NACK_SND_SUPPRESS, }; +#define TIPC_BC_RETR_LIMIT 10 /* [ms] */ + /* * Interval between NACKs when packets arrive out of order */ @@ -237,8 +242,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); -static void tipc_link_build_nack_msg(struct tipc_link *l, - struct sk_buff_head *xmitq); +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq); static void tipc_link_build_bc_init_msg(struct tipc_link *l, struct sk_buff_head *xmitq); static bool tipc_link_release_pkts(struct tipc_link *l, u16 to); @@ -367,6 +372,18 @@ int tipc_link_bc_peers(struct tipc_link *l) return l->ackers; } +u16 link_bc_rcv_gap(struct tipc_link *l) +{ + struct sk_buff *skb = skb_peek(&l->deferdq); + u16 gap = 0; + + if (more(l->snd_nxt, l->rcv_nxt)) + gap = l->snd_nxt - l->rcv_nxt; + if (skb) + gap = buf_seqno(skb) - l->rcv_nxt; + return gap; +} + void tipc_link_set_mtu(struct tipc_link *l, int mtu) { l->mtu = mtu; @@ -807,7 +824,7 @@ void link_prepare_wakeup(struct tipc_link *l) skb_queue_walk_safe(&l->wakeupq, skb, tmp) { imp = TIPC_SKB_CB(skb)->chain_imp; - lim = l->window + l->backlog[imp].limit; + lim = l->backlog[imp].limit; pnd[imp] += TIPC_SKB_CB(skb)->chain_sz; if ((pnd[imp] + l->backlog[imp].len) >= lim) break; @@ -873,9 +890,11 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, struct sk_buff *skb, *_skb, *bskb; /* Match msg importance against this and all higher backlog limits: */ - for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { - if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) - return link_schedule_user(l, list); + if (!skb_queue_empty(backlogq)) { + for (i = imp; i <= TIPC_SYSTEM_IMPORTANCE; i++) { + if (unlikely(l->backlog[i].len >= l->backlog[i].limit)) + return link_schedule_user(l, list); + } } if (unlikely(msg_size(hdr) > mtu)) { skb_queue_purge(list); @@ -1133,7 +1152,10 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) if (((l->rcv_nxt ^ tipc_own_addr(l->net)) & 0xf) != 0xf) return 0; l->rcv_unacked = 0; - return TIPC_LINK_SND_BC_ACK; + + /* Use snd_nxt to store peer's snd_nxt in broadcast rcv link */ + l->snd_nxt = l->rcv_nxt; + return TIPC_LINK_SND_STATE; } /* Unicast ACK */ @@ -1162,17 +1184,26 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) } /* tipc_link_build_nack_msg: prepare link nack message for transmission + * Note that sending of broadcast NACK is coordinated among nodes, to + * reduce the risk of NACK storms towards the sender */ -static void tipc_link_build_nack_msg(struct tipc_link *l, - struct sk_buff_head *xmitq) +static int tipc_link_build_nack_msg(struct tipc_link *l, + struct sk_buff_head *xmitq) { u32 def_cnt = ++l->stats.deferred_recv; + int match1, match2; - if (link_is_bc_rcvlink(l)) - return; + if (link_is_bc_rcvlink(l)) { + match1 = def_cnt & 0xf; + match2 = tipc_own_addr(l->net) & 0xf; + if (match1 == match2) + return TIPC_LINK_SND_STATE; + return 0; + } if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + return 0; } /* tipc_link_rcv - process TIPC packets/messages arriving from off-node @@ -1223,7 +1254,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, /* Defer delivery if sequence gap */ if (unlikely(seqno != rcv_nxt)) { __tipc_skb_queue_sorted(defq, seqno, skb); - tipc_link_build_nack_msg(l, xmitq); + rc |= tipc_link_build_nack_msg(l, xmitq); break; } @@ -1234,7 +1265,7 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb, rc |= tipc_link_input(l, skb, l->inputq); if (unlikely(++l->rcv_unacked >= TIPC_MIN_LINK_WIN)) rc |= tipc_link_build_state_msg(l, xmitq); - if (unlikely(rc & ~TIPC_LINK_SND_BC_ACK)) + if (unlikely(rc & ~TIPC_LINK_SND_STATE)) break; } while ((skb = __skb_dequeue(defq))); @@ -1248,10 +1279,11 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, u16 rcvgap, int tolerance, int priority, struct sk_buff_head *xmitq) { + struct tipc_link *bcl = l->bc_rcvlink; struct sk_buff *skb; struct tipc_msg *hdr; struct sk_buff_head *dfq = &l->deferdq; - bool node_up = link_is_up(l->bc_rcvlink); + bool node_up = link_is_up(bcl); struct tipc_mon_state *mstate = &l->mon_state; int dlen = 0; void *data; @@ -1279,7 +1311,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_net_plane(hdr, l->net_plane); msg_set_next_sent(hdr, l->snd_nxt); msg_set_ack(hdr, l->rcv_nxt - 1); - msg_set_bcast_ack(hdr, l->bc_rcvlink->rcv_nxt - 1); + msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1); msg_set_last_bcast(hdr, l->bc_sndlink->snd_nxt - 1); msg_set_link_tolerance(hdr, tolerance); msg_set_linkprio(hdr, priority); @@ -1289,6 +1321,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, if (mtyp == STATE_MSG) { msg_set_seq_gap(hdr, rcvgap); + msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + dlen); @@ -1571,51 +1604,107 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr) l->rcv_nxt = peers_snd_nxt; } +/* link_bc_retr eval()- check if the indicated range can be retransmitted now + * - Adjust permitted range if there is overlap with previous retransmission + */ +static bool link_bc_retr_eval(struct tipc_link *l, u16 *from, u16 *to) +{ + unsigned long elapsed = jiffies_to_msecs(jiffies - l->prev_retr); + + if (less(*to, *from)) + return false; + + /* New retransmission request */ + if ((elapsed > TIPC_BC_RETR_LIMIT) || + less(*to, l->prev_from) || more(*from, l->prev_to)) { + l->prev_from = *from; + l->prev_to = *to; + l->prev_retr = jiffies; + return true; + } + + /* Inside range of previous retransmit */ + if (!less(*from, l->prev_from) && !more(*to, l->prev_to)) + return false; + + /* Fully or partially outside previous range => exclude overlap */ + if (less(*from, l->prev_from)) { + *to = l->prev_from - 1; + l->prev_from = *from; + } + if (more(*to, l->prev_to)) { + *from = l->prev_to + 1; + l->prev_to = *to; + } + l->prev_retr = jiffies; + return true; +} + /* tipc_link_bc_sync_rcv - update rcv link according to peer's send state */ -void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, - struct sk_buff_head *xmitq) +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq) { + struct tipc_link *snd_l = l->bc_sndlink; u16 peers_snd_nxt = msg_bc_snd_nxt(hdr); + u16 from = msg_bcast_ack(hdr) + 1; + u16 to = from + msg_bc_gap(hdr) - 1; + int rc = 0; if (!link_is_up(l)) - return; + return rc; if (!msg_peer_node_is_up(hdr)) - return; + return rc; /* Open when peer ackowledges our bcast init msg (pkt #1) */ if (msg_ack(hdr)) l->bc_peer_is_up = true; if (!l->bc_peer_is_up) - return; + return rc; + + l->stats.recv_nacks++; /* Ignore if peers_snd_nxt goes beyond receive window */ if (more(peers_snd_nxt, l->rcv_nxt + l->window)) - return; + return rc; + + if (link_bc_retr_eval(snd_l, &from, &to)) + rc = tipc_link_retrans(snd_l, from, to, xmitq); + + l->snd_nxt = peers_snd_nxt; + if (link_bc_rcv_gap(l)) + rc |= TIPC_LINK_SND_STATE; + + /* Return now if sender supports nack via STATE messages */ + if (l->peer_caps & TIPC_BCAST_STATE_NACK) + return rc; + + /* Otherwise, be backwards compatible */ if (!more(peers_snd_nxt, l->rcv_nxt)) { l->nack_state = BC_NACK_SND_CONDITIONAL; - return; + return 0; } /* Don't NACK if one was recently sent or peeked */ if (l->nack_state == BC_NACK_SND_SUPPRESS) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; - return; + return 0; } /* Conditionally delay NACK sending until next synch rcv */ if (l->nack_state == BC_NACK_SND_CONDITIONAL) { l->nack_state = BC_NACK_SND_UNCONDITIONAL; if ((peers_snd_nxt - l->rcv_nxt) < TIPC_MIN_LINK_WIN) - return; + return 0; } /* Send NACK now but suppress next one */ tipc_link_build_bc_proto_msg(l, true, peers_snd_nxt, xmitq); l->nack_state = BC_NACK_SND_SUPPRESS; + return 0; } void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, @@ -1652,6 +1741,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, } /* tipc_link_bc_nack_rcv(): receive broadcast nack message + * This function is here for backwards compatibility, since + * no BCAST_PROTOCOL/STATE messages occur from TIPC v2.5. */ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq) @@ -1692,10 +1783,10 @@ void tipc_link_set_queue_limits(struct tipc_link *l, u32 win) int max_bulk = TIPC_MAX_PUBLICATIONS / (l->mtu / ITEM_SIZE); l->window = win; - l->backlog[TIPC_LOW_IMPORTANCE].limit = win / 2; - l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = win; - l->backlog[TIPC_HIGH_IMPORTANCE].limit = win / 2 * 3; - l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = win * 2; + l->backlog[TIPC_LOW_IMPORTANCE].limit = max_t(u16, 50, win); + l->backlog[TIPC_MEDIUM_IMPORTANCE].limit = max_t(u16, 100, win * 2); + l->backlog[TIPC_HIGH_IMPORTANCE].limit = max_t(u16, 150, win * 3); + l->backlog[TIPC_CRITICAL_IMPORTANCE].limit = max_t(u16, 200, win * 4); l->backlog[TIPC_SYSTEM_IMPORTANCE].limit = max_bulk; } diff --git a/net/tipc/link.h b/net/tipc/link.h index d7e9d42fcb2d..d1bd1787a768 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -63,7 +63,7 @@ enum { enum { TIPC_LINK_UP_EVT = 1, TIPC_LINK_DOWN_EVT = (1 << 1), - TIPC_LINK_SND_BC_ACK = (1 << 2) + TIPC_LINK_SND_STATE = (1 << 2) }; /* Starting value for maximum packet size negotiation on unicast links @@ -138,8 +138,8 @@ void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, void tipc_link_build_bc_sync_msg(struct tipc_link *l, struct sk_buff_head *xmitq); void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr); -void tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, - struct sk_buff_head *xmitq); +int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, + struct sk_buff_head *xmitq); int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 7cf52fb39bee..c3832cdf2278 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -719,6 +719,16 @@ static inline char *msg_media_addr(struct tipc_msg *m) return (char *)&m->hdr[TIPC_MEDIA_INFO_OFFSET]; } +static inline u32 msg_bc_gap(struct tipc_msg *m) +{ + return msg_bits(m, 8, 0, 0x3ff); +} + +static inline void msg_set_bc_gap(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 8, 0, 0x3ff, n); +} + /* * Word 9 */ diff --git a/net/tipc/net.h b/net/tipc/net.h index 77a7a118911d..c7c254902873 100644 --- a/net/tipc/net.h +++ b/net/tipc/net.h @@ -39,6 +39,8 @@ #include +extern const struct nla_policy tipc_nl_net_policy[]; + int tipc_net_start(struct net *net, u32 addr); void tipc_net_stop(struct net *net); diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index a84daec0afe9..3200059d14b2 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -41,6 +41,7 @@ #include "link.h" #include "node.h" #include "net.h" +#include "udp_media.h" #include static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { @@ -160,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .dumpit = tipc_nl_bearer_dump, .policy = tipc_nl_policy, }, + { + .cmd = TIPC_NL_BEARER_ADD, + .doit = tipc_nl_bearer_add, + .policy = tipc_nl_policy, + }, { .cmd = TIPC_NL_BEARER_SET, .doit = tipc_nl_bearer_set, @@ -238,6 +244,18 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .dumpit = tipc_nl_node_dump_monitor_peer, .policy = tipc_nl_policy, }, + { + .cmd = TIPC_NL_PEER_REMOVE, + .doit = tipc_nl_peer_rm, + .policy = tipc_nl_policy, + }, +#ifdef CONFIG_TIPC_MEDIA_UDP + { + .cmd = TIPC_NL_UDP_GET_REMOTEIP, + .dumpit = tipc_udp_nl_dump_remoteip, + .policy = tipc_nl_policy, + }, +#endif }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/node.c b/net/tipc/node.c index 21974191e425..7ef14e2d2356 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1262,6 +1262,34 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) kfree_skb(skb); } +static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, + int bearer_id, struct sk_buff_head *xmitq) +{ + struct tipc_link *ucl; + int rc; + + rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); + + if (rc & TIPC_LINK_DOWN_EVT) { + tipc_bearer_reset_all(n->net); + return; + } + + if (!(rc & TIPC_LINK_SND_STATE)) + return; + + /* If probe message, a STATE response will be sent anyway */ + if (msg_probe(hdr)) + return; + + /* Produce a STATE message carrying broadcast NACK */ + tipc_node_read_lock(n); + ucl = n->links[bearer_id].link; + if (ucl) + tipc_link_build_state_msg(ucl, xmitq); + tipc_node_read_unlock(n); +} + /** * tipc_node_bc_rcv - process TIPC broadcast packet arriving from off-node * @net: the applicable net namespace @@ -1298,7 +1326,7 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id rc = tipc_bcast_rcv(net, be->link, skb); /* Broadcast ACKs are sent on a unicast link */ - if (rc & TIPC_LINK_SND_BC_ACK) { + if (rc & TIPC_LINK_SND_STATE) { tipc_node_read_lock(n); tipc_link_build_state_msg(le->link, &xmitq); tipc_node_read_unlock(n); @@ -1505,7 +1533,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Ensure broadcast reception is in synch with peer's send state */ if (unlikely(usr == LINK_PROTOCOL)) - tipc_bcast_sync_rcv(net, n->bc_entry.link, hdr); + tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq); else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) tipc_bcast_ack_rcv(net, n->bc_entry.link, bc_ack); @@ -1553,6 +1581,69 @@ discard: kfree_skb(skb); } +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + struct nlattr *attrs[TIPC_NLA_NET_MAX + 1]; + struct tipc_node *peer; + u32 addr; + int err; + int i; + + /* We identify the peer by its net */ + if (!info->attrs[TIPC_NLA_NET]) + return -EINVAL; + + err = nla_parse_nested(attrs, TIPC_NLA_NET_MAX, + info->attrs[TIPC_NLA_NET], + tipc_nl_net_policy); + if (err) + return err; + + if (!attrs[TIPC_NLA_NET_ADDR]) + return -EINVAL; + + addr = nla_get_u32(attrs[TIPC_NLA_NET_ADDR]); + + if (in_own_node(net, addr)) + return -ENOTSUPP; + + spin_lock_bh(&tn->node_list_lock); + peer = tipc_node_find(net, addr); + if (!peer) { + spin_unlock_bh(&tn->node_list_lock); + return -ENXIO; + } + + tipc_node_write_lock(peer); + if (peer->state != SELF_DOWN_PEER_DOWN && + peer->state != SELF_DOWN_PEER_LEAVING) { + tipc_node_write_unlock(peer); + err = -EBUSY; + goto err_out; + } + + for (i = 0; i < MAX_BEARERS; i++) { + struct tipc_link_entry *le = &peer->links[i]; + + if (le->link) { + kfree(le->link); + le->link = NULL; + peer->link_cnt--; + } + } + tipc_node_write_unlock(peer); + tipc_node_delete(peer); + + err = 0; +err_out: + tipc_node_put(peer); + spin_unlock_bh(&tn->node_list_lock); + + return err; +} + int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb) { int err; diff --git a/net/tipc/node.h b/net/tipc/node.h index d69fdfcc0ec9..39ef54c1f2ad 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -1,7 +1,7 @@ /* * net/tipc/node.h: Include file for TIPC node management routines * - * Copyright (c) 2000-2006, 2014-2015, Ericsson AB + * Copyright (c) 2000-2006, 2014-2016, Ericsson AB * Copyright (c) 2005, 2010-2014, Wind River Systems * All rights reserved. * @@ -45,11 +45,14 @@ /* Optional capabilities supported by this code version */ enum { - TIPC_BCAST_SYNCH = (1 << 1), - TIPC_BLOCK_FLOWCTL = (2 << 1) + TIPC_BCAST_SYNCH = (1 << 1), + TIPC_BCAST_STATE_NACK = (1 << 2), + TIPC_BLOCK_FLOWCTL = (1 << 3) }; -#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | TIPC_BLOCK_FLOWCTL) +#define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ + TIPC_BCAST_STATE_NACK | \ + TIPC_BLOCK_FLOWCTL) #define INVALID_BEARER_ID -1 void tipc_node_stop(struct net *net); @@ -77,6 +80,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_peer_rm(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index ae7e14cae085..d80cd3f7503f 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -49,6 +49,7 @@ #include "core.h" #include "bearer.h" #include "netlink.h" +#include "msg.h" /* IANA assigned UDP port */ #define UDP_PORT_DEFAULT 6118 @@ -70,6 +71,13 @@ struct udp_media_addr { }; }; +/* struct udp_replicast - container for UDP remote addresses */ +struct udp_replicast { + struct udp_media_addr addr; + struct rcu_head rcu; + struct list_head list; +}; + /** * struct udp_bearer - ip/udp bearer data structure * @bearer: associated generic tipc bearer @@ -82,8 +90,20 @@ struct udp_bearer { struct socket *ubsock; u32 ifindex; struct work_struct work; + struct udp_replicast rcast; }; +static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) +{ + if (ntohs(addr->proto) == ETH_P_IP) + return ipv4_is_multicast(addr->ipv4.s_addr); +#if IS_ENABLED(CONFIG_IPV6) + else + return ipv6_addr_is_multicast(&addr->ipv6); +#endif + return 0; +} + /* udp_media_addr_set - convert a ip/udp address to a TIPC media address */ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, struct udp_media_addr *ua) @@ -91,15 +111,9 @@ static void tipc_udp_media_addr_set(struct tipc_media_addr *addr, memset(addr, 0, sizeof(struct tipc_media_addr)); addr->media_id = TIPC_MEDIA_TYPE_UDP; memcpy(addr->value, ua, sizeof(struct udp_media_addr)); - if (ntohs(ua->proto) == ETH_P_IP) { - if (ipv4_is_multicast(ua->ipv4.s_addr)) - addr->broadcast = 1; - } else if (ntohs(ua->proto) == ETH_P_IPV6) { - if (ipv6_addr_type(&ua->ipv6) & IPV6_ADDR_MULTICAST) - addr->broadcast = 1; - } else { - pr_err("Invalid UDP media address\n"); - } + + if (tipc_udp_is_mcast_addr(ua)) + addr->broadcast = 1; } /* tipc_udp_addr2str - convert ip/udp address to string */ @@ -140,28 +154,13 @@ static int tipc_udp_addr2msg(char *msg, struct tipc_media_addr *a) } /* tipc_send_msg - enqueue a send request */ -static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, - struct tipc_bearer *b, - struct tipc_media_addr *dest) +static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, + struct udp_bearer *ub, struct udp_media_addr *src, + struct udp_media_addr *dst) { int ttl, err = 0; - struct udp_bearer *ub; - struct udp_media_addr *dst = (struct udp_media_addr *)&dest->value; - struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; struct rtable *rt; - if (skb_headroom(skb) < UDP_MIN_HEADROOM) { - err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); - if (err) - goto tx_error; - } - - skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); - ub = rcu_dereference_rtnl(b->media_ptr); - if (!ub) { - err = -ENODEV; - goto tx_error; - } if (dst->proto == htons(ETH_P_IP)) { struct flowi4 fl = { .daddr = dst->ipv4.s_addr, @@ -207,29 +206,178 @@ tx_error: return err; } +static int tipc_udp_send_msg(struct net *net, struct sk_buff *skb, + struct tipc_bearer *b, + struct tipc_media_addr *addr) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst = (struct udp_media_addr *)&addr->value; + struct udp_replicast *rcast; + struct udp_bearer *ub; + int err = 0; + + if (skb_headroom(skb) < UDP_MIN_HEADROOM) { + err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); + if (err) + goto out; + } + + skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + err = -ENODEV; + goto out; + } + + if (!addr->broadcast || list_empty(&ub->rcast.list)) + return tipc_udp_xmit(net, skb, ub, src, dst); + + /* Replicast, send an skb to each configured IP address */ + list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { + struct sk_buff *_skb; + + _skb = pskb_copy(skb, GFP_ATOMIC); + if (!_skb) { + err = -ENOMEM; + goto out; + } + + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); + if (err) { + kfree_skb(_skb); + goto out; + } + } + err = 0; +out: + kfree_skb(skb); + return err; +} + +static bool tipc_udp_is_known_peer(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast, *tmp; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + pr_err_ratelimited("UDP bearer instance not found\n"); + return false; + } + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (!memcmp(&rcast->addr, addr, sizeof(struct udp_media_addr))) + return true; + } + + return false; +} + +static int tipc_udp_rcast_add(struct tipc_bearer *b, + struct udp_media_addr *addr) +{ + struct udp_replicast *rcast; + struct udp_bearer *ub; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); + if (!rcast) + return -ENOMEM; + + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); + + if (ntohs(addr->proto) == ETH_P_IP) + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); +#if IS_ENABLED(CONFIG_IPV6) + else if (ntohs(addr->proto) == ETH_P_IPV6) + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); +#endif + + list_add_rcu(&rcast->list, &ub->rcast.list); + return 0; +} + +static int tipc_udp_rcast_disc(struct tipc_bearer *b, struct sk_buff *skb) +{ + struct udp_media_addr src = {0}; + struct udp_media_addr *dst; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) + return 0; + + src.port = udp_hdr(skb)->source; + + if (ip_hdr(skb)->version == 4) { + struct iphdr *iphdr = ip_hdr(skb); + + src.proto = htons(ETH_P_IP); + src.ipv4.s_addr = iphdr->saddr; + if (ipv4_is_multicast(iphdr->daddr)) + return 0; +#if IS_ENABLED(CONFIG_IPV6) + } else if (ip_hdr(skb)->version == 6) { + struct ipv6hdr *iphdr = ipv6_hdr(skb); + + src.proto = htons(ETH_P_IPV6); + src.ipv6 = iphdr->saddr; + if (ipv6_addr_is_multicast(&iphdr->daddr)) + return 0; +#endif + } else { + return 0; + } + + if (likely(tipc_udp_is_known_peer(b, &src))) + return 0; + + return tipc_udp_rcast_add(b, &src); +} + /* tipc_udp_recv - read data from bearer socket */ static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) { struct udp_bearer *ub; struct tipc_bearer *b; + struct tipc_msg *hdr; + int err; ub = rcu_dereference_sk_user_data(sk); if (!ub) { pr_err_ratelimited("Failed to get UDP bearer reference"); - kfree_skb(skb); - return 0; + goto out; } - skb_pull(skb, sizeof(struct udphdr)); + hdr = buf_msg(skb); + rcu_read_lock(); b = rcu_dereference_rtnl(ub->bearer); + if (!b) + goto rcu_out; - if (b) { + if (b && test_bit(0, &b->up)) { tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); return 0; } + + if (unlikely(msg_user(hdr) == LINK_CONFIG)) { + err = tipc_udp_rcast_disc(b, skb); + if (err) + goto rcu_out; + } + + tipc_rcv(sock_net(sk), skb, b); rcu_read_unlock(); + return 0; + +rcu_out: + rcu_read_unlock(); +out: kfree_skb(skb); return 0; } @@ -241,15 +389,11 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) struct sock *sk = ub->ubsock->sk; if (ntohs(remote->proto) == ETH_P_IP) { - if (!ipv4_is_multicast(remote->ipv4.s_addr)) - return 0; mreqn.imr_multiaddr = remote->ipv4; mreqn.imr_ifindex = ub->ifindex; err = ip_mc_join_group(sk, &mreqn); #if IS_ENABLED(CONFIG_IPV6) } else { - if (!ipv6_addr_is_multicast(&remote->ipv6)) - return 0; err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); #endif @@ -257,75 +401,234 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) return err; } -/** - * parse_options - build local/remote addresses from configuration - * @attrs: netlink config data - * @ub: UDP bearer instance - * @local: local bearer IP address/port - * @remote: peer or multicast IP/port - */ -static int parse_options(struct nlattr *attrs[], struct udp_bearer *ub, - struct udp_media_addr *local, - struct udp_media_addr *remote) +static int __tipc_nl_add_udp_addr(struct sk_buff *skb, + struct udp_media_addr *addr, int nla_t) { - struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; - struct sockaddr_storage sa_local, sa_remote; + if (ntohs(addr->proto) == ETH_P_IP) { + struct sockaddr_in ip4; - if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) - goto err; - if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, - attrs[TIPC_NLA_BEARER_UDP_OPTS], - tipc_nl_udp_policy)) - goto err; - if (opts[TIPC_NLA_UDP_LOCAL] && opts[TIPC_NLA_UDP_REMOTE]) { - nla_memcpy(&sa_local, opts[TIPC_NLA_UDP_LOCAL], - sizeof(sa_local)); - nla_memcpy(&sa_remote, opts[TIPC_NLA_UDP_REMOTE], - sizeof(sa_remote)); + ip4.sin_family = AF_INET; + ip4.sin_port = addr->port; + ip4.sin_addr.s_addr = addr->ipv4.s_addr; + if (nla_put(skb, nla_t, sizeof(ip4), &ip4)) + return -EMSGSIZE; + +#if IS_ENABLED(CONFIG_IPV6) + } else if (ntohs(addr->proto) == ETH_P_IPV6) { + struct sockaddr_in6 ip6; + + ip6.sin6_family = AF_INET6; + ip6.sin6_port = addr->port; + memcpy(&ip6.sin6_addr, &addr->ipv6, sizeof(struct in6_addr)); + if (nla_put(skb, nla_t, sizeof(ip6), &ip6)) + return -EMSGSIZE; +#endif + } + + return 0; +} + +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb) +{ + u32 bid = cb->args[0]; + u32 skip_cnt = cb->args[1]; + u32 portid = NETLINK_CB(cb->skb).portid; + struct udp_replicast *rcast, *tmp; + struct tipc_bearer *b; + struct udp_bearer *ub; + void *hdr; + int err; + int i; + + if (!bid && !skip_cnt) { + struct net *net = sock_net(skb->sk); + struct nlattr *battrs[TIPC_NLA_BEARER_MAX + 1]; + struct nlattr **attrs; + char *bname; + + err = tipc_nlmsg_parse(cb->nlh, &attrs); + if (err) + return err; + + if (!attrs[TIPC_NLA_BEARER]) + return -EINVAL; + + err = nla_parse_nested(battrs, TIPC_NLA_BEARER_MAX, + attrs[TIPC_NLA_BEARER], + tipc_nl_bearer_policy); + if (err) + return err; + + if (!battrs[TIPC_NLA_BEARER_NAME]) + return -EINVAL; + + bname = nla_data(battrs[TIPC_NLA_BEARER_NAME]); + + rtnl_lock(); + b = tipc_bearer_find(net, bname); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + bid = b->identity; } else { -err: - pr_err("Invalid UDP bearer configuration"); + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); + + rtnl_lock(); + b = rtnl_dereference(tn->bearer_list[bid]); + if (!b) { + rtnl_unlock(); + return -EINVAL; + } + } + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) { + rtnl_unlock(); return -EINVAL; } - if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET) { - struct sockaddr_in *ip4; - ip4 = (struct sockaddr_in *)&sa_local; - local->proto = htons(ETH_P_IP); - local->port = ip4->sin_port; - local->ipv4.s_addr = ip4->sin_addr.s_addr; + i = 0; + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + if (i < skip_cnt) + goto count; - ip4 = (struct sockaddr_in *)&sa_remote; - remote->proto = htons(ETH_P_IP); - remote->port = ip4->sin_port; - remote->ipv4.s_addr = ip4->sin_addr.s_addr; + hdr = genlmsg_put(skb, portid, cb->nlh->nlmsg_seq, + &tipc_genl_family, NLM_F_MULTI, + TIPC_NL_BEARER_GET); + if (!hdr) + goto done; + + err = __tipc_nl_add_udp_addr(skb, &rcast->addr, + TIPC_NLA_UDP_REMOTE); + if (err) { + genlmsg_cancel(skb, hdr); + goto done; + } + genlmsg_end(skb, hdr); +count: + i++; + } +done: + rtnl_unlock(); + cb->args[0] = bid; + cb->args[1] = i; + + return skb->len; +} + +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b) +{ + struct udp_media_addr *src = (struct udp_media_addr *)&b->addr.value; + struct udp_media_addr *dst; + struct udp_bearer *ub; + struct nlattr *nest; + + ub = rcu_dereference_rtnl(b->media_ptr); + if (!ub) + return -ENODEV; + + nest = nla_nest_start(msg->skb, TIPC_NLA_BEARER_UDP_OPTS); + if (!nest) + goto msg_full; + + if (__tipc_nl_add_udp_addr(msg->skb, src, TIPC_NLA_UDP_LOCAL)) + goto msg_full; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (__tipc_nl_add_udp_addr(msg->skb, dst, TIPC_NLA_UDP_REMOTE)) + goto msg_full; + + if (!list_empty(&ub->rcast.list)) { + if (nla_put_flag(msg->skb, TIPC_NLA_UDP_MULTI_REMOTEIP)) + goto msg_full; + } + + nla_nest_end(msg->skb, nest); + return 0; +msg_full: + nla_nest_cancel(msg->skb, nest); + return -EMSGSIZE; +} + +/** + * tipc_parse_udp_addr - build udp media address from netlink data + * @nlattr: netlink attribute containing sockaddr storage aligned address + * @addr: tipc media address to fill with address, port and protocol type + * @scope_id: IPv6 scope id pointer, not NULL indicates it's required + */ + +static int tipc_parse_udp_addr(struct nlattr *nla, struct udp_media_addr *addr, + u32 *scope_id) +{ + struct sockaddr_storage sa; + + nla_memcpy(&sa, nla, sizeof(sa)); + if (sa.ss_family == AF_INET) { + struct sockaddr_in *ip4 = (struct sockaddr_in *)&sa; + + addr->proto = htons(ETH_P_IP); + addr->port = ip4->sin_port; + addr->ipv4.s_addr = ip4->sin_addr.s_addr; return 0; #if IS_ENABLED(CONFIG_IPV6) - } else if ((sa_local.ss_family & sa_remote.ss_family) == AF_INET6) { - int atype; - struct sockaddr_in6 *ip6; + } else if (sa.ss_family == AF_INET6) { + struct sockaddr_in6 *ip6 = (struct sockaddr_in6 *)&sa; - ip6 = (struct sockaddr_in6 *)&sa_local; - atype = ipv6_addr_type(&ip6->sin6_addr); - if (__ipv6_addr_needs_scope_id(atype) && !ip6->sin6_scope_id) - return -EINVAL; + addr->proto = htons(ETH_P_IPV6); + addr->port = ip6->sin6_port; + memcpy(&addr->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); - local->proto = htons(ETH_P_IPV6); - local->port = ip6->sin6_port; - memcpy(&local->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); - ub->ifindex = ip6->sin6_scope_id; + /* Scope ID is only interesting for local addresses */ + if (scope_id) { + int atype; + + atype = ipv6_addr_type(&ip6->sin6_addr); + if (__ipv6_addr_needs_scope_id(atype) && + !ip6->sin6_scope_id) { + return -EINVAL; + } + + *scope_id = ip6->sin6_scope_id ? : 0; + } - ip6 = (struct sockaddr_in6 *)&sa_remote; - remote->proto = htons(ETH_P_IPV6); - remote->port = ip6->sin6_port; - memcpy(&remote->ipv6, &ip6->sin6_addr, sizeof(struct in6_addr)); return 0; #endif } return -EADDRNOTAVAIL; } +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) +{ + int err; + struct udp_media_addr addr = {0}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; + struct udp_media_addr *dst; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy)) + return -EINVAL; + + if (!opts[TIPC_NLA_UDP_REMOTE]) + return -EINVAL; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); + if (err) + return err; + + dst = (struct udp_media_addr *)&b->bcast_addr.value; + if (tipc_udp_is_mcast_addr(dst)) { + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); + return -EINVAL; + } + + if (tipc_udp_is_known_peer(b, &addr)) + return 0; + + return tipc_udp_rcast_add(b, &addr); +} + /** * tipc_udp_enable - callback to create a new udp bearer instance * @net: network namespace @@ -340,18 +643,38 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, { int err = -EINVAL; struct udp_bearer *ub; - struct udp_media_addr *remote; + struct udp_media_addr remote = {0}; struct udp_media_addr local = {0}; struct udp_port_cfg udp_conf = {0}; struct udp_tunnel_sock_cfg tuncfg = {NULL}; + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; ub = kzalloc(sizeof(*ub), GFP_ATOMIC); if (!ub) return -ENOMEM; - remote = (struct udp_media_addr *)&b->bcast_addr.value; - memset(remote, 0, sizeof(struct udp_media_addr)); - err = parse_options(attrs, ub, &local, remote); + INIT_LIST_HEAD(&ub->rcast.list); + + if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) + goto err; + + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, + attrs[TIPC_NLA_BEARER_UDP_OPTS], + tipc_nl_udp_policy)) + goto err; + + if (!opts[TIPC_NLA_UDP_LOCAL] || !opts[TIPC_NLA_UDP_REMOTE]) { + pr_err("Invalid UDP bearer configuration"); + err = -EINVAL; + goto err; + } + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_LOCAL], &local, + &ub->ifindex); + if (err) + goto err; + + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); if (err) goto err; @@ -396,9 +719,18 @@ static int tipc_udp_enable(struct net *net, struct tipc_bearer *b, tuncfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); - err = enable_mcast(ub, remote); + /** + * The bcast media address port is used for all peers and the ip + * is used if it's a multicast address. + */ + memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); + if (tipc_udp_is_mcast_addr(&remote)) + err = enable_mcast(ub, &remote); + else + err = tipc_udp_rcast_add(b, &remote); if (err) goto err; + return 0; err: if (ub->ubsock) @@ -411,6 +743,12 @@ err: static void cleanup_bearer(struct work_struct *work) { struct udp_bearer *ub = container_of(work, struct udp_bearer, work); + struct udp_replicast *rcast, *tmp; + + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { + list_del_rcu(&rcast->list); + kfree_rcu(rcast, rcu); + } if (ub->ubsock) udp_tunnel_sock_release(ub->ubsock); diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h new file mode 100644 index 000000000000..281bbae87726 --- /dev/null +++ b/net/tipc/udp_media.h @@ -0,0 +1,46 @@ +/* + * net/tipc/udp_media.h: Include file for UDP bearer media + * + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB + * Copyright (c) 2005, 2010-2011, Wind River Systems + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef CONFIG_TIPC_MEDIA_UDP +#ifndef _TIPC_UDP_MEDIA_H +#define _TIPC_UDP_MEDIA_H + +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); +int tipc_udp_nl_add_bearer_data(struct tipc_nl_msg *msg, struct tipc_bearer *b); +int tipc_udp_nl_dump_remoteip(struct sk_buff *skb, struct netlink_callback *cb); + +#endif +#endif diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 0f506220a3bd..5497d022fada 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -372,6 +372,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: @@ -946,6 +947,7 @@ cfg80211_get_chan_state(struct wireless_dev *wdev, case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* these interface types don't really have a channel */ return; case NL80211_IFTYPE_UNSPECIFIED: diff --git a/net/wireless/core.c b/net/wireless/core.c index 7645e97362c0..8201e6d7449e 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -225,6 +225,23 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, } } +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + ASSERT_RTNL(); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) + return; + + if (!wdev->nan_started) + return; + + rdev_stop_nan(rdev, wdev); + wdev->nan_started = false; + + rdev->opencount--; +} + void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -242,6 +259,9 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: break; } @@ -537,6 +557,11 @@ static int wiphy_verify_combinations(struct wiphy *wiphy) c->limits[j].max > 1)) return -EINVAL; + /* Only a single NAN can be allowed */ + if (WARN_ON(types & BIT(NL80211_IFTYPE_NAN) && + c->limits[j].max > 1)) + return -EINVAL; + cnt += c->limits[j].max; /* * Don't advertise an unsupported type @@ -579,6 +604,11 @@ int wiphy_register(struct wiphy *wiphy) !rdev->ops->tdls_cancel_channel_switch))) return -EINVAL; + if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN)) && + (!rdev->ops->start_nan || !rdev->ops->stop_nan || + !rdev->ops->add_nan_func || !rdev->ops->del_nan_func))) + return -EINVAL; + /* * if a wiphy has unsupported modes for regulatory channel enforcement, * opt-out of enforcement checking @@ -589,6 +619,7 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_P2P_DEVICE) | + BIT(NL80211_IFTYPE_NAN) | BIT(NL80211_IFTYPE_AP_VLAN) | BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; @@ -906,6 +937,8 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) if (WARN_ON(wdev->netdev)) return; + nl80211_notify_iface(rdev, wdev, NL80211_CMD_DEL_INTERFACE); + list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -914,6 +947,9 @@ void cfg80211_unregister_wdev(struct wireless_dev *wdev) cfg80211_mlme_purge_registrations(wdev); cfg80211_stop_p2p_device(rdev, wdev); break; + case NL80211_IFTYPE_NAN: + cfg80211_stop_nan(rdev, wdev); + break; default: WARN_ON_ONCE(1); break; @@ -977,6 +1013,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, /* must be handled by mac80211/driver, has no APIs */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: /* cannot happen, has no netdev */ break; case NL80211_IFTYPE_AP_VLAN: @@ -1079,6 +1116,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, wdev->iftype == NL80211_IFTYPE_P2P_CLIENT || wdev->iftype == NL80211_IFTYPE_ADHOC) && !wdev->use_4addr) dev->priv_flags |= IFF_DONT_BRIDGE; + + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); break; case NETDEV_GOING_DOWN: cfg80211_leave(rdev, wdev); @@ -1157,6 +1196,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, * remove and clean it up. */ if (!list_empty(&wdev->list)) { + nl80211_notify_iface(rdev, wdev, + NL80211_CMD_DEL_INTERFACE); sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; @@ -1246,7 +1287,7 @@ static int __init cfg80211_init(void) if (err) goto out_fail_reg; - cfg80211_wq = create_singlethread_workqueue("cfg80211"); + cfg80211_wq = alloc_ordered_workqueue("cfg80211", WQ_MEM_RECLAIM); if (!cfg80211_wq) { err = -ENOMEM; goto out_fail_wq; diff --git a/net/wireless/core.h b/net/wireless/core.h index eee91443924d..08d2e948c9ad 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -249,9 +249,9 @@ struct cfg80211_event { }; struct cfg80211_cached_keys { - struct key_params params[6]; - u8 data[6][WLAN_MAX_KEY_LEN]; - int def, defmgmt; + struct key_params params[CFG80211_MAX_WEP_KEYS]; + u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104]; + int def; }; enum cfg80211_chan_mode { @@ -488,6 +488,9 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + #define CFG80211_MAX_NUM_DIFFERENT_CHANNELS 10 #ifdef CONFIG_CFG80211_DEVELOPER_WARNINGS diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 4a4dda53bdf1..364f900a3dc4 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -43,7 +43,8 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, cfg80211_hold_bss(bss_from_pub(bss)); wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid, GFP_KERNEL); @@ -114,6 +115,9 @@ static int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev, } } + if (WARN_ON(connkeys && connkeys->def < 0)) + return -EINVAL; + if (WARN_ON(wdev->connect_keys)) kzfree(wdev->connect_keys); wdev->connect_keys = connkeys; @@ -284,18 +288,16 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev, if (!netif_running(wdev->netdev)) return 0; - if (wdev->wext.keys) { + if (wdev->wext.keys) wdev->wext.keys->def = wdev->wext.default_key; - wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; - } wdev->wext.ibss.privacy = wdev->wext.default_key != -1; - if (wdev->wext.keys) { + if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } err = __cfg80211_join_ibss(rdev, wdev->netdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index c284d883c349..cbb48e26a871 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -222,7 +222,7 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); if (auth_type == NL80211_AUTHTYPE_SHARED_KEY) - if (!key || !key_len || key_idx < 0 || key_idx > 4) + if (!key || !key_len || key_idx < 0 || key_idx > 3) return -EINVAL; if (wdev->current_bss && @@ -634,6 +634,7 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * fall through, P2P device only supports * public action frames */ + case NL80211_IFTYPE_NAN: default: err = -EOPNOTSUPP; break; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4809f4d2cdcc..c510810f0b7c 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -56,6 +56,7 @@ enum nl80211_multicast_groups { NL80211_MCGRP_REGULATORY, NL80211_MCGRP_MLME, NL80211_MCGRP_VENDOR, + NL80211_MCGRP_NAN, NL80211_MCGRP_TESTMODE /* keep last - ifdef! */ }; @@ -65,6 +66,7 @@ static const struct genl_multicast_group nl80211_mcgrps[] = { [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, + [NL80211_MCGRP_NAN] = { .name = NL80211_MULTICAST_GROUP_NAN }, #ifdef CONFIG_NL80211_TESTMODE [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif @@ -409,6 +411,9 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { .len = VHT_MUMIMO_GROUPS_DATA_LEN }, [NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = { .len = ETH_ALEN }, + [NL80211_ATTR_NAN_MASTER_PREF] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_DUAL] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED }, }; /* policy for the key attributes */ @@ -502,6 +507,39 @@ nl80211_bss_select_policy[NL80211_BSS_SELECT_ATTR_MAX + 1] = { }, }; +/* policy for NAN function attributes */ +static const struct nla_policy +nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = { + [NL80211_NAN_FUNC_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_SERVICE_ID] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_ID_LEN }, + [NL80211_NAN_FUNC_PUBLISH_TYPE] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_PUBLISH_BCAST] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_FOLLOW_UP_DEST] = { .len = ETH_ALEN }, + [NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG }, + [NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 }, + [NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SERVICE_SPEC_INFO_MAX_LEN }, + [NL80211_NAN_FUNC_SRF] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_RX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_TX_MATCH_FILTER] = { .type = NLA_NESTED }, + [NL80211_NAN_FUNC_INSTANCE_ID] = { .type = NLA_U8 }, + [NL80211_NAN_FUNC_TERM_REASON] = { .type = NLA_U8 }, +}; + +/* policy for Service Response Filter attributes */ +static const struct nla_policy +nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { + [NL80211_NAN_SRF_INCLUDE] = { .type = NLA_FLAG }, + [NL80211_NAN_SRF_BF] = { .type = NLA_BINARY, + .len = NL80211_NAN_FUNC_SRF_MAX_LEN }, + [NL80211_NAN_SRF_BF_IDX] = { .type = NLA_U8 }, + [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -848,13 +886,21 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, struct nlattr *key; struct cfg80211_cached_keys *result; int rem, err, def = 0; + bool have_key = false; + + nla_for_each_nested(key, keys, rem) { + have_key = true; + break; + } + + if (!have_key) + return NULL; result = kzalloc(sizeof(*result), GFP_KERNEL); if (!result) return ERR_PTR(-ENOMEM); result->def = -1; - result->defmgmt = -1; nla_for_each_nested(key, keys, rem) { memset(&parse, 0, sizeof(parse)); @@ -866,7 +912,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, err = -EINVAL; if (!parse.p.key) goto error; - if (parse.idx < 0 || parse.idx > 4) + if (parse.idx < 0 || parse.idx > 3) goto error; if (parse.def) { if (def) @@ -881,16 +927,24 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, parse.idx, false, NULL); if (err) goto error; + if (parse.p.cipher != WLAN_CIPHER_SUITE_WEP40 && + parse.p.cipher != WLAN_CIPHER_SUITE_WEP104) { + err = -EINVAL; + goto error; + } result->params[parse.idx].cipher = parse.p.cipher; result->params[parse.idx].key_len = parse.p.key_len; result->params[parse.idx].key = result->data[parse.idx]; memcpy(result->data[parse.idx], parse.p.key, parse.p.key_len); - if (parse.p.cipher == WLAN_CIPHER_SUITE_WEP40 || - parse.p.cipher == WLAN_CIPHER_SUITE_WEP104) { - if (no_ht) - *no_ht = true; - } + /* must be WEP key if we got here */ + if (no_ht) + *no_ht = true; + } + + if (result->def < 0) { + err = -EINVAL; + goto error; } return result; @@ -918,6 +972,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: @@ -2525,10 +2580,35 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * int if_idx = 0; int wp_start = cb->args[0]; int if_start = cb->args[1]; + int filter_wiphy = -1; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; rtnl_lock(); + if (!cb->args[2]) { + struct nl80211_dump_wiphy_state state = { + .filter_wiphy = -1, + }; + int ret; + + ret = nl80211_dump_wiphy_parse(skb, cb, &state); + if (ret) + return ret; + + filter_wiphy = state.filter_wiphy; + + /* + * if filtering, set cb->args[2] to +1 since 0 is the default + * value needed to determine that parsing is necessary. + */ + if (filter_wiphy >= 0) + cb->args[2] = filter_wiphy + 1; + else + cb->args[2] = -1; + } else if (cb->args[2] > 0) { + filter_wiphy = cb->args[2] - 1; + } + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { if (!net_eq(wiphy_net(&rdev->wiphy), sock_net(skb->sk))) continue; @@ -2536,6 +2616,10 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * wp_idx++; continue; } + + if (filter_wiphy >= 0 && filter_wiphy != rdev->wiphy_idx) + continue; + if_idx = 0; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { @@ -2751,7 +2835,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct vif_params params; struct wireless_dev *wdev; - struct sk_buff *msg, *event; + struct sk_buff *msg; int err; enum nl80211_iftype type = NL80211_IFTYPE_UNSPECIFIED; u32 flags; @@ -2774,7 +2858,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) !(rdev->wiphy.interface_modes & (1 << type))) return -EOPNOTSUPP; - if ((type == NL80211_IFTYPE_P2P_DEVICE || + if ((type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN || rdev->wiphy.features & NL80211_FEATURE_MAC_ON_CREATE) && info->attrs[NL80211_ATTR_MAC]) { nla_memcpy(params.macaddr, info->attrs[NL80211_ATTR_MAC], @@ -2830,9 +2914,10 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) wdev->mesh_id_up_len); wdev_unlock(wdev); break; + case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_P2P_DEVICE: /* - * P2P Device doesn't have a netdev, so doesn't go + * P2P Device and NAN do not have a netdev, so don't go * through the netdev notifier and must be added here */ mutex_init(&wdev->mtx); @@ -2855,20 +2940,15 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) return -ENOBUFS; } - event = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (event) { - if (nl80211_send_iface(event, 0, 0, 0, - rdev, wdev, false) < 0) { - nlmsg_free(event); - goto out; - } + /* + * For wdevs which have no associated netdev object (e.g. of type + * NL80211_IFTYPE_P2P_DEVICE), emit the NEW_INTERFACE event here. + * For all other types, the event will be generated from the + * netdev notifier + */ + if (!wdev->netdev) + nl80211_notify_iface(rdev, wdev, NL80211_CMD_NEW_INTERFACE); - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - event, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - } - -out: return genlmsg_reply(msg, info); } @@ -2876,18 +2956,10 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct wireless_dev *wdev = info->user_ptr[1]; - struct sk_buff *msg; - int status; if (!rdev->ops->del_virtual_intf) return -EOPNOTSUPP; - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (msg && nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, true) < 0) { - nlmsg_free(msg); - msg = NULL; - } - /* * If we remove a wireless device without a netdev then clear * user_ptr[1] so that nl80211_post_doit won't dereference it @@ -2898,15 +2970,7 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) if (!wdev->netdev) info->user_ptr[1] = NULL; - status = rdev_del_virtual_intf(rdev, wdev); - if (status >= 0 && msg) - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), - msg, 0, NL80211_MCGRP_CONFIG, - GFP_KERNEL); - else - nlmsg_free(msg); - - return status; + return rdev_del_virtual_intf(rdev, wdev); } static int nl80211_set_noack_map(struct sk_buff *skb, struct genl_info *info) @@ -3316,6 +3380,291 @@ static int nl80211_set_mac_acl(struct sk_buff *skb, struct genl_info *info) return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len, + u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) +{ + u8 i; + + memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); + + for (i = 0; i < rates_len; i++) { + int ridx, rbit; + + ridx = rates[i] / 8; + rbit = BIT(rates[i] % 8); + + /* check validity */ + if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) + return false; + + /* check availability */ + if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) + mcs[ridx] |= rbit; + else + return false; + } + + return true; +} + +static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) +{ + u16 mcs_mask = 0; + + switch (vht_mcs_map) { + case IEEE80211_VHT_MCS_NOT_SUPPORTED: + break; + case IEEE80211_VHT_MCS_SUPPORT_0_7: + mcs_mask = 0x00FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_8: + mcs_mask = 0x01FF; + break; + case IEEE80211_VHT_MCS_SUPPORT_0_9: + mcs_mask = 0x03FF; + break; + default: + break; + } + + return mcs_mask; +} + +static void vht_build_mcs_mask(u16 vht_mcs_map, + u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) +{ + u8 nss; + + for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { + vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); + vht_mcs_map >>= 2; + } +} + +static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, + struct nl80211_txrate_vht *txrate, + u16 mcs[NL80211_VHT_NSS_MAX]) +{ + u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; + u8 i; + + if (!sband->vht_cap.vht_supported) + return false; + + memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); + + /* Build vht_mcs_mask from VHT capabilities */ + vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) + mcs[i] = txrate->mcs[i]; + else + return false; + } + + return true; +} + +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, + [NL80211_TXRATE_HT] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_HT_RATES }, + [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, + [NL80211_TXRATE_GI] = { .type = NLA_U8 }, +}; + +static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, + struct cfg80211_bitrate_mask *mask) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + int rem, i; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + u16 vht_tx_mcs_map; + + memset(mask, 0, sizeof(*mask)); + /* Default to all rates enabled */ + for (i = 0; i < NUM_NL80211_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + + if (!sband) + continue; + + mask->control[i].legacy = (1 << sband->n_bitrates) - 1; + memcpy(mask->control[i].ht_mcs, + sband->ht_cap.mcs.rx_mask, + sizeof(mask->control[i].ht_mcs)); + + if (!sband->vht_cap.vht_supported) + continue; + + vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); + vht_build_mcs_mask(vht_tx_mcs_map, mask->control[i].vht_mcs); + } + + /* if no rates are given set it back to the defaults */ + if (!info->attrs[NL80211_ATTR_TX_RATES]) + goto out; + + /* The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum nl80211_band values used in cfg80211. + */ + BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { + enum nl80211_band band = nla_type(tx_rates); + int err; + + if (band < 0 || band >= NUM_NL80211_BANDS) + return -EINVAL; + sband = rdev->wiphy.bands[band]; + if (sband == NULL) + return -EINVAL; + err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (err) + return err; + if (tb[NL80211_TXRATE_LEGACY]) { + mask->control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if ((mask->control[band].legacy == 0) && + nla_len(tb[NL80211_TXRATE_LEGACY])) + return -EINVAL; + } + if (tb[NL80211_TXRATE_HT]) { + if (!ht_rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_HT]), + nla_len(tb[NL80211_TXRATE_HT]), + mask->control[band].ht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_VHT]) { + if (!vht_set_mcs_mask( + sband, + nla_data(tb[NL80211_TXRATE_VHT]), + mask->control[band].vht_mcs)) + return -EINVAL; + } + if (tb[NL80211_TXRATE_GI]) { + mask->control[band].gi = + nla_get_u8(tb[NL80211_TXRATE_GI]); + if (mask->control[band].gi > NL80211_TXRATE_FORCE_LGI) + return -EINVAL; + } + + if (mask->control[band].legacy == 0) { + /* don't allow empty legacy rates if HT or VHT + * are not even supported. + */ + if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || + rdev->wiphy.bands[band]->vht_cap.vht_supported)) + return -EINVAL; + + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) + if (mask->control[band].ht_mcs[i]) + goto out; + + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) + if (mask->control[band].vht_mcs[i]) + goto out; + + /* legacy and mcs rates may not be both empty */ + return -EINVAL; + } + } + +out: + return 0; +} + +static int validate_beacon_tx_rate(struct cfg80211_registered_device *rdev, + enum nl80211_band band, + struct cfg80211_bitrate_mask *beacon_rate) +{ + u32 count_ht, count_vht, i; + u32 rate = beacon_rate->control[band].legacy; + + /* Allow only one rate */ + if (hweight32(rate) > 1) + return -EINVAL; + + count_ht = 0; + for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) { + if (hweight8(beacon_rate->control[band].ht_mcs[i]) > 1) { + return -EINVAL; + } else if (beacon_rate->control[band].ht_mcs[i]) { + count_ht++; + if (count_ht > 1) + return -EINVAL; + } + if (count_ht && rate) + return -EINVAL; + } + + count_vht = 0; + for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { + if (hweight16(beacon_rate->control[band].vht_mcs[i]) > 1) { + return -EINVAL; + } else if (beacon_rate->control[band].vht_mcs[i]) { + count_vht++; + if (count_vht > 1) + return -EINVAL; + } + if (count_vht && rate) + return -EINVAL; + } + + if ((count_ht && count_vht) || (!rate && !count_ht && !count_vht)) + return -EINVAL; + + if (rate && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) + return -EINVAL; + if (count_ht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_HT)) + return -EINVAL; + if (count_vht && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_BEACON_RATE_VHT)) + return -EINVAL; + + return 0; +} + static int nl80211_parse_beacon(struct nlattr *attrs[], struct cfg80211_beacon_data *bcn) { @@ -3545,6 +3894,17 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) wdev->iftype)) return -EINVAL; + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, ¶ms.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(rdev, params.chandef.chan->band, + ¶ms.beacon_rate); + if (err) + return err; + } + if (info->attrs[NL80211_ATTR_SMPS_MODE]) { params.smps_mode = nla_get_u8(info->attrs[NL80211_ATTR_SMPS_MODE]); @@ -5374,6 +5734,18 @@ static int nl80211_check_s32(const struct nlattr *nla, s32 min, s32 max, s32 *ou return 0; } +static int nl80211_check_power_mode(const struct nlattr *nla, + enum nl80211_mesh_power_mode min, + enum nl80211_mesh_power_mode max, + enum nl80211_mesh_power_mode *out) +{ + u32 val = nla_get_u32(nla); + if (val < min || val > max) + return -EINVAL; + *out = val; + return 0; +} + static int nl80211_parse_mesh_config(struct genl_info *info, struct mesh_config *cfg, u32 *mask_out) @@ -5518,7 +5890,7 @@ do { \ NL80211_MESH_POWER_ACTIVE, NL80211_MESH_POWER_MAX, mask, NL80211_MESHCONF_POWER_MODE, - nl80211_check_u32); + nl80211_check_power_mode); FILL_IN_MESH_PARAM_IF_SET(tb, cfg, dot11MeshAwakeWindowDuration, 0, 65535, mask, NL80211_MESHCONF_AWAKE_WINDOW, nl80211_check_u16); @@ -6102,6 +6474,9 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) wiphy = &rdev->wiphy; + if (wdev->iftype == NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + if (!rdev->ops->scan) return -EOPNOTSUPP; @@ -7368,7 +7743,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) (key.p.cipher != WLAN_CIPHER_SUITE_WEP104 || key.p.key_len != WLAN_KEY_LEN_WEP104)) return -EINVAL; - if (key.idx > 4) + if (key.idx > 3) return -EINVAL; } else { key.p.key_len = 0; @@ -7773,12 +8148,13 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.beacon_interval = 100; - if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { + if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) ibss.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (ibss.beacon_interval < 1 || ibss.beacon_interval > 10000) - return -EINVAL; - } + + err = cfg80211_validate_beacon_int(rdev, ibss.beacon_interval); + if (err) + return err; if (!rdev->ops->join_ibss) return -EOPNOTSUPP; @@ -7985,6 +8361,8 @@ __cfg80211_alloc_vendor_skb(struct cfg80211_registered_device *rdev, } data = nla_nest_start(skb, attr); + if (!data) + goto nla_put_failure; ((void **)skb->cb)[0] = rdev; ((void **)skb->cb)[1] = hdr; @@ -8602,238 +8980,21 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return rdev_cancel_remain_on_channel(rdev, wdev, cookie); } -static u32 rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len) -{ - u8 i; - u32 mask = 0; - - for (i = 0; i < rates_len; i++) { - int rate = (rates[i] & 0x7f) * 5; - int ridx; - - for (ridx = 0; ridx < sband->n_bitrates; ridx++) { - struct ieee80211_rate *srate = - &sband->bitrates[ridx]; - if (rate == srate->bitrate) { - mask |= 1 << ridx; - break; - } - } - if (ridx == sband->n_bitrates) - return 0; /* rate not found */ - } - - return mask; -} - -static bool ht_rateset_to_mask(struct ieee80211_supported_band *sband, - u8 *rates, u8 rates_len, - u8 mcs[IEEE80211_HT_MCS_MASK_LEN]) -{ - u8 i; - - memset(mcs, 0, IEEE80211_HT_MCS_MASK_LEN); - - for (i = 0; i < rates_len; i++) { - int ridx, rbit; - - ridx = rates[i] / 8; - rbit = BIT(rates[i] % 8); - - /* check validity */ - if ((ridx < 0) || (ridx >= IEEE80211_HT_MCS_MASK_LEN)) - return false; - - /* check availability */ - if (sband->ht_cap.mcs.rx_mask[ridx] & rbit) - mcs[ridx] |= rbit; - else - return false; - } - - return true; -} - -static u16 vht_mcs_map_to_mcs_mask(u8 vht_mcs_map) -{ - u16 mcs_mask = 0; - - switch (vht_mcs_map) { - case IEEE80211_VHT_MCS_NOT_SUPPORTED: - break; - case IEEE80211_VHT_MCS_SUPPORT_0_7: - mcs_mask = 0x00FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_8: - mcs_mask = 0x01FF; - break; - case IEEE80211_VHT_MCS_SUPPORT_0_9: - mcs_mask = 0x03FF; - break; - default: - break; - } - - return mcs_mask; -} - -static void vht_build_mcs_mask(u16 vht_mcs_map, - u16 vht_mcs_mask[NL80211_VHT_NSS_MAX]) -{ - u8 nss; - - for (nss = 0; nss < NL80211_VHT_NSS_MAX; nss++) { - vht_mcs_mask[nss] = vht_mcs_map_to_mcs_mask(vht_mcs_map & 0x03); - vht_mcs_map >>= 2; - } -} - -static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband, - struct nl80211_txrate_vht *txrate, - u16 mcs[NL80211_VHT_NSS_MAX]) -{ - u16 tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - u16 tx_mcs_mask[NL80211_VHT_NSS_MAX] = {}; - u8 i; - - if (!sband->vht_cap.vht_supported) - return false; - - memset(mcs, 0, sizeof(u16) * NL80211_VHT_NSS_MAX); - - /* Build vht_mcs_mask from VHT capabilities */ - vht_build_mcs_mask(tx_mcs_map, tx_mcs_mask); - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) { - if ((tx_mcs_mask[i] & txrate->mcs[i]) == txrate->mcs[i]) - mcs[i] = txrate->mcs[i]; - else - return false; - } - - return true; -} - -static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { - [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_RATES }, - [NL80211_TXRATE_HT] = { .type = NLA_BINARY, - .len = NL80211_MAX_SUPP_HT_RATES }, - [NL80211_TXRATE_VHT] = { .len = sizeof(struct nl80211_txrate_vht)}, - [NL80211_TXRATE_GI] = { .type = NLA_U8 }, -}; - static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, struct genl_info *info) { - struct nlattr *tb[NL80211_TXRATE_MAX + 1]; - struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct cfg80211_bitrate_mask mask; - int rem, i; + struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; - struct nlattr *tx_rates; - struct ieee80211_supported_band *sband; - u16 vht_tx_mcs_map; + int err; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - memset(&mask, 0, sizeof(mask)); - /* Default to all rates enabled */ - for (i = 0; i < NUM_NL80211_BANDS; i++) { - sband = rdev->wiphy.bands[i]; + err = nl80211_parse_tx_bitrate_mask(info, &mask); + if (err) + return err; - if (!sband) - continue; - - mask.control[i].legacy = (1 << sband->n_bitrates) - 1; - memcpy(mask.control[i].ht_mcs, - sband->ht_cap.mcs.rx_mask, - sizeof(mask.control[i].ht_mcs)); - - if (!sband->vht_cap.vht_supported) - continue; - - vht_tx_mcs_map = le16_to_cpu(sband->vht_cap.vht_mcs.tx_mcs_map); - vht_build_mcs_mask(vht_tx_mcs_map, mask.control[i].vht_mcs); - } - - /* if no rates are given set it back to the defaults */ - if (!info->attrs[NL80211_ATTR_TX_RATES]) - goto out; - - /* - * The nested attribute uses enum nl80211_band as the index. This maps - * directly to the enum nl80211_band values used in cfg80211. - */ - BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); - nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) { - enum nl80211_band band = nla_type(tx_rates); - int err; - - if (band < 0 || band >= NUM_NL80211_BANDS) - return -EINVAL; - sband = rdev->wiphy.bands[band]; - if (sband == NULL) - return -EINVAL; - err = nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), - nla_len(tx_rates), nl80211_txattr_policy); - if (err) - return err; - if (tb[NL80211_TXRATE_LEGACY]) { - mask.control[band].legacy = rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_LEGACY]), - nla_len(tb[NL80211_TXRATE_LEGACY])); - if ((mask.control[band].legacy == 0) && - nla_len(tb[NL80211_TXRATE_LEGACY])) - return -EINVAL; - } - if (tb[NL80211_TXRATE_HT]) { - if (!ht_rateset_to_mask( - sband, - nla_data(tb[NL80211_TXRATE_HT]), - nla_len(tb[NL80211_TXRATE_HT]), - mask.control[band].ht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_VHT]) { - if (!vht_set_mcs_mask( - sband, - nla_data(tb[NL80211_TXRATE_VHT]), - mask.control[band].vht_mcs)) - return -EINVAL; - } - if (tb[NL80211_TXRATE_GI]) { - mask.control[band].gi = - nla_get_u8(tb[NL80211_TXRATE_GI]); - if (mask.control[band].gi > NL80211_TXRATE_FORCE_LGI) - return -EINVAL; - } - - if (mask.control[band].legacy == 0) { - /* don't allow empty legacy rates if HT or VHT - * are not even supported. - */ - if (!(rdev->wiphy.bands[band]->ht_cap.ht_supported || - rdev->wiphy.bands[band]->vht_cap.vht_supported)) - return -EINVAL; - - for (i = 0; i < IEEE80211_HT_MCS_MASK_LEN; i++) - if (mask.control[band].ht_mcs[i]) - goto out; - - for (i = 0; i < NL80211_VHT_NSS_MAX; i++) - if (mask.control[band].vht_mcs[i]) - goto out; - - /* legacy and mcs rates may not be both empty */ - return -EINVAL; - } - } - -out: return rdev_set_bitrate_mask(rdev, dev, NULL, &mask); } @@ -8859,6 +9020,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -8904,6 +9066,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9020,6 +9183,7 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_P2P_DEVICE: break; + case NL80211_IFTYPE_NAN: default: return -EOPNOTSUPP; } @@ -9252,9 +9416,10 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_BEACON_INTERVAL]) { setup.beacon_interval = nla_get_u32(info->attrs[NL80211_ATTR_BEACON_INTERVAL]); - if (setup.beacon_interval < 10 || - setup.beacon_interval > 10000) - return -EINVAL; + + err = cfg80211_validate_beacon_int(rdev, setup.beacon_interval); + if (err) + return err; } if (info->attrs[NL80211_ATTR_DTIM_PERIOD]) { @@ -9300,6 +9465,17 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) return err; } + if (info->attrs[NL80211_ATTR_TX_RATES]) { + err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate); + if (err) + return err; + + err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band, + &setup.beacon_rate); + if (err) + return err; + } + return cfg80211_join_mesh(rdev, dev, &setup, &cfg); } @@ -9413,18 +9589,27 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, if (!freqs) return -ENOBUFS; - for (i = 0; i < req->n_channels; i++) - nla_put_u32(msg, i, req->channels[i]->center_freq); + for (i = 0; i < req->n_channels; i++) { + if (nla_put_u32(msg, i, req->channels[i]->center_freq)) + return -ENOBUFS; + } nla_nest_end(msg, freqs); if (req->n_match_sets) { matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + if (!matches) + return -ENOBUFS; + for (i = 0; i < req->n_match_sets; i++) { match = nla_nest_start(msg, i); - nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, - req->match_sets[i].ssid.ssid_len, - req->match_sets[i].ssid.ssid); + if (!match) + return -ENOBUFS; + + if (nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + req->match_sets[i].ssid.ssid_len, + req->match_sets[i].ssid.ssid)) + return -ENOBUFS; nla_nest_end(msg, match); } nla_nest_end(msg, matches); @@ -9436,6 +9621,9 @@ static int nl80211_send_wowlan_nd(struct sk_buff *msg, for (i = 0; i < req->n_scan_plans; i++) { scan_plan = nla_nest_start(msg, i + 1); + if (!scan_plan) + return -ENOBUFS; + if (!scan_plan || nla_put_u32(msg, NL80211_SCHED_SCAN_PLAN_INTERVAL, req->scan_plans[i].interval) || @@ -10362,6 +10550,549 @@ static int nl80211_stop_p2p_device(struct sk_buff *skb, struct genl_info *info) return 0; } +static int nl80211_start_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + int err; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (wdev->nan_started) + return -EEXIST; + + if (rfkill_blocked(rdev->rfkill)) + return -ERFKILL; + + if (!info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_NAN_DUAL]) + return -EINVAL; + + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (!conf.master_pref) + return -EINVAL; + + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + + err = rdev_start_nan(rdev, wdev, &conf); + if (err) + return err; + + wdev->nan_started = true; + rdev->opencount++; + + return 0; +} + +static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + cfg80211_stop_nan(rdev, wdev); + + return 0; +} + +static int validate_nan_filter(struct nlattr *filter_attr) +{ + struct nlattr *attr; + int len = 0, n_entries = 0, rem; + + nla_for_each_nested(attr, filter_attr, rem) { + len += nla_len(attr); + n_entries++; + } + + if (len >= U8_MAX) + return -EINVAL; + + return n_entries; +} + +static int handle_nan_filter(struct nlattr *attr_filter, + struct cfg80211_nan_func *func, + bool tx) +{ + struct nlattr *attr; + int n_entries, rem, i; + struct cfg80211_nan_func_filter *filter; + + n_entries = validate_nan_filter(attr_filter); + if (n_entries < 0) + return n_entries; + + BUILD_BUG_ON(sizeof(*func->rx_filters) != sizeof(*func->tx_filters)); + + filter = kcalloc(n_entries, sizeof(*func->rx_filters), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + i = 0; + nla_for_each_nested(attr, attr_filter, rem) { + filter[i].filter = kmemdup(nla_data(attr), nla_len(attr), + GFP_KERNEL); + filter[i].len = nla_len(attr); + i++; + } + if (tx) { + func->num_tx_filters = n_entries; + func->tx_filters = filter; + } else { + func->num_rx_filters = n_entries; + func->rx_filters = filter; + } + + return 0; +} + +static int nl80211_nan_add_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct nlattr *tb[NUM_NL80211_NAN_FUNC_ATTR], *func_attr; + struct cfg80211_nan_func *func; + struct sk_buff *msg = NULL; + void *hdr = NULL; + int err = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_NAN_FUNC]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + err = nla_parse(tb, NL80211_NAN_FUNC_ATTR_MAX, + nla_data(info->attrs[NL80211_ATTR_NAN_FUNC]), + nla_len(info->attrs[NL80211_ATTR_NAN_FUNC]), + nl80211_nan_func_policy); + if (err) + return err; + + func = kzalloc(sizeof(*func), GFP_KERNEL); + if (!func) + return -ENOMEM; + + func->cookie = wdev->wiphy->cookie_counter++; + + if (!tb[NL80211_NAN_FUNC_TYPE] || + nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]) > NL80211_NAN_FUNC_MAX_TYPE) { + err = -EINVAL; + goto out; + } + + + func->type = nla_get_u8(tb[NL80211_NAN_FUNC_TYPE]); + + if (!tb[NL80211_NAN_FUNC_SERVICE_ID]) { + err = -EINVAL; + goto out; + } + + memcpy(func->service_id, nla_data(tb[NL80211_NAN_FUNC_SERVICE_ID]), + sizeof(func->service_id)); + + func->close_range = + nla_get_flag(tb[NL80211_NAN_FUNC_CLOSE_RANGE]); + + if (tb[NL80211_NAN_FUNC_SERVICE_INFO]) { + func->serv_spec_info_len = + nla_len(tb[NL80211_NAN_FUNC_SERVICE_INFO]); + func->serv_spec_info = + kmemdup(nla_data(tb[NL80211_NAN_FUNC_SERVICE_INFO]), + func->serv_spec_info_len, + GFP_KERNEL); + if (!func->serv_spec_info) { + err = -ENOMEM; + goto out; + } + } + + if (tb[NL80211_NAN_FUNC_TTL]) + func->ttl = nla_get_u32(tb[NL80211_NAN_FUNC_TTL]); + + switch (func->type) { + case NL80211_NAN_FUNC_PUBLISH: + if (!tb[NL80211_NAN_FUNC_PUBLISH_TYPE]) { + err = -EINVAL; + goto out; + } + + func->publish_type = + nla_get_u8(tb[NL80211_NAN_FUNC_PUBLISH_TYPE]); + func->publish_bcast = + nla_get_flag(tb[NL80211_NAN_FUNC_PUBLISH_BCAST]); + + if ((!(func->publish_type & NL80211_NAN_SOLICITED_PUBLISH)) && + func->publish_bcast) { + err = -EINVAL; + goto out; + } + break; + case NL80211_NAN_FUNC_SUBSCRIBE: + func->subscribe_active = + nla_get_flag(tb[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE]); + break; + case NL80211_NAN_FUNC_FOLLOW_UP: + if (!tb[NL80211_NAN_FUNC_FOLLOW_UP_ID] || + !tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]) { + err = -EINVAL; + goto out; + } + + func->followup_id = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_ID]); + func->followup_reqid = + nla_get_u8(tb[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID]); + memcpy(func->followup_dest.addr, + nla_data(tb[NL80211_NAN_FUNC_FOLLOW_UP_DEST]), + sizeof(func->followup_dest.addr)); + if (func->ttl) { + err = -EINVAL; + goto out; + } + break; + default: + err = -EINVAL; + goto out; + } + + if (tb[NL80211_NAN_FUNC_SRF]) { + struct nlattr *srf_tb[NUM_NL80211_NAN_SRF_ATTR]; + + err = nla_parse(srf_tb, NL80211_NAN_SRF_ATTR_MAX, + nla_data(tb[NL80211_NAN_FUNC_SRF]), + nla_len(tb[NL80211_NAN_FUNC_SRF]), NULL); + if (err) + goto out; + + func->srf_include = + nla_get_flag(srf_tb[NL80211_NAN_SRF_INCLUDE]); + + if (srf_tb[NL80211_NAN_SRF_BF]) { + if (srf_tb[NL80211_NAN_SRF_MAC_ADDRS] || + !srf_tb[NL80211_NAN_SRF_BF_IDX]) { + err = -EINVAL; + goto out; + } + + func->srf_bf_len = + nla_len(srf_tb[NL80211_NAN_SRF_BF]); + func->srf_bf = + kmemdup(nla_data(srf_tb[NL80211_NAN_SRF_BF]), + func->srf_bf_len, GFP_KERNEL); + if (!func->srf_bf) { + err = -ENOMEM; + goto out; + } + + func->srf_bf_idx = + nla_get_u8(srf_tb[NL80211_NAN_SRF_BF_IDX]); + } else { + struct nlattr *attr, *mac_attr = + srf_tb[NL80211_NAN_SRF_MAC_ADDRS]; + int n_entries, rem, i = 0; + + if (!mac_attr) { + err = -EINVAL; + goto out; + } + + n_entries = validate_acl_mac_addrs(mac_attr); + if (n_entries <= 0) { + err = -EINVAL; + goto out; + } + + func->srf_num_macs = n_entries; + func->srf_macs = + kzalloc(sizeof(*func->srf_macs) * n_entries, + GFP_KERNEL); + if (!func->srf_macs) { + err = -ENOMEM; + goto out; + } + + nla_for_each_nested(attr, mac_attr, rem) + memcpy(func->srf_macs[i++].addr, nla_data(attr), + sizeof(*func->srf_macs)); + } + } + + if (tb[NL80211_NAN_FUNC_TX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_TX_MATCH_FILTER], + func, true); + if (err) + goto out; + } + + if (tb[NL80211_NAN_FUNC_RX_MATCH_FILTER]) { + err = handle_nan_filter(tb[NL80211_NAN_FUNC_RX_MATCH_FILTER], + func, false); + if (err) + goto out; + } + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_ADD_NAN_FUNCTION); + /* This can't really happen - we just allocated 4KB */ + if (WARN_ON(!hdr)) { + err = -ENOMEM; + goto out; + } + + err = rdev_add_nan_func(rdev, wdev, func); +out: + if (err < 0) { + cfg80211_free_nan_func(func); + nlmsg_free(msg); + return err; + } + + /* propagate the instance id and cookie to userspace */ + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, func->cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, + func->instance_id)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + + genlmsg_end(msg, hdr); + return genlmsg_reply(msg, info); + +nla_put_failure: + nlmsg_free(msg); + return -ENOBUFS; +} + +static int nl80211_nan_del_func(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + u64 cookie; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_COOKIE]) + return -EINVAL; + + if (wdev->owner_nlportid && + wdev->owner_nlportid != info->snd_portid) + return -ENOTCONN; + + cookie = nla_get_u64(info->attrs[NL80211_ATTR_COOKIE]); + + rdev_del_nan_func(rdev, wdev, cookie); + + return 0; +} + +static int nl80211_nan_change_config(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct wireless_dev *wdev = info->user_ptr[1]; + struct cfg80211_nan_conf conf = {}; + u32 changed = 0; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev->nan_started) + return -ENOTCONN; + + if (info->attrs[NL80211_ATTR_NAN_MASTER_PREF]) { + conf.master_pref = + nla_get_u8(info->attrs[NL80211_ATTR_NAN_MASTER_PREF]); + if (conf.master_pref <= 1 || conf.master_pref == 255) + return -EINVAL; + + changed |= CFG80211_NAN_CONF_CHANGED_PREF; + } + + if (info->attrs[NL80211_ATTR_NAN_DUAL]) { + conf.dual = nla_get_u8(info->attrs[NL80211_ATTR_NAN_DUAL]); + changed |= CFG80211_NAN_CONF_CHANGED_DUAL; + } + + if (!changed) + return -EINVAL; + + return rdev_nan_change_conf(rdev, wdev, &conf, changed); +} + +void cfg80211_nan_match(struct wireless_dev *wdev, + struct cfg80211_nan_match_params *match, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct nlattr *match_attr, *local_func_attr, *peer_func_attr; + struct sk_buff *msg; + void *hdr; + + if (WARN_ON(!match->inst_id || !match->peer_inst_id || !match->addr)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_MATCH); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, match->cookie, + NL80211_ATTR_PAD) || + nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, match->addr)) + goto nla_put_failure; + + match_attr = nla_nest_start(msg, NL80211_ATTR_NAN_MATCH); + if (!match_attr) + goto nla_put_failure; + + local_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_LOCAL); + if (!local_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->inst_id)) + goto nla_put_failure; + + nla_nest_end(msg, local_func_attr); + + peer_func_attr = nla_nest_start(msg, NL80211_NAN_MATCH_FUNC_PEER); + if (!peer_func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_TYPE, match->type) || + nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, match->peer_inst_id)) + goto nla_put_failure; + + if (match->info && match->info_len && + nla_put(msg, NL80211_NAN_FUNC_SERVICE_INFO, match->info_len, + match->info)) + goto nla_put_failure; + + nla_nest_end(msg, peer_func_attr); + nla_nest_end(msg, match_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_match); + +void cfg80211_nan_func_terminated(struct wireless_dev *wdev, + u8 inst_id, + enum nl80211_nan_func_term_reason reason, + u64 cookie, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + struct nlattr *func_attr; + void *hdr; + + if (WARN_ON(!inst_id)) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_NAN_FUNCTION); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + (wdev->netdev && nla_put_u32(msg, NL80211_ATTR_IFINDEX, + wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + if (nla_put_u64_64bit(msg, NL80211_ATTR_COOKIE, cookie, + NL80211_ATTR_PAD)) + goto nla_put_failure; + + func_attr = nla_nest_start(msg, NL80211_ATTR_NAN_FUNC); + if (!func_attr) + goto nla_put_failure; + + if (nla_put_u8(msg, NL80211_NAN_FUNC_INSTANCE_ID, inst_id) || + nla_put_u8(msg, NL80211_NAN_FUNC_TERM_REASON, reason)) + goto nla_put_failure; + + nla_nest_end(msg, func_attr); + genlmsg_end(msg, hdr); + + if (!wdev->owner_nlportid) + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), + msg, 0, NL80211_MCGRP_NAN, gfp); + else + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, + wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_func_terminated); + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -11063,7 +11794,14 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb, dev_hold(dev); } else if (ops->internal_flags & NL80211_FLAG_CHECK_NETDEV_UP) { - if (!wdev->p2p_started) { + if (wdev->iftype == NL80211_IFTYPE_P2P_DEVICE && + !wdev->p2p_started) { + if (rtnl) + rtnl_unlock(); + return -ENETDOWN; + } + if (wdev->iftype == NL80211_IFTYPE_NAN && + !wdev->nan_started) { if (rtnl) rtnl_unlock(); return -ENETDOWN; @@ -11696,6 +12434,46 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_START_NAN, + .doit = nl80211_start_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_STOP_NAN, + .doit = nl80211_stop_nan, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_ADD_NAN_FUNCTION, + .doit = nl80211_nan_add_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_NAN_FUNCTION, + .doit = nl80211_nan_del_func, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_CHANGE_NAN_CONFIG, + .doit = nl80211_nan_change_config, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, { .cmd = NL80211_CMD_SET_MCAST_RATE, .doit = nl80211_set_mcast_rate, @@ -11847,6 +12625,29 @@ void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, NL80211_MCGRP_CONFIG, GFP_KERNEL); } +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd) +{ + struct sk_buff *msg; + + WARN_ON(cmd != NL80211_CMD_NEW_INTERFACE && + cmd != NL80211_CMD_DEL_INTERFACE); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + if (nl80211_send_iface(msg, 0, 0, 0, rdev, wdev, + cmd == NL80211_CMD_DEL_INTERFACE) < 0) { + nlmsg_free(msg); + return; + } + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_CONFIG, GFP_KERNEL); +} + static int nl80211_add_scan_req(struct sk_buff *msg, struct cfg80211_registered_device *rdev) { diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index a63f402b10b7..7e3821d7fcc5 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -7,6 +7,9 @@ int nl80211_init(void); void nl80211_exit(void); void nl80211_notify_wiphy(struct cfg80211_registered_device *rdev, enum nl80211_commands cmd); +void nl80211_notify_iface(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + enum nl80211_commands cmd); void nl80211_send_scan_start(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); struct sk_buff *nl80211_build_scan_msg(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 85ff30bee2b9..11cf83c8ad4f 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -887,6 +887,64 @@ static inline void rdev_stop_p2p_device(struct cfg80211_registered_device *rdev, trace_rdev_return_void(&rdev->wiphy); } +static inline int rdev_start_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf) +{ + int ret; + + trace_rdev_start_nan(&rdev->wiphy, wdev, conf); + ret = rdev->ops->start_nan(&rdev->wiphy, wdev, conf); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_stop_nan(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + trace_rdev_stop_nan(&rdev->wiphy, wdev); + rdev->ops->stop_nan(&rdev->wiphy, wdev); + trace_rdev_return_void(&rdev->wiphy); +} + +static inline int +rdev_add_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_func *nan_func) +{ + int ret; + + trace_rdev_add_nan_func(&rdev->wiphy, wdev, nan_func); + ret = rdev->ops->add_nan_func(&rdev->wiphy, wdev, nan_func); + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline void rdev_del_nan_func(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, u64 cookie) +{ + trace_rdev_del_nan_func(&rdev->wiphy, wdev, cookie); + rdev->ops->del_nan_func(&rdev->wiphy, wdev, cookie); + trace_rdev_return_void(&rdev->wiphy); +} + +static inline int +rdev_nan_change_conf(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes) +{ + int ret; + + trace_rdev_nan_change_conf(&rdev->wiphy, wdev, conf, changes); + if (rdev->ops->nan_change_conf) + ret = rdev->ops->nan_change_conf(&rdev->wiphy, wdev, conf, + changes); + else + ret = -ENOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0358e12be54b..b5bd58d0f731 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -352,52 +352,48 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *rdev) __cfg80211_bss_expire(rdev, jiffies - IEEE80211_SCAN_RESULT_EXPIRE); } -const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) +const u8 *cfg80211_find_ie_match(u8 eid, const u8 *ies, int len, + const u8 *match, int match_len, + int match_offset) { - while (len > 2 && ies[0] != eid) { + /* match_offset can't be smaller than 2, unless match_len is + * zero, in which case match_offset must be zero as well. + */ + if (WARN_ON((match_len && match_offset < 2) || + (!match_len && match_offset))) + return NULL; + + while (len >= 2 && len >= ies[1] + 2) { + if ((ies[0] == eid) && + (ies[1] + 2 >= match_offset + match_len) && + !memcmp(ies + match_offset, match, match_len)) + return ies; + len -= ies[1] + 2; ies += ies[1] + 2; } - if (len < 2) - return NULL; - if (len < 2 + ies[1]) - return NULL; - return ies; + + return NULL; } -EXPORT_SYMBOL(cfg80211_find_ie); +EXPORT_SYMBOL(cfg80211_find_ie_match); const u8 *cfg80211_find_vendor_ie(unsigned int oui, int oui_type, const u8 *ies, int len) { - struct ieee80211_vendor_ie *ie; - const u8 *pos = ies, *end = ies + len; - int ie_oui; + const u8 *ie; + u8 match[] = { oui >> 16, oui >> 8, oui, oui_type }; + int match_len = (oui_type < 0) ? 3 : sizeof(match); if (WARN_ON(oui_type > 0xff)) return NULL; - while (pos < end) { - pos = cfg80211_find_ie(WLAN_EID_VENDOR_SPECIFIC, pos, - end - pos); - if (!pos) - return NULL; + ie = cfg80211_find_ie_match(WLAN_EID_VENDOR_SPECIFIC, ies, len, + match, match_len, 2); - ie = (struct ieee80211_vendor_ie *)pos; + if (ie && (ie[1] < 4)) + return NULL; - /* make sure we can access ie->len */ - BUILD_BUG_ON(offsetof(struct ieee80211_vendor_ie, len) != 1); - - if (ie->len < sizeof(*ie)) - goto cont; - - ie_oui = ie->oui[0] << 16 | ie->oui[1] << 8 | ie->oui[2]; - if (ie_oui == oui && - (oui_type < 0 || ie->oui_type == oui_type)) - return pos; -cont: - pos += 2 + ie->len; - } - return NULL; + return ie; } EXPORT_SYMBOL(cfg80211_find_vendor_ie); diff --git a/net/wireless/sme.c b/net/wireless/sme.c index add6824c44fd..a77db333927e 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -726,7 +726,8 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, wdev->current_bss = bss_from_pub(bss); - cfg80211_upload_connect_keys(wdev); + if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP)) + cfg80211_upload_connect_keys(wdev); rcu_read_lock(); country_ie = ieee80211_bss_get_ie(bss, WLAN_EID_COUNTRY); @@ -1043,6 +1044,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, connect->crypto.ciphers_pairwise[0] = cipher; } } + + connect->crypto.wep_keys = connkeys->params; + connect->crypto.wep_tx_key = connkeys->def; + } else { + if (WARN_ON(connkeys)) + return -EINVAL; } wdev->connect_keys = connkeys; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index e46469bc130f..0082f4b01795 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -57,7 +57,7 @@ static ssize_t addresses_show(struct device *dev, return sprintf(buf, "%pM\n", wiphy->perm_addr); for (i = 0; i < wiphy->n_addresses; i++) - buf += sprintf(buf, "%pM\n", &wiphy->addresses[i].addr); + buf += sprintf(buf, "%pM\n", wiphy->addresses[i].addr); return buf - start; } diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 72b5255cefe2..a3d0a91b1e09 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1889,6 +1889,96 @@ DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_p2p_device, TP_ARGS(wiphy, wdev) ); +TRACE_EVENT(rdev_start_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf), + TP_ARGS(wiphy, wdev, conf), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual) +); + +TRACE_EVENT(rdev_nan_change_conf, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_conf *conf, u32 changes), + TP_ARGS(wiphy, wdev, conf, changes), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, master_pref) + __field(u8, dual); + __field(u32, changes); + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->master_pref = conf->master_pref; + __entry->dual = conf->dual; + __entry->changes = changes; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", master preference: %u, dual: %d, changes: %x", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->master_pref, + __entry->dual, __entry->changes) +); + +DEFINE_EVENT(wiphy_wdev_evt, rdev_stop_nan, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev), + TP_ARGS(wiphy, wdev) +); + +TRACE_EVENT(rdev_add_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const struct cfg80211_nan_func *func), + TP_ARGS(wiphy, wdev, func), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u8, func_type) + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->func_type = func->type; + __entry->cookie = func->cookie + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", type=%u, cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->func_type, + __entry->cookie) +); + +TRACE_EVENT(rdev_del_nan_func, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + u64 cookie), + TP_ARGS(wiphy, wdev, cookie), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(u64, cookie) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->cookie = cookie; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie=%llu", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), diff --git a/net/wireless/util.c b/net/wireless/util.c index b7d1592bd5b8..8edce22d1b93 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -218,7 +218,7 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, bool pairwise, const u8 *mac_addr) { - if (key_idx > 5) + if (key_idx < 0 || key_idx > 5) return -EINVAL; if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) @@ -249,7 +249,13 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, /* Disallow BIP (group-only) cipher as pairwise cipher */ if (pairwise) return -EINVAL; + if (key_idx < 4) + return -EINVAL; break; + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + if (key_idx > 3) + return -EINVAL; default: break; } @@ -906,7 +912,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) if (!wdev->connect_keys) return; - for (i = 0; i < 6; i++) { + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) { if (!wdev->connect_keys->params[i].cipher) continue; if (rdev_add_key(rdev, dev, i, false, NULL, @@ -919,9 +925,6 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) netdev_err(dev, "failed to set defkey %d\n", i); continue; } - if (wdev->connect_keys->defmgmt == i) - if (rdev_set_default_mgmt_key(rdev, dev, i)) - netdev_err(dev, "failed to set mgtdef %d\n", i); } kzfree(wdev->connect_keys); @@ -1005,8 +1008,9 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; - /* cannot change into P2P device type */ - if (ntype == NL80211_IFTYPE_P2P_DEVICE) + /* cannot change into P2P device or NAN */ + if (ntype == NL80211_IFTYPE_P2P_DEVICE || + ntype == NL80211_IFTYPE_NAN) return -EOPNOTSUPP; if (!rdev->ops->change_virtual_intf || @@ -1085,6 +1089,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* not happening */ break; case NL80211_IFTYPE_P2P_DEVICE: + case NL80211_IFTYPE_NAN: WARN_ON(1); break; } @@ -1559,7 +1564,7 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev; int res = 0; - if (!beacon_int) + if (beacon_int < 10 || beacon_int > 10000) return -EINVAL; list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { @@ -1757,6 +1762,28 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_get_station); +void cfg80211_free_nan_func(struct cfg80211_nan_func *f) +{ + int i; + + if (!f) + return; + + kfree(f->serv_spec_info); + kfree(f->srf_bf); + kfree(f->srf_macs); + for (i = 0; i < f->num_rx_filters; i++) + kfree(f->rx_filters[i].filter); + + for (i = 0; i < f->num_tx_filters; i++) + kfree(f->tx_filters[i].filter); + + kfree(f->rx_filters); + kfree(f->tx_filters); + kfree(f); +} +EXPORT_SYMBOL(cfg80211_free_nan_func); + /* See IEEE 802.1H for LLC/SNAP encapsulation/decapsulation */ /* Ethernet-II snap header (RFC1042 for most EtherTypes) */ const unsigned char rfc1042_header[] __aligned(2) = diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 9f27221c8913..a220156cf217 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -406,12 +406,16 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (pairwise && !addr) return -EINVAL; + /* + * In many cases we won't actually need this, but it's better + * to do it first in case the allocation fails. Don't use wext. + */ if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), - GFP_KERNEL); + GFP_KERNEL); if (!wdev->wext.keys) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) wdev->wext.keys->params[i].key = wdev->wext.keys->data[i]; } @@ -460,7 +464,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (err == -ENOENT) err = 0; if (!err) { - if (!addr) { + if (!addr && idx < 4) { memset(wdev->wext.keys->data[idx], 0, sizeof(wdev->wext.keys->data[idx])); wdev->wext.keys->params[idx].key_len = 0; @@ -487,10 +491,19 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, err = 0; if (wdev->current_bss) err = rdev_add_key(rdev, dev, idx, pairwise, addr, params); + else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && + params->cipher != WLAN_CIPHER_SUITE_WEP104) + return -EINVAL; if (err) return err; - if (!addr) { + /* + * We only need to store WEP keys, since they're the only keys that + * can be be set before a connection is established and persist after + * disconnecting. + */ + if (!addr && (params->cipher == WLAN_CIPHER_SUITE_WEP40 || + params->cipher == WLAN_CIPHER_SUITE_WEP104)) { wdev->wext.keys->params[idx] = *params; memcpy(wdev->wext.keys->data[idx], params->key, params->key_len); diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index a4e8af3321d2..995163830a61 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -35,7 +35,6 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (wdev->wext.keys) { wdev->wext.keys->def = wdev->wext.default_key; - wdev->wext.keys->defmgmt = wdev->wext.default_mgmt_key; if (wdev->wext.default_key != -1) wdev->wext.connect.privacy = true; } @@ -43,11 +42,11 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, if (!wdev->wext.connect.ssid_len) return 0; - if (wdev->wext.keys) { + if (wdev->wext.keys && wdev->wext.keys->def != -1) { ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL); if (!ck) return -ENOMEM; - for (i = 0; i < 6; i++) + for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) ck->params[i].key = ck->data[i]; } diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index a750f330b8dd..f83b74d3e2ac 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1500,12 +1500,8 @@ out_fac_release: goto out_dtefac_release; if (dtefacs.calling_len > X25_MAX_AE_LEN) goto out_dtefac_release; - if (dtefacs.calling_ae == NULL) - goto out_dtefac_release; if (dtefacs.called_len > X25_MAX_AE_LEN) goto out_dtefac_release; - if (dtefacs.called_ae == NULL) - goto out_dtefac_release; x25->dte_facilities = dtefacs; rc = 0; out_dtefac_release: diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 250e567ba3d6..44ac85fe2bc9 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -17,7 +17,7 @@ #include #include #include -#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE) +#if IS_ENABLED(CONFIG_INET_ESP) || IS_ENABLED(CONFIG_INET6_ESP) #include #endif diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 45f9cf97ea25..fd6986634e6f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -49,6 +49,7 @@ static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] __read_mostly; static struct kmem_cache *xfrm_dst_cache __read_mostly; +static __read_mostly seqcount_t xfrm_policy_hash_generation; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); @@ -59,6 +60,11 @@ static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); +static inline bool xfrm_pol_hold_rcu(struct xfrm_policy *policy) +{ + return atomic_inc_not_zero(&policy->refcnt); +} + static inline bool __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl) { @@ -385,9 +391,11 @@ static struct hlist_head *policy_hash_bysel(struct net *net, __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __sel_hash(sel, family, hmask, dbits, sbits); - return (hash == hmask + 1 ? - &net->xfrm.policy_inexact[dir] : - net->xfrm.policy_bydst[dir].table + hash); + if (hash == hmask + 1) + return &net->xfrm.policy_inexact[dir]; + + return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static struct hlist_head *policy_hash_direct(struct net *net, @@ -403,7 +411,8 @@ static struct hlist_head *policy_hash_direct(struct net *net, __get_hash_thresh(net, family, dir, &dbits, &sbits); hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); - return net->xfrm.policy_bydst[dir].table + hash; + return rcu_dereference_check(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)) + hash; } static void xfrm_dst_hash_transfer(struct net *net, @@ -426,14 +435,14 @@ redo: h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, pol->family, nhashmask, dbits, sbits); if (!entry0) { - hlist_del(&pol->bydst); - hlist_add_head(&pol->bydst, ndsttable+h); + hlist_del_rcu(&pol->bydst); + hlist_add_head_rcu(&pol->bydst, ndsttable + h); h0 = h; } else { if (h != h0) continue; - hlist_del(&pol->bydst); - hlist_add_behind(&pol->bydst, entry0); + hlist_del_rcu(&pol->bydst); + hlist_add_behind_rcu(&pol->bydst, entry0); } entry0 = &pol->bydst; } @@ -468,22 +477,32 @@ static void xfrm_bydst_resize(struct net *net, int dir) unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; unsigned int nhashmask = xfrm_new_hash_mask(hmask); unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = net->xfrm.policy_bydst[dir].table; struct hlist_head *ndst = xfrm_hash_alloc(nsize); + struct hlist_head *odst; int i; if (!ndst) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); + write_seqcount_begin(&xfrm_policy_hash_generation); + + odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); + + odst = rcu_dereference_protected(net->xfrm.policy_bydst[dir].table, + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); for (i = hmask; i >= 0; i--) xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); - net->xfrm.policy_bydst[dir].table = ndst; + rcu_assign_pointer(net->xfrm.policy_bydst[dir].table, ndst); net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + write_seqcount_end(&xfrm_policy_hash_generation); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); + + synchronize_rcu(); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } @@ -500,7 +519,7 @@ static void xfrm_byidx_resize(struct net *net, int total) if (!nidx) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); @@ -508,7 +527,7 @@ static void xfrm_byidx_resize(struct net *net, int total) net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } @@ -541,7 +560,6 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { - read_lock_bh(&net->xfrm.xfrm_policy_lock); si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; @@ -550,7 +568,6 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; - read_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); @@ -600,7 +617,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) rbits6 = net->xfrm.policy_hthresh.rbits6; } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /* reset the bydst and inexact table in all directions */ for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { @@ -646,7 +663,7 @@ static void xfrm_hash_rebuild(struct work_struct *work) hlist_add_head(&policy->bydst, chain); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); mutex_unlock(&hash_resize_mutex); } @@ -757,7 +774,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *newpos; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; @@ -768,7 +785,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -804,7 +821,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); @@ -824,7 +841,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, struct hlist_head *chain; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -837,7 +854,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -846,7 +863,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -865,7 +882,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, return NULL; *err = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { @@ -876,7 +893,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -885,7 +902,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, break; } } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -943,7 +960,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) @@ -959,14 +976,14 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } @@ -978,13 +995,13 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } @@ -993,7 +1010,7 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) if (!cnt) err = -ESRCH; out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -1013,7 +1030,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else @@ -1041,7 +1058,7 @@ int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk, } list_del_init(&walk->walk.all); out: - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -1060,9 +1077,9 @@ void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) if (list_empty(&walk->walk.all)) return; - write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -1100,17 +1117,24 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, struct xfrm_policy *pol, *ret; const xfrm_address_t *daddr, *saddr; struct hlist_head *chain; - u32 priority = ~0U; + unsigned int sequence; + u32 priority; daddr = xfrm_flowi_daddr(fl, family); saddr = xfrm_flowi_saddr(fl, family); if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&net->xfrm.xfrm_policy_lock); - chain = policy_hash_direct(net, daddr, saddr, family, dir); + rcu_read_lock(); + retry: + do { + sequence = read_seqcount_begin(&xfrm_policy_hash_generation); + chain = policy_hash_direct(net, daddr, saddr, family, dir); + } while (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)); + + priority = ~0U; ret = NULL; - hlist_for_each_entry(pol, chain, bydst) { + hlist_for_each_entry_rcu(pol, chain, bydst) { err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -1126,7 +1150,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } chain = &net->xfrm.policy_inexact[dir]; - hlist_for_each_entry(pol, chain, bydst) { + hlist_for_each_entry_rcu(pol, chain, bydst) { if ((pol->priority >= priority) && ret) break; @@ -1144,9 +1168,13 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, } } - xfrm_pol_hold(ret); + if (read_seqcount_retry(&xfrm_policy_hash_generation, sequence)) + goto retry; + + if (ret && !xfrm_pol_hold_rcu(ret)) + goto retry; fail: - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_unlock(); return ret; } @@ -1223,10 +1251,9 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl) { struct xfrm_policy *pol; - struct net *net = sock_net(sk); rcu_read_lock(); - read_lock_bh(&net->xfrm.xfrm_policy_lock); + again: pol = rcu_dereference(sk->sk_policy[dir]); if (pol != NULL) { bool match = xfrm_selector_match(&pol->selector, fl, @@ -1241,8 +1268,8 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, policy_to_flow_dir(dir)); - if (!err) - xfrm_pol_hold(pol); + if (!err && !xfrm_pol_hold_rcu(pol)) + goto again; else if (err == -ESRCH) pol = NULL; else @@ -1251,7 +1278,6 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, pol = NULL; } out: - read_unlock_bh(&net->xfrm.xfrm_policy_lock); rcu_read_unlock(); return pol; } @@ -1275,7 +1301,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, /* Socket policies are not hashed. */ if (!hlist_unhashed(&pol->bydst)) { - hlist_del(&pol->bydst); + hlist_del_rcu(&pol->bydst); hlist_del(&pol->byidx); } @@ -1299,9 +1325,9 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; @@ -1320,7 +1346,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) return -EINVAL; #endif - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); old_pol = rcu_dereference_protected(sk->sk_policy[dir], lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { @@ -1338,7 +1364,7 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) */ xfrm_sk_policy_unlink(old_pol, dir); } - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1368,9 +1394,9 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&net->xfrm.xfrm_policy_lock); + spin_lock_bh(&net->xfrm.xfrm_policy_lock); xfrm_sk_policy_link(newp, dir); - write_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; @@ -3052,7 +3078,7 @@ static int __net_init xfrm_net_init(struct net *net) /* Initialize the per-net locks here */ spin_lock_init(&net->xfrm.xfrm_state_lock); - rwlock_init(&net->xfrm.xfrm_policy_lock); + spin_lock_init(&net->xfrm.xfrm_policy_lock); mutex_init(&net->xfrm.xfrm_cfg_mutex); return 0; @@ -3086,6 +3112,7 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { register_pernet_subsys(&xfrm_net_ops); + seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); } @@ -3183,7 +3210,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + spin_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && @@ -3207,7 +3234,7 @@ static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector * xfrm_pol_hold(ret); - read_unlock_bh(&net->xfrm.xfrm_policy_lock); + spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 9c4fbd8935f4..ba2b539879bc 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -50,12 +50,18 @@ static const struct snmp_mib xfrm_mib_list[] = { static int xfrm_statistics_seq_show(struct seq_file *seq, void *v) { + unsigned long buff[LINUX_MIB_XFRMMAX]; struct net *net = seq->private; int i; + + memset(buff, 0, sizeof(unsigned long) * LINUX_MIB_XFRMMAX); + + snmp_get_cpu_field_batch(buff, xfrm_mib_list, + net->mib.xfrm_statistics); for (i = 0; xfrm_mib_list[i].name; i++) seq_printf(seq, "%-24s\t%lu\n", xfrm_mib_list[i].name, - snmp_fold_field(net->mib.xfrm_statistics, - xfrm_mib_list[i].entry)); + buff[i]); + return 0; } diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 4fd725a0c500..cdc2e2e71bff 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -558,7 +558,7 @@ static void xfrm_replay_advance_esn(struct xfrm_state *x, __be32 net_seq) x->repl->notify(x, XFRM_REPLAY_UPDATE); } -static struct xfrm_replay xfrm_replay_legacy = { +static const struct xfrm_replay xfrm_replay_legacy = { .advance = xfrm_replay_advance, .check = xfrm_replay_check, .recheck = xfrm_replay_check, @@ -566,7 +566,7 @@ static struct xfrm_replay xfrm_replay_legacy = { .overflow = xfrm_replay_overflow, }; -static struct xfrm_replay xfrm_replay_bmp = { +static const struct xfrm_replay xfrm_replay_bmp = { .advance = xfrm_replay_advance_bmp, .check = xfrm_replay_check_bmp, .recheck = xfrm_replay_check_bmp, @@ -574,7 +574,7 @@ static struct xfrm_replay xfrm_replay_bmp = { .overflow = xfrm_replay_overflow_bmp, }; -static struct xfrm_replay xfrm_replay_esn = { +static const struct xfrm_replay xfrm_replay_esn = { .advance = xfrm_replay_advance_esn, .check = xfrm_replay_check_esn, .recheck = xfrm_replay_recheck_esn, diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a30f898dc1c5..419bf5d463bd 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -28,6 +28,11 @@ #include "xfrm_hash.h" +#define xfrm_state_deref_prot(table, net) \ + rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) + +static void xfrm_state_gc_task(struct work_struct *work); + /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) @@ -36,6 +41,15 @@ */ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; +static __read_mostly seqcount_t xfrm_state_hash_generation = SEQCNT_ZERO(xfrm_state_hash_generation); + +static DECLARE_WORK(xfrm_state_gc_work, xfrm_state_gc_task); +static HLIST_HEAD(xfrm_state_gc_list); + +static inline bool xfrm_state_hold_rcu(struct xfrm_state __rcu *x) +{ + return atomic_inc_not_zero(&x->refcnt); +} static inline unsigned int xfrm_dst_hash(struct net *net, const xfrm_address_t *daddr, @@ -76,18 +90,18 @@ static void xfrm_hash_transfer(struct hlist_head *list, h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family, nhashmask); - hlist_add_head(&x->bydst, ndsttable+h); + hlist_add_head_rcu(&x->bydst, ndsttable + h); h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family, nhashmask); - hlist_add_head(&x->bysrc, nsrctable+h); + hlist_add_head_rcu(&x->bysrc, nsrctable + h); if (x->id.spi) { h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family, nhashmask); - hlist_add_head(&x->byspi, nspitable+h); + hlist_add_head_rcu(&x->byspi, nspitable + h); } } } @@ -122,25 +136,29 @@ static void xfrm_hash_resize(struct work_struct *work) } spin_lock_bh(&net->xfrm.xfrm_state_lock); + write_seqcount_begin(&xfrm_state_hash_generation); nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; + odst = xfrm_state_deref_prot(net->xfrm.state_bydst, net); for (i = net->xfrm.state_hmask; i >= 0; i--) - xfrm_hash_transfer(net->xfrm.state_bydst+i, ndst, nsrc, nspi, - nhashmask); + xfrm_hash_transfer(odst + i, ndst, nsrc, nspi, nhashmask); - odst = net->xfrm.state_bydst; - osrc = net->xfrm.state_bysrc; - ospi = net->xfrm.state_byspi; + osrc = xfrm_state_deref_prot(net->xfrm.state_bysrc, net); + ospi = xfrm_state_deref_prot(net->xfrm.state_byspi, net); ohashmask = net->xfrm.state_hmask; - net->xfrm.state_bydst = ndst; - net->xfrm.state_bysrc = nsrc; - net->xfrm.state_byspi = nspi; + rcu_assign_pointer(net->xfrm.state_bydst, ndst); + rcu_assign_pointer(net->xfrm.state_bysrc, nsrc); + rcu_assign_pointer(net->xfrm.state_byspi, nspi); net->xfrm.state_hmask = nhashmask; + write_seqcount_end(&xfrm_state_hash_generation); spin_unlock_bh(&net->xfrm.xfrm_state_lock); osize = (ohashmask + 1) * sizeof(struct hlist_head); + + synchronize_rcu(); + xfrm_hash_free(odst, osize); xfrm_hash_free(osrc, osize); xfrm_hash_free(ospi, osize); @@ -356,15 +374,16 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(struct work_struct *work) { - struct net *net = container_of(work, struct net, xfrm.state_gc_work); struct xfrm_state *x; struct hlist_node *tmp; struct hlist_head gc_list; spin_lock_bh(&xfrm_state_gc_lock); - hlist_move_list(&net->xfrm.state_gc_list, &gc_list); + hlist_move_list(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); + synchronize_rcu(); + hlist_for_each_entry_safe(x, tmp, &gc_list, gclist) xfrm_state_gc_destroy(x); } @@ -501,14 +520,12 @@ EXPORT_SYMBOL(xfrm_state_alloc); void __xfrm_state_destroy(struct xfrm_state *x) { - struct net *net = xs_net(x); - WARN_ON(x->km.state != XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->gclist, &net->xfrm.state_gc_list); + hlist_add_head(&x->gclist, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - schedule_work(&net->xfrm.state_gc_work); + schedule_work(&xfrm_state_gc_work); } EXPORT_SYMBOL(__xfrm_state_destroy); @@ -521,10 +538,10 @@ int __xfrm_state_delete(struct xfrm_state *x) x->km.state = XFRM_STATE_DEAD; spin_lock(&net->xfrm.xfrm_state_lock); list_del(&x->km.all); - hlist_del(&x->bydst); - hlist_del(&x->bysrc); + hlist_del_rcu(&x->bydst); + hlist_del_rcu(&x->bysrc); if (x->id.spi) - hlist_del(&x->byspi); + hlist_del_rcu(&x->byspi); net->xfrm.state_num--; spin_unlock(&net->xfrm.xfrm_state_lock); @@ -660,7 +677,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_byspi+h, byspi) { + hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto || @@ -669,7 +686,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -684,7 +702,7 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; - hlist_for_each_entry(x, net->xfrm.state_bysrc+h, bysrc) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { if (x->props.family != family || x->id.proto != proto || !xfrm_addr_equal(&x->id.daddr, daddr, family) || @@ -693,7 +711,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, if ((mark & x->mark.m) != x->mark.v) continue; - xfrm_state_hold(x); + if (!xfrm_state_hold_rcu(x)) + continue; return x; } @@ -776,13 +795,16 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, struct xfrm_state *best = NULL; u32 mark = pol->mark.v & pol->mark.m; unsigned short encap_family = tmpl->encap_family; + unsigned int sequence; struct km_event c; to_put = NULL; - spin_lock_bh(&net->xfrm.xfrm_state_lock); + sequence = read_seqcount_begin(&xfrm_state_hash_generation); + + rcu_read_lock(); h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -798,7 +820,7 @@ xfrm_state_find(const xfrm_address_t *daddr, const xfrm_address_t *saddr, goto found; h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); - hlist_for_each_entry(x, net->xfrm.state_bydst+h_wildcard, bydst) { + hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { if (x->props.family == encap_family && x->props.reqid == tmpl->reqid && (mark & x->mark.m) == x->mark.v && @@ -851,19 +873,21 @@ found: } if (km_query(x, tmpl, pol) == 0) { + spin_lock_bh(&net->xfrm.xfrm_state_lock); x->km.state = XFRM_STATE_ACQ; list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, encap_family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, encap_family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); net->xfrm.state_num++; xfrm_hash_grow_check(net, x->bydst.next != NULL); + spin_unlock_bh(&net->xfrm.xfrm_state_lock); } else { x->km.state = XFRM_STATE_DEAD; to_put = x; @@ -872,13 +896,26 @@ found: } } out: - if (x) - xfrm_state_hold(x); - else + if (x) { + if (!xfrm_state_hold_rcu(x)) { + *err = -EAGAIN; + x = NULL; + } + } else { *err = acquire_in_progress ? -EAGAIN : error; - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + } + rcu_read_unlock(); if (to_put) xfrm_state_put(to_put); + + if (read_seqcount_retry(&xfrm_state_hash_generation, sequence)) { + *err = -EAGAIN; + if (x) { + xfrm_state_put(x); + x = NULL; + } + } + return x; } @@ -946,16 +983,16 @@ static void __xfrm_state_insert(struct xfrm_state *x) h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr, x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, &x->id.daddr, &x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); if (x->id.spi) { h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); } tasklet_hrtimer_start(&x->mtimer, ktime_set(1, 0), HRTIMER_MODE_REL); @@ -1064,9 +1101,9 @@ static struct xfrm_state *__find_acq_core(struct net *net, xfrm_state_hold(x); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); list_add(&x->km.all, &net->xfrm.state_all); - hlist_add_head(&x->bydst, net->xfrm.state_bydst+h); + hlist_add_head_rcu(&x->bydst, net->xfrm.state_bydst + h); h = xfrm_src_hash(net, daddr, saddr, family); - hlist_add_head(&x->bysrc, net->xfrm.state_bysrc+h); + hlist_add_head_rcu(&x->bysrc, net->xfrm.state_bysrc + h); net->xfrm.state_num++; @@ -1395,9 +1432,9 @@ xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 { struct xfrm_state *x; - spin_lock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_lock(); x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); - spin_unlock_bh(&net->xfrm.xfrm_state_lock); + rcu_read_unlock(); return x; } EXPORT_SYMBOL(xfrm_state_lookup); @@ -1582,7 +1619,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) if (x->id.spi) { spin_lock_bh(&net->xfrm.xfrm_state_lock); h = xfrm_spi_hash(net, &x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, net->xfrm.state_byspi+h); + hlist_add_head_rcu(&x->byspi, net->xfrm.state_byspi + h); spin_unlock_bh(&net->xfrm.xfrm_state_lock); err = 0; @@ -2100,8 +2137,6 @@ int __net_init xfrm_state_init(struct net *net) net->xfrm.state_num = 0; INIT_WORK(&net->xfrm.state_hash_work, xfrm_hash_resize); - INIT_HLIST_HEAD(&net->xfrm.state_gc_list); - INIT_WORK(&net->xfrm.state_gc_work, xfrm_state_gc_task); spin_lock_init(&net->xfrm.xfrm_state_lock); return 0; @@ -2119,7 +2154,7 @@ void xfrm_state_fini(struct net *net) flush_work(&net->xfrm.state_hash_work); xfrm_state_flush(net, IPSEC_PROTO_ANY, false); - flush_work(&net->xfrm.state_gc_work); + flush_work(&xfrm_state_gc_work); WARN_ON(!list_empty(&net->xfrm.state_all)); diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 05a6e3d9c258..35a7e794ad04 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -17,13 +17,13 @@ static struct ctl_table xfrm_table[] = { .procname = "xfrm_aevent_etime", .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_douintvec }, { .procname = "xfrm_aevent_rseqth", .maxlen = sizeof(u32), .mode = 0644, - .proc_handler = proc_dointvec + .proc_handler = proc_douintvec }, { .procname = "xfrm_larval_drop", diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 90ebf7d35c07..12b7304d55dc 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -24,6 +24,9 @@ hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin hostprogs-y += xdp1 hostprogs-y += xdp2 +hostprogs-y += test_current_task_under_cgroup +hostprogs-y += trace_event +hostprogs-y += sampleip test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -49,6 +52,10 @@ test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o xdp1-objs := bpf_load.o libbpf.o xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o libbpf.o xdp1_user.o +test_current_task_under_cgroup-objs := bpf_load.o libbpf.o \ + test_current_task_under_cgroup_user.o +trace_event-objs := bpf_load.o libbpf.o trace_event_user.o +sampleip-objs := bpf_load.o libbpf.o sampleip_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -64,6 +71,7 @@ always += tracex6_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o +always += tcbpf2_kern.o always += lathist_kern.o always += offwaketime_kern.o always += spintest_kern.o @@ -74,6 +82,9 @@ always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o always += xdp1_kern.o always += xdp2_kern.o +always += test_current_task_under_cgroup_kern.o +always += trace_event_kern.o +always += sampleip_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -97,6 +108,9 @@ HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt HOSTLOADLIBES_xdp1 += -lelf HOSTLOADLIBES_xdp2 += -lelf +HOSTLOADLIBES_test_current_task_under_cgroup += -lelf +HOSTLOADLIBES_trace_event += -lelf +HOSTLOADLIBES_sampleip += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h index 7927a090fa0d..90f44bd2045e 100644 --- a/samples/bpf/bpf_helpers.h +++ b/samples/bpf/bpf_helpers.h @@ -37,12 +37,26 @@ static int (*bpf_clone_redirect)(void *ctx, int ifindex, int flags) = (void *) BPF_FUNC_clone_redirect; static int (*bpf_redirect)(int ifindex, int flags) = (void *) BPF_FUNC_redirect; -static int (*bpf_perf_event_output)(void *ctx, void *map, int index, void *data, int size) = +static int (*bpf_perf_event_output)(void *ctx, void *map, + unsigned long long flags, void *data, + int size) = (void *) BPF_FUNC_perf_event_output; static int (*bpf_get_stackid)(void *ctx, void *map, int flags) = (void *) BPF_FUNC_get_stackid; static int (*bpf_probe_write_user)(void *dst, void *src, int size) = (void *) BPF_FUNC_probe_write_user; +static int (*bpf_current_task_under_cgroup)(void *map, int index) = + (void *) BPF_FUNC_current_task_under_cgroup; +static int (*bpf_skb_get_tunnel_key)(void *ctx, void *key, int size, int flags) = + (void *) BPF_FUNC_skb_get_tunnel_key; +static int (*bpf_skb_set_tunnel_key)(void *ctx, void *key, int size, int flags) = + (void *) BPF_FUNC_skb_set_tunnel_key; +static int (*bpf_skb_get_tunnel_opt)(void *ctx, void *md, int size) = + (void *) BPF_FUNC_skb_get_tunnel_opt; +static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) = + (void *) BPF_FUNC_skb_set_tunnel_opt; +static unsigned long long (*bpf_get_prandom_u32)(void) = + (void *) BPF_FUNC_get_prandom_u32; /* llvm builtin functions that eBPF C program may use to * emit BPF_LD_ABS and BPF_LD_IND instructions diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 0cfda2320320..97913e109b14 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -51,6 +51,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; bool is_xdp = strncmp(event, "xdp", 3) == 0; + bool is_perf_event = strncmp(event, "perf_event", 10) == 0; enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -69,6 +70,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_TRACEPOINT; } else if (is_xdp) { prog_type = BPF_PROG_TYPE_XDP; + } else if (is_perf_event) { + prog_type = BPF_PROG_TYPE_PERF_EVENT; } else { printf("Unknown event '%s'\n", event); return -1; @@ -82,7 +85,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_fd[prog_cnt++] = fd; - if (is_xdp) + if (is_xdp || is_perf_event) return 0; if (is_socket) { @@ -326,6 +329,7 @@ int load_bpf_file(char *path) memcmp(shname_prog, "kretprobe/", 10) == 0 || memcmp(shname_prog, "tracepoint/", 11) == 0 || memcmp(shname_prog, "xdp", 3) == 0 || + memcmp(shname_prog, "perf_event", 10) == 0 || memcmp(shname_prog, "socket", 6) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } @@ -344,6 +348,7 @@ int load_bpf_file(char *path) memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "tracepoint/", 11) == 0 || memcmp(shname, "xdp", 3) == 0 || + memcmp(shname, "perf_event", 10) == 0 || memcmp(shname, "socket", 6) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/libbpf.h b/samples/bpf/libbpf.h index 364582b77888..ac6edb61b64a 100644 --- a/samples/bpf/libbpf.h +++ b/samples/bpf/libbpf.h @@ -85,6 +85,14 @@ extern char bpf_log_buf[LOG_BUF_SIZE]; .off = 0, \ .imm = IMM }) +#define BPF_MOV32_IMM(DST, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_ALU | BPF_MOV | BPF_K, \ + .dst_reg = DST, \ + .src_reg = 0, \ + .off = 0, \ + .imm = IMM }) + /* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ #define BPF_LD_IMM64(DST, IMM) \ BPF_LD_IMM64_RAW(DST, 0, IMM) diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c new file mode 100644 index 000000000000..774a681f374a --- /dev/null +++ b/samples/bpf/sampleip_kern.c @@ -0,0 +1,38 @@ +/* Copyright 2016 Netflix, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define MAX_IPS 8192 + +struct bpf_map_def SEC("maps") ip_map = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(u64), + .value_size = sizeof(u32), + .max_entries = MAX_IPS, +}; + +SEC("perf_event") +int do_sample(struct bpf_perf_event_data *ctx) +{ + u64 ip; + u32 *value, init_val = 1; + + ip = ctx->regs.ip; + value = bpf_map_lookup_elem(&ip_map, &ip); + if (value) + *value += 1; + else + /* E2BIG not tested for this example only */ + bpf_map_update_elem(&ip_map, &ip, &init_val, BPF_NOEXIST); + + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c new file mode 100644 index 000000000000..260a6bdd6413 --- /dev/null +++ b/samples/bpf/sampleip_user.c @@ -0,0 +1,196 @@ +/* + * sampleip: sample instruction pointer and frequency count in a BPF map. + * + * Copyright 2016 Netflix, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +#define DEFAULT_FREQ 99 +#define DEFAULT_SECS 5 +#define MAX_IPS 8192 +#define PAGE_OFFSET 0xffff880000000000 + +static int nr_cpus; + +static void usage(void) +{ + printf("USAGE: sampleip [-F freq] [duration]\n"); + printf(" -F freq # sample frequency (Hertz), default 99\n"); + printf(" duration # sampling duration (seconds), default 5\n"); +} + +static int sampling_start(int *pmu_fd, int freq) +{ + int i; + + struct perf_event_attr pe_sample_attr = { + .type = PERF_TYPE_SOFTWARE, + .freq = 1, + .sample_period = freq, + .config = PERF_COUNT_SW_CPU_CLOCK, + .inherit = 1, + }; + + for (i = 0; i < nr_cpus; i++) { + pmu_fd[i] = perf_event_open(&pe_sample_attr, -1 /* pid */, i, + -1 /* group_fd */, 0 /* flags */); + if (pmu_fd[i] < 0) { + fprintf(stderr, "ERROR: Initializing perf sampling\n"); + return 1; + } + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, + prog_fd[0]) == 0); + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); + } + + return 0; +} + +static void sampling_end(int *pmu_fd) +{ + int i; + + for (i = 0; i < nr_cpus; i++) + close(pmu_fd[i]); +} + +struct ipcount { + __u64 ip; + __u32 count; +}; + +/* used for sorting */ +struct ipcount counts[MAX_IPS]; + +static int count_cmp(const void *p1, const void *p2) +{ + return ((struct ipcount *)p1)->count - ((struct ipcount *)p2)->count; +} + +static void print_ip_map(int fd) +{ + struct ksym *sym; + __u64 key, next_key; + __u32 value; + int i, max; + + printf("%-19s %-32s %s\n", "ADDR", "KSYM", "COUNT"); + + /* fetch IPs and counts */ + key = 0, i = 0; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + bpf_lookup_elem(fd, &next_key, &value); + counts[i].ip = next_key; + counts[i++].count = value; + key = next_key; + } + max = i; + + /* sort and print */ + qsort(counts, max, sizeof(struct ipcount), count_cmp); + for (i = 0; i < max; i++) { + if (counts[i].ip > PAGE_OFFSET) { + sym = ksym_search(counts[i].ip); + printf("0x%-17llx %-32s %u\n", counts[i].ip, sym->name, + counts[i].count); + } else { + printf("0x%-17llx %-32s %u\n", counts[i].ip, "(user)", + counts[i].count); + } + } + + if (max == MAX_IPS) { + printf("WARNING: IP hash was full (max %d entries); ", max); + printf("may have dropped samples\n"); + } +} + +static void int_exit(int sig) +{ + printf("\n"); + print_ip_map(map_fd[0]); + exit(0); +} + +int main(int argc, char **argv) +{ + char filename[256]; + int *pmu_fd, opt, freq = DEFAULT_FREQ, secs = DEFAULT_SECS; + + /* process arguments */ + while ((opt = getopt(argc, argv, "F:h")) != -1) { + switch (opt) { + case 'F': + freq = atoi(optarg); + break; + case 'h': + default: + usage(); + return 0; + } + } + if (argc - optind == 1) + secs = atoi(argv[optind]); + if (freq == 0 || secs == 0) { + usage(); + return 1; + } + + /* initialize kernel symbol translation */ + if (load_kallsyms()) { + fprintf(stderr, "ERROR: loading /proc/kallsyms\n"); + return 2; + } + + /* create perf FDs for each CPU */ + nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + pmu_fd = malloc(nr_cpus * sizeof(int)); + if (pmu_fd == NULL) { + fprintf(stderr, "ERROR: malloc of pmu_fd\n"); + return 1; + } + + /* load BPF program */ + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (load_bpf_file(filename)) { + fprintf(stderr, "ERROR: loading BPF program (errno %d):\n", + errno); + if (strcmp(bpf_log_buf, "") == 0) + fprintf(stderr, "Try: ulimit -l unlimited\n"); + else + fprintf(stderr, "%s", bpf_log_buf); + return 1; + } + signal(SIGINT, int_exit); + + /* do sampling */ + printf("Sampling at %d Hertz for %d seconds. Ctrl-C also ends.\n", + freq, secs); + if (sampling_start(pmu_fd, freq) != 0) + return 1; + sleep(secs); + sampling_end(pmu_fd); + free(pmu_fd); + + /* output sample counts */ + print_ip_map(map_fd[0]); + + return 0; +} diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c index ba0e177ff561..44e5846c988f 100644 --- a/samples/bpf/sockex2_kern.c +++ b/samples/bpf/sockex2_kern.c @@ -14,7 +14,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -59,7 +59,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct flow_keys *flow) + struct bpf_flow_keys *flow) { __u64 verlen; @@ -83,7 +83,7 @@ static inline __u64 parse_ip(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto } static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_proto, - struct flow_keys *flow) + struct bpf_flow_keys *flow) { *ip_proto = load_byte(skb, nhoff + offsetof(struct ipv6hdr, nexthdr)); @@ -96,7 +96,7 @@ static inline __u64 parse_ipv6(struct __sk_buff *skb, __u64 nhoff, __u64 *ip_pro return nhoff; } -static inline bool flow_dissector(struct __sk_buff *skb, struct flow_keys *flow) +static inline bool flow_dissector(struct __sk_buff *skb, struct bpf_flow_keys *flow) { __u64 nhoff = ETH_HLEN; __u64 ip_proto; @@ -198,7 +198,7 @@ struct bpf_map_def SEC("maps") hash_map = { SEC("socket2") int bpf_prog2(struct __sk_buff *skb) { - struct flow_keys flow; + struct bpf_flow_keys flow; struct pair *value; u32 key; diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 41ae2fd21b13..95907f8d2b17 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -61,7 +61,7 @@ struct vlan_hdr { __be16 h_vlan_encapsulated_proto; }; -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -88,7 +88,7 @@ static inline __u32 ipv6_addr_hash(struct __sk_buff *ctx, __u64 off) } struct globals { - struct flow_keys flow; + struct bpf_flow_keys flow; }; struct bpf_map_def SEC("maps") percpu_map = { @@ -114,14 +114,14 @@ struct pair { struct bpf_map_def SEC("maps") hash_map = { .type = BPF_MAP_TYPE_HASH, - .key_size = sizeof(struct flow_keys), + .key_size = sizeof(struct bpf_flow_keys), .value_size = sizeof(struct pair), .max_entries = 1024, }; static void update_stats(struct __sk_buff *skb, struct globals *g) { - struct flow_keys key = g->flow; + struct bpf_flow_keys key = g->flow; struct pair *value; value = bpf_map_lookup_elem(&hash_map, &key); diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c index d4184ab5f3ac..3fcfd8c4b2a3 100644 --- a/samples/bpf/sockex3_user.c +++ b/samples/bpf/sockex3_user.c @@ -7,7 +7,7 @@ #include #include -struct flow_keys { +struct bpf_flow_keys { __be32 src; __be32 dst; union { @@ -49,7 +49,7 @@ int main(int argc, char **argv) (void) f; for (i = 0; i < 5; i++) { - struct flow_keys key = {}, next_key; + struct bpf_flow_keys key = {}, next_key; struct pair value; sleep(1); diff --git a/samples/bpf/tcbpf2_kern.c b/samples/bpf/tcbpf2_kern.c new file mode 100644 index 000000000000..3303bb85593b --- /dev/null +++ b/samples/bpf/tcbpf2_kern.c @@ -0,0 +1,381 @@ +/* Copyright (c) 2016 VMware + * Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +#define _htonl __builtin_bswap32 +#define ERROR(ret) do {\ + char fmt[] = "ERROR line:%d ret:%d\n";\ + bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \ + } while(0) + +struct geneve_opt { + __be16 opt_class; + u8 type; + u8 length:5; + u8 r3:1; + u8 r2:1; + u8 r1:1; + u8 opt_data[8]; /* hard-coded to 8 byte */ +}; + +struct vxlan_metadata { + u32 gbp; +}; + +SEC("gre_set_tunnel") +int _gre_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("gre_get_tunnel") +int _gre_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "key %d remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4); + return TC_ACT_OK; +} + +SEC("vxlan_set_tunnel") +int _vxlan_set_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */ + ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("vxlan_get_tunnel") +int _vxlan_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct vxlan_metadata md; + char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, md.gbp); + + return TC_ACT_OK; +} + +SEC("geneve_set_tunnel") +int _geneve_set_tunnel(struct __sk_buff *skb) +{ + int ret, ret2; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + + __builtin_memset(&key, 0x0, sizeof(key)); + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + key.tunnel_id = 2; + key.tunnel_tos = 0; + key.tunnel_ttl = 64; + + __builtin_memset(&gopt, 0x0, sizeof(gopt)); + gopt.opt_class = 0x102; /* Open Virtual Networking (OVN) */ + gopt.type = 0x08; + gopt.r1 = 1; + gopt.r2 = 0; + gopt.r3 = 1; + gopt.length = 2; /* 4-byte multiple */ + *(int *) &gopt.opt_data = 0xdeadbeef; + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_ZERO_CSUM_TX); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("geneve_get_tunnel") +int _geneve_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + struct geneve_opt gopt; + char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt)); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), + key.tunnel_id, key.remote_ipv4, gopt.opt_class); + return TC_ACT_OK; +} + +SEC("ipip_set_tunnel") +int _ipip_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.tunnel_ttl = 64; + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) + return TC_ACT_SHOT; + + if (tcp->dest == htons(5200)) + key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */ + else if (tcp->dest == htons(5201)) + key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */ + else + return TC_ACT_SHOT; + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip_get_tunnel") +int _ipip_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip 0x%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4); + return TC_ACT_OK; +} + +SEC("ipip6_set_tunnel") +int _ipip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct iphdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = _htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->protocol == IPPROTO_ICMP) { + key.remote_ipv6[3] = _htonl(1); + } else { + if (iph->protocol != IPPROTO_TCP || iph->ihl != 5) { + ERROR(iph->protocol); + return TC_ACT_SHOT; + } + + if (tcp->dest == htons(5200)) { + key.remote_ipv6[3] = _htonl(1); + } else if (tcp->dest == htons(5201)) { + key.remote_ipv6[3] = _htonl(2); + } else { + ERROR(tcp->dest); + return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ipip6_get_tunnel") +int _ipip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), + _htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + +SEC("ip6ip6_set_tunnel") +int _ip6ip6_set_tunnel(struct __sk_buff *skb) +{ + struct bpf_tunnel_key key = {}; + void *data = (void *)(long)skb->data; + struct ipv6hdr *iph = data; + struct tcphdr *tcp = data + sizeof(*iph); + void *data_end = (void *)(long)skb->data_end; + int ret; + + /* single length check */ + if (data + sizeof(*iph) + sizeof(*tcp) > data_end) { + ERROR(1); + return TC_ACT_SHOT; + } + + key.remote_ipv6[0] = _htonl(0x2401db00); + key.tunnel_ttl = 64; + + if (iph->nexthdr == NEXTHDR_ICMP) { + key.remote_ipv6[3] = _htonl(1); + } else { + if (iph->nexthdr != NEXTHDR_TCP) { + ERROR(iph->nexthdr); + return TC_ACT_SHOT; + } + + if (tcp->dest == htons(5200)) { + key.remote_ipv6[3] = _htonl(1); + } else if (tcp->dest == htons(5201)) { + key.remote_ipv6[3] = _htonl(2); + } else { + ERROR(tcp->dest); + return TC_ACT_SHOT; + } + } + + ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + return TC_ACT_OK; +} + +SEC("ip6ip6_get_tunnel") +int _ip6ip6_get_tunnel(struct __sk_buff *skb) +{ + int ret; + struct bpf_tunnel_key key; + char fmt[] = "remote ip6 %x::%x\n"; + + ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), BPF_F_TUNINFO_IPV6); + if (ret < 0) { + ERROR(ret); + return TC_ACT_SHOT; + } + + bpf_trace_printk(fmt, sizeof(fmt), _htonl(key.remote_ipv6[0]), + _htonl(key.remote_ipv6[3])); + return TC_ACT_OK; +} + + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_current_task_under_cgroup_kern.c b/samples/bpf/test_current_task_under_cgroup_kern.c new file mode 100644 index 000000000000..86b28d7d6c99 --- /dev/null +++ b/samples/bpf/test_current_task_under_cgroup_kern.c @@ -0,0 +1,43 @@ +/* Copyright (c) 2016 Sargun Dhillon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#include +#include +#include +#include "bpf_helpers.h" +#include + +struct bpf_map_def SEC("maps") cgroup_map = { + .type = BPF_MAP_TYPE_CGROUP_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u32), + .max_entries = 1, +}; + +struct bpf_map_def SEC("maps") perf_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(u64), + .max_entries = 1, +}; + +/* Writes the last PID that called sync to a map at index 0 */ +SEC("kprobe/sys_sync") +int bpf_prog1(struct pt_regs *ctx) +{ + u64 pid = bpf_get_current_pid_tgid(); + int idx = 0; + + if (!bpf_current_task_under_cgroup(&cgroup_map, 0)) + return 0; + + bpf_map_update_elem(&perf_map, &idx, &pid, BPF_ANY); + return 0; +} + +char _license[] SEC("license") = "GPL"; +u32 _version SEC("version") = LINUX_VERSION_CODE; diff --git a/samples/bpf/test_current_task_under_cgroup_user.c b/samples/bpf/test_current_task_under_cgroup_user.c new file mode 100644 index 000000000000..30b0bce884f9 --- /dev/null +++ b/samples/bpf/test_current_task_under_cgroup_user.c @@ -0,0 +1,145 @@ +/* Copyright (c) 2016 Sargun Dhillon + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#define _GNU_SOURCE +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define CGROUP_MOUNT_PATH "/mnt" +#define CGROUP_PATH "/mnt/my-cgroup" + +#define clean_errno() (errno == 0 ? "None" : strerror(errno)) +#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) + +static int join_cgroup(char *path) +{ + int fd, rc = 0; + pid_t pid = getpid(); + char cgroup_path[PATH_MAX + 1]; + + snprintf(cgroup_path, sizeof(cgroup_path), "%s/cgroup.procs", path); + + fd = open(cgroup_path, O_WRONLY); + if (fd < 0) { + log_err("Opening Cgroup"); + return 1; + } + + if (dprintf(fd, "%d\n", pid) < 0) { + log_err("Joining Cgroup"); + rc = 1; + } + close(fd); + return rc; +} + +int main(int argc, char **argv) +{ + char filename[256]; + int cg2, idx = 0; + pid_t remote_pid, local_pid = getpid(); + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + /* + * This is to avoid interfering with existing cgroups. Unfortunately, + * most people don't have cgroupv2 enabled at this point in time. + * It's easier to create our own mount namespace and manage it + * ourselves. + */ + if (unshare(CLONE_NEWNS)) { + log_err("unshare"); + return 1; + } + + if (mount("none", "/", NULL, MS_REC | MS_PRIVATE, NULL)) { + log_err("mount fakeroot"); + return 1; + } + + if (mount("none", CGROUP_MOUNT_PATH, "cgroup2", 0, NULL)) { + log_err("mount cgroup2"); + return 1; + } + + if (mkdir(CGROUP_PATH, 0777) && errno != EEXIST) { + log_err("mkdir cgroup"); + return 1; + } + + cg2 = open(CGROUP_PATH, O_RDONLY); + if (cg2 < 0) { + log_err("opening target cgroup"); + goto cleanup_cgroup_err; + } + + if (bpf_update_elem(map_fd[0], &idx, &cg2, BPF_ANY)) { + log_err("Adding target cgroup to map"); + goto cleanup_cgroup_err; + } + if (join_cgroup("/mnt/my-cgroup")) { + log_err("Leaving target cgroup"); + goto cleanup_cgroup_err; + } + + /* + * The installed helper program catched the sync call, and should + * write it to the map. + */ + + sync(); + bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + + if (local_pid != remote_pid) { + fprintf(stderr, + "BPF Helper didn't write correct PID to map, but: %d\n", + remote_pid); + goto leave_cgroup_err; + } + + /* Verify the negative scenario; leave the cgroup */ + if (join_cgroup(CGROUP_MOUNT_PATH)) + goto leave_cgroup_err; + + remote_pid = 0; + bpf_update_elem(map_fd[1], &idx, &remote_pid, BPF_ANY); + + sync(); + bpf_lookup_elem(map_fd[1], &idx, &remote_pid); + + if (local_pid == remote_pid) { + fprintf(stderr, "BPF cgroup negative test did not work\n"); + goto cleanup_cgroup_err; + } + + rmdir(CGROUP_PATH); + return 0; + + /* Error condition, cleanup */ +leave_cgroup_err: + join_cgroup(CGROUP_MOUNT_PATH); +cleanup_cgroup_err: + rmdir(CGROUP_PATH); + return 1; +} diff --git a/samples/bpf/test_ipip.sh b/samples/bpf/test_ipip.sh new file mode 100755 index 000000000000..196925403ab4 --- /dev/null +++ b/samples/bpf/test_ipip.sh @@ -0,0 +1,178 @@ +#!/bin/bash + +function config_device { + ip netns add at_ns0 + ip netns add at_ns1 + ip netns add at_ns2 + ip link add veth0 type veth peer name veth0b + ip link add veth1 type veth peer name veth1b + ip link add veth2 type veth peer name veth2b + ip link set veth0b up + ip link set veth1b up + ip link set veth2b up + ip link set dev veth0b mtu 1500 + ip link set dev veth1b mtu 1500 + ip link set dev veth2b mtu 1500 + ip link set veth0 netns at_ns0 + ip link set veth1 netns at_ns1 + ip link set veth2 netns at_ns2 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad + ip netns exec at_ns0 ip link set dev veth0 up + ip netns exec at_ns1 ip addr add 172.16.1.101/24 dev veth1 + ip netns exec at_ns1 ip addr add 2401:db00::2/64 dev veth1 nodad + ip netns exec at_ns1 ip link set dev veth1 up + ip netns exec at_ns2 ip addr add 172.16.1.200/24 dev veth2 + ip netns exec at_ns2 ip addr add 2401:db00::3/64 dev veth2 nodad + ip netns exec at_ns2 ip link set dev veth2 up + ip link add br0 type bridge + ip link set br0 up + ip link set dev br0 mtu 1500 + ip link set veth0b master br0 + ip link set veth1b master br0 + ip link set veth2b master br0 +} + +function add_ipip_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ipip local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ipip local 172.16.1.101 remote 172.16.1.200 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 + + ip netns exec at_ns2 ip link add dev $DEV type ipip external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 +} + +function add_ipip6_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::1/64 remote 2401:db00::3/64 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ip6tnl mode ipip6 local 2401:db00::2/64 remote 2401:db00::3/64 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 10.1.1.100/24 + + ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ipip6 external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 10.1.1.200/24 +} + +function add_ip6ip6_tunnel { + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::1/64 remote 2401:db00::3/64 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 2601:646::1/64 + ip netns exec at_ns1 \ + ip link add dev $DEV_NS type ip6tnl mode ip6ip6 local 2401:db00::2/64 remote 2401:db00::3/64 + ip netns exec at_ns1 ip link set dev $DEV_NS up + # same inner IP address in at_ns0 and at_ns1 + ip netns exec at_ns1 ip addr add dev $DEV_NS 2601:646::1/64 + + ip netns exec at_ns2 ip link add dev $DEV type ip6tnl mode ip6ip6 external + ip netns exec at_ns2 ip link set dev $DEV up + ip netns exec at_ns2 ip addr add dev $DEV 2601:646::2/64 +} + +function attach_bpf { + DEV=$1 + SET_TUNNEL=$2 + GET_TUNNEL=$3 + ip netns exec at_ns2 tc qdisc add dev $DEV clsact + ip netns exec at_ns2 tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL + ip netns exec at_ns2 tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL +} + +function test_ipip { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ipip_tunnel + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns2 ping -c 1 10.1.1.100 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 + cleanup +} + +# IPv4 over IPv6 tunnel +function test_ipip6 { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ipip6_tunnel + attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel + + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns2 ping -c 1 10.1.1.100 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5200 + ip netns exec at_ns2 iperf -c 10.1.1.100 -n 5k -p 5201 + cleanup +} + +# IPv6 over IPv6 tunnel +function test_ip6ip6 { + DEV_NS=ipip_std + DEV=ipip_bpf + config_device +# tcpdump -nei br0 & + cat /sys/kernel/debug/tracing/trace_pipe & + + add_ip6ip6_tunnel + attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel + + ip netns exec at_ns0 ping -6 -c 1 2601:646::2 + ip netns exec at_ns2 ping -6 -c 1 2601:646::1 + ip netns exec at_ns0 iperf -6sD -p 5200 > /dev/null + ip netns exec at_ns1 iperf -6sD -p 5201 > /dev/null + sleep 0.2 + # tcp check _same_ IP over different tunnels + ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5200 + ip netns exec at_ns2 iperf -6c 2601:646::1 -n 5k -p 5201 + cleanup +} + +function cleanup { + set +ex + pkill iperf + ip netns delete at_ns0 + ip netns delete at_ns1 + ip netns delete at_ns2 + ip link del veth0 + ip link del veth1 + ip link del veth2 + ip link del br0 + pkill tcpdump + pkill cat + set -ex +} + +cleanup +echo "Testing IP tunnels..." +test_ipip +test_ipip6 +test_ip6ip6 +echo "*** PASS ***" diff --git a/samples/bpf/test_tunnel_bpf.sh b/samples/bpf/test_tunnel_bpf.sh new file mode 100755 index 000000000000..1ff634f187b7 --- /dev/null +++ b/samples/bpf/test_tunnel_bpf.sh @@ -0,0 +1,167 @@ +#!/bin/bash +# In Namespace 0 (at_ns0) using native tunnel +# Overlay IP: 10.1.1.100 +# local 192.16.1.100 remote 192.16.1.200 +# veth0 IP: 172.16.1.100, tunnel dev 00 + +# Out of Namespace using BPF set/get on lwtunnel +# Overlay IP: 10.1.1.200 +# local 172.16.1.200 remote 172.16.1.100 +# veth1 IP: 172.16.1.200, tunnel dev 11 + +function config_device { + ip netns add at_ns0 + ip link add veth0 type veth peer name veth1 + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip link set dev veth0 up + ip link set dev veth1 up mtu 1500 + ip addr add dev veth1 172.16.1.200/24 +} + +function add_gre_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE key 2 local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE key 2 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +function add_vxlan_tunnel { + # Set static ARP entry here because iptables set-mark works + # on L3 packet, as a result not applying to ARP packets, + # causing errors at get_tunnel_{key/opt}. + + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 2 dstport 4789 gbp remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS address 52:54:00:d9:01:00 up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00 + ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF + + # out of namespace + ip link add dev $DEV type $TYPE external gbp dstport 4789 + ip link set dev $DEV address 52:54:00:d9:02:00 up + ip addr add dev $DEV 10.1.1.200/24 + arp -s 10.1.1.100 52:54:00:d9:01:00 +} + +function add_geneve_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE id 2 dstport 6081 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE dstport 6081 external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +function add_ipip_tunnel { + # in namespace + ip netns exec at_ns0 \ + ip link add dev $DEV_NS type $TYPE local 172.16.1.100 remote 172.16.1.200 + ip netns exec at_ns0 ip link set dev $DEV_NS up + ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24 + + # out of namespace + ip link add dev $DEV type $TYPE external + ip link set dev $DEV up + ip addr add dev $DEV 10.1.1.200/24 +} + +function attach_bpf { + DEV=$1 + SET_TUNNEL=$2 + GET_TUNNEL=$3 + tc qdisc add dev $DEV clsact + tc filter add dev $DEV egress bpf da obj tcbpf2_kern.o sec $SET_TUNNEL + tc filter add dev $DEV ingress bpf da obj tcbpf2_kern.o sec $GET_TUNNEL +} + +function test_gre { + TYPE=gretap + DEV_NS=gretap00 + DEV=gretap11 + config_device + add_gre_tunnel + attach_bpf $DEV gre_set_tunnel gre_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup +} + +function test_vxlan { + TYPE=vxlan + DEV_NS=vxlan00 + DEV=vxlan11 + config_device + add_vxlan_tunnel + attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup +} + +function test_geneve { + TYPE=geneve + DEV_NS=geneve00 + DEV=geneve11 + config_device + add_geneve_tunnel + attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + cleanup +} + +function test_ipip { + TYPE=ipip + DEV_NS=ipip00 + DEV=ipip11 + config_device + tcpdump -nei veth1 & + cat /sys/kernel/debug/tracing/trace_pipe & + add_ipip_tunnel + ethtool -K veth1 gso off gro off rx off tx off + ip link set dev veth1 mtu 1500 + attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel + ping -c 1 10.1.1.100 + ip netns exec at_ns0 ping -c 1 10.1.1.200 + ip netns exec at_ns0 iperf -sD -p 5200 > /dev/null + sleep 0.2 + iperf -c 10.1.1.100 -n 5k -p 5200 + cleanup +} + +function cleanup { + set +ex + pkill iperf + ip netns delete at_ns0 + ip link del veth1 + ip link del ipip11 + ip link del gretap11 + ip link del geneve11 + pkill tcpdump + pkill cat + set -ex +} + +cleanup +echo "Testing GRE tunnel..." +test_gre +echo "Testing VXLAN tunnel..." +test_vxlan +echo "Testing GENEVE tunnel..." +test_geneve +echo "Testing IPIP tunnel..." +test_ipip +echo "*** PASS ***" diff --git a/samples/bpf/test_verifier.c b/samples/bpf/test_verifier.c index fe2fcec98c1f..369ffaad3799 100644 --- a/samples/bpf/test_verifier.c +++ b/samples/bpf/test_verifier.c @@ -29,6 +29,7 @@ struct bpf_test { struct bpf_insn insns[MAX_INSNS]; int fixup[MAX_FIXUPS]; int prog_array_fixup[MAX_FIXUPS]; + int test_val_map_fixup[MAX_FIXUPS]; const char *errstr; const char *errstr_unpriv; enum { @@ -39,6 +40,19 @@ struct bpf_test { enum bpf_prog_type prog_type; }; +/* Note we want this to be 64 bit aligned so that the end of our array is + * actually the end of the structure. + */ +#define MAX_ENTRIES 11 +struct test_val { + unsigned index; + int foo[MAX_ENTRIES]; +}; + +struct other_val { + unsigned int action[32]; +}; + static struct bpf_test tests[] = { { "add+sub+mul", @@ -290,6 +304,29 @@ static struct bpf_test tests[] = { .errstr = "invalid read from stack", .result = REJECT, }, + { + "invalid argument register", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .errstr = "R1 !read_ok", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "non-invalid argument register", + .insns = { + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_6), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_cgroup_classid), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "check valid spill/fill", .insns = { @@ -1209,6 +1246,54 @@ static struct bpf_test tests[] = { .errstr = "invalid read from stack off -8+0 size 8", .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "raw_stack: skb_load_bytes, negative len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, -8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, negative len 2", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, ~0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "raw_stack: skb_load_bytes, zero len", + .insns = { + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid stack type R3", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, { "raw_stack: skb_load_bytes, no init", .insns = { @@ -1449,7 +1534,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "pkt: test1", + "direct packet access: test1", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1466,7 +1551,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "pkt: test2", + "direct packet access: test2", .insns = { BPF_MOV64_IMM(BPF_REG_0, 1), BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, @@ -1499,7 +1584,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, { - "pkt: test3", + "direct packet access: test3", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1511,7 +1596,7 @@ static struct bpf_test tests[] = { .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, }, { - "pkt: test4", + "direct packet access: test4 (write)", .insns = { BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct __sk_buff, data)), @@ -1524,10 +1609,780 @@ static struct bpf_test tests[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN(), }, - .errstr = "cannot write", + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test5 (pkt_end >= reg, good access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test6 (pkt_end >= reg, bad access)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", .result = REJECT, .prog_type = BPF_PROG_TYPE_SCHED_CLS, }, + { + "direct packet access: test7 (pkt_end >= reg, both accesses)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test8 (double test, variant 1)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test9 (double test, variant 2)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "direct packet access: test10 (write invalid)", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "invalid access to packet", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test1, valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {5}, + .result_unpriv = ACCEPT, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test2, unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {1}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test3, variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {11}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test4, packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {7}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test5, packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct xdp_md, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct xdp_md, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {6}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_XDP, + }, + { + "helper access to packet: test6, cls valid packet_ptr range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_2), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {5}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test7, cls unchecked packet_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {1}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test8, cls variable add", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10), + BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5), + BPF_MOV64_REG(BPF_REG_5, BPF_REG_4), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {11}, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test9, cls packet_ptr with bad range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {7}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test10, cls packet_ptr with too short range", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup = {6}, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test11, cls unsuitable helper 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 42), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test12, cls unsuitable helper 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_4, 4), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "helper access to the packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test13, cls helper ok", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test14, cls helper fail sub", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "type=inv expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test15, cls helper fail range 1", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 8), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test16, cls helper fail range 2", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, -9), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test17, cls helper fail range 3", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, ~0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test18, cls helper fail range zero", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test19, pkt end as input", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "R1 type=pkt_end expected=fp", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "helper access to packet: test20, wrong reg", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7), + BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6), + BPF_MOV64_IMM(BPF_REG_2, 4), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_MOV64_IMM(BPF_REG_4, 0), + BPF_MOV64_IMM(BPF_REG_5, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = REJECT, + .errstr = "invalid access to packet", + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + }, + { + "valid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, 4), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "valid map access into an array with a signed variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, + }, + { + "invalid map access into an array with a constant", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2, + offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .result = REJECT, + }, + { + "invalid map access into an array with a register", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is outside of the array range", + .result = REJECT, + }, + { + "invalid map access into an array with a variable", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, + { + "invalid map access into an array with no floor check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), + BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, + { + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1), + BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3}, + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .result = REJECT, + }, + { + "invalid map access into an array with a invalid max check", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8), + BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct test_val, foo)), + BPF_EXIT_INSN(), + }, + .test_val_map_fixup = {3, 11}, + .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.", + .result = REJECT, + }, }; static int probe_filter_length(struct bpf_insn *fp) @@ -1541,12 +2396,12 @@ static int probe_filter_length(struct bpf_insn *fp) return len + 1; } -static int create_map(void) +static int create_map(size_t val_size, int num) { int map_fd; map_fd = bpf_create_map(BPF_MAP_TYPE_HASH, - sizeof(long long), sizeof(long long), 1024, 0); + sizeof(long long), val_size, num, 0); if (map_fd < 0) printf("failed to create map '%s'\n", strerror(errno)); @@ -1576,12 +2431,13 @@ static int test(void) int prog_len = probe_filter_length(prog); int *fixup = tests[i].fixup; int *prog_array_fixup = tests[i].prog_array_fixup; + int *test_val_map_fixup = tests[i].test_val_map_fixup; int expected_result; const char *expected_errstr; - int map_fd = -1, prog_array_fd = -1; + int map_fd = -1, prog_array_fd = -1, test_val_map_fd = -1; if (*fixup) { - map_fd = create_map(); + map_fd = create_map(sizeof(long long), 1024); do { prog[*fixup].imm = map_fd; @@ -1596,6 +2452,18 @@ static int test(void) prog_array_fixup++; } while (*prog_array_fixup); } + if (*test_val_map_fixup) { + /* Unprivileged can't create a hash map.*/ + if (unpriv) + continue; + test_val_map_fd = create_map(sizeof(struct test_val), + 256); + do { + prog[*test_val_map_fixup].imm = test_val_map_fd; + test_val_map_fixup++; + } while (*test_val_map_fixup); + } + printf("#%d %s ", i, tests[i].descr); prog_fd = bpf_prog_load(prog_type ?: BPF_PROG_TYPE_SOCKET_FILTER, @@ -1642,6 +2510,8 @@ fail: close(map_fd); if (prog_array_fd >= 0) close(prog_array_fd); + if (test_val_map_fd >= 0) + close(test_val_map_fd); close(prog_fd); } diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c new file mode 100644 index 000000000000..71a8ed32823e --- /dev/null +++ b/samples/bpf/trace_event_kern.c @@ -0,0 +1,65 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include "bpf_helpers.h" + +struct key_t { + char comm[TASK_COMM_LEN]; + u32 kernstack; + u32 userstack; +}; + +struct bpf_map_def SEC("maps") counts = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(struct key_t), + .value_size = sizeof(u64), + .max_entries = 10000, +}; + +struct bpf_map_def SEC("maps") stackmap = { + .type = BPF_MAP_TYPE_STACK_TRACE, + .key_size = sizeof(u32), + .value_size = PERF_MAX_STACK_DEPTH * sizeof(u64), + .max_entries = 10000, +}; + +#define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP) +#define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK) + +SEC("perf_event") +int bpf_prog1(struct bpf_perf_event_data *ctx) +{ + char fmt[] = "CPU-%d period %lld ip %llx"; + u32 cpu = bpf_get_smp_processor_id(); + struct key_t key; + u64 *val, one = 1; + + if (ctx->sample_period < 10000) + /* ignore warmup */ + return 0; + bpf_get_current_comm(&key.comm, sizeof(key.comm)); + key.kernstack = bpf_get_stackid(ctx, &stackmap, KERN_STACKID_FLAGS); + key.userstack = bpf_get_stackid(ctx, &stackmap, USER_STACKID_FLAGS); + if ((int)key.kernstack < 0 && (int)key.userstack < 0) { + bpf_trace_printk(fmt, sizeof(fmt), cpu, ctx->sample_period, + ctx->regs.ip); + return 0; + } + + val = bpf_map_lookup_elem(&counts, &key); + if (val) + (*val)++; + else + bpf_map_update_elem(&counts, &key, &one, BPF_NOEXIST); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c new file mode 100644 index 000000000000..9a130d31ecf2 --- /dev/null +++ b/samples/bpf/trace_event_user.c @@ -0,0 +1,213 @@ +/* Copyright (c) 2016 Facebook + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "libbpf.h" +#include "bpf_load.h" + +#define SAMPLE_FREQ 50 + +static bool sys_read_seen, sys_write_seen; + +static void print_ksym(__u64 addr) +{ + struct ksym *sym; + + if (!addr) + return; + sym = ksym_search(addr); + printf("%s;", sym->name); + if (!strcmp(sym->name, "sys_read")) + sys_read_seen = true; + else if (!strcmp(sym->name, "sys_write")) + sys_write_seen = true; +} + +static void print_addr(__u64 addr) +{ + if (!addr) + return; + printf("%llx;", addr); +} + +#define TASK_COMM_LEN 16 + +struct key_t { + char comm[TASK_COMM_LEN]; + __u32 kernstack; + __u32 userstack; +}; + +static void print_stack(struct key_t *key, __u64 count) +{ + __u64 ip[PERF_MAX_STACK_DEPTH] = {}; + static bool warned; + int i; + + printf("%3lld %s;", count, key->comm); + if (bpf_lookup_elem(map_fd[1], &key->kernstack, ip) != 0) { + printf("---;"); + } else { + for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) + print_ksym(ip[i]); + } + printf("-;"); + if (bpf_lookup_elem(map_fd[1], &key->userstack, ip) != 0) { + printf("---;"); + } else { + for (i = PERF_MAX_STACK_DEPTH - 1; i >= 0; i--) + print_addr(ip[i]); + } + printf("\n"); + + if (key->kernstack == -EEXIST && !warned) { + printf("stackmap collisions seen. Consider increasing size\n"); + warned = true; + } else if ((int)key->kernstack < 0 && (int)key->userstack < 0) { + printf("err stackid %d %d\n", key->kernstack, key->userstack); + } +} + +static void int_exit(int sig) +{ + kill(0, SIGKILL); + exit(0); +} + +static void print_stacks(void) +{ + struct key_t key = {}, next_key; + __u64 value; + __u32 stackid = 0, next_id; + int fd = map_fd[0], stack_map = map_fd[1]; + + sys_read_seen = sys_write_seen = false; + while (bpf_get_next_key(fd, &key, &next_key) == 0) { + bpf_lookup_elem(fd, &next_key, &value); + print_stack(&next_key, value); + bpf_delete_elem(fd, &next_key); + key = next_key; + } + + if (!sys_read_seen || !sys_write_seen) { + printf("BUG kernel stack doesn't contain sys_read() and sys_write()\n"); + int_exit(0); + } + + /* clear stack map */ + while (bpf_get_next_key(stack_map, &stackid, &next_id) == 0) { + bpf_delete_elem(stack_map, &next_id); + stackid = next_id; + } +} + +static void test_perf_event_all_cpu(struct perf_event_attr *attr) +{ + int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + int *pmu_fd = malloc(nr_cpus * sizeof(int)); + int i; + + /* open perf_event on all cpus */ + for (i = 0; i < nr_cpus; i++) { + pmu_fd[i] = perf_event_open(attr, -1, i, -1, 0); + if (pmu_fd[i] < 0) { + printf("perf_event_open failed\n"); + goto all_cpu_err; + } + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); + assert(ioctl(pmu_fd[i], PERF_EVENT_IOC_ENABLE, 0) == 0); + } + system("dd if=/dev/zero of=/dev/null count=5000k"); + print_stacks(); +all_cpu_err: + for (i--; i >= 0; i--) + close(pmu_fd[i]); + free(pmu_fd); +} + +static void test_perf_event_task(struct perf_event_attr *attr) +{ + int pmu_fd; + + /* open task bound event */ + pmu_fd = perf_event_open(attr, 0, -1, -1, 0); + if (pmu_fd < 0) { + printf("perf_event_open failed\n"); + return; + } + assert(ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd[0]) == 0); + assert(ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0) == 0); + system("dd if=/dev/zero of=/dev/null count=5000k"); + print_stacks(); + close(pmu_fd); +} + +static void test_bpf_perf_event(void) +{ + struct perf_event_attr attr_type_hw = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .inherit = 1, + }; + struct perf_event_attr attr_type_sw = { + .sample_freq = SAMPLE_FREQ, + .freq = 1, + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + .inherit = 1, + }; + + test_perf_event_all_cpu(&attr_type_hw); + test_perf_event_task(&attr_type_hw); + test_perf_event_all_cpu(&attr_type_sw); + test_perf_event_task(&attr_type_sw); +} + + +int main(int argc, char **argv) +{ + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); + + signal(SIGINT, int_exit); + + if (load_kallsyms()) { + printf("failed to process /proc/kallsyms\n"); + return 1; + } + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 2; + } + + if (fork() == 0) { + read_trace_pipe(); + return 0; + } + test_bpf_perf_event(); + + int_exit(0); + return 0; +} diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c index f95f232cbab9..fd12d7154d42 100644 --- a/samples/bpf/tracex5_kern.c +++ b/samples/bpf/tracex5_kern.c @@ -19,20 +19,18 @@ struct bpf_map_def SEC("maps") progs = { .max_entries = 1024, }; -SEC("kprobe/seccomp_phase1") +SEC("kprobe/__seccomp_filter") int bpf_prog1(struct pt_regs *ctx) { - struct seccomp_data sd; - - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + int sc_nr = (int)PT_REGS_PARM1(ctx); /* dispatch into next BPF program depending on syscall number */ - bpf_tail_call(ctx, &progs, sd.nr); + bpf_tail_call(ctx, &progs, sc_nr); /* fall through -> unknown syscall */ - if (sd.nr >= __NR_getuid && sd.nr <= __NR_getsid) { + if (sc_nr >= __NR_getuid && sc_nr <= __NR_getsid) { char fmt[] = "syscall=%d (one of get/set uid/pid/gid)\n"; - bpf_trace_printk(fmt, sizeof(fmt), sd.nr); + bpf_trace_printk(fmt, sizeof(fmt), sc_nr); } return 0; } @@ -42,7 +40,7 @@ PROG(__NR_write)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] == 512) { char fmt[] = "write(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), @@ -55,7 +53,7 @@ PROG(__NR_read)(struct pt_regs *ctx) { struct seccomp_data sd; - bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM1(ctx)); + bpf_probe_read(&sd, sizeof(sd), (void *)PT_REGS_PARM2(ctx)); if (sd.args[2] > 128 && sd.args[2] <= 1024) { char fmt[] = "read(fd=%d, buf=%p, size=%d)\n"; bpf_trace_printk(fmt, sizeof(fmt), diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c index a04dd3cd4358..36b5925bb137 100644 --- a/samples/bpf/tracex5_user.c +++ b/samples/bpf/tracex5_user.c @@ -6,6 +6,7 @@ #include #include "libbpf.h" #include "bpf_load.h" +#include /* install fake seccomp program to enable seccomp code path inside the kernel, * so that our kprobe attached to seccomp_phase1() can be triggered @@ -27,8 +28,10 @@ int main(int ac, char **argv) { FILE *f; char filename[256]; + struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + setrlimit(RLIMIT_MEMLOCK, &r); if (load_bpf_file(filename)) { printf("%s", bpf_log_buf);